import os
import sys
PROJECT_PATH = '/home/space/Documents/github/SPLT/'
sys.path.append(PROJECT_PATH + 'lib')
sys.path.append(PROJECT_PATH + 'lib/slim')
image_root = './video/'
import cv2
import random
import numpy as np
from time import time
# import torch
import tensorflow as tf
from utils.tracking_utils import build_init_graph, build_box_predictor, restore_model, get_configs_from_pipeline_file
from utils.tracking_utils import crop_search_region, gen_search_patch_Hao, crop_template_Hao
from utils.tracking_utils import show_res, compile_results
from core.model_builder import build_man_model
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
random.seed(1)
np.random.seed(1)
tf.set_random_seed(1)
V_NET = 'resnet50'  # one of: mobilenet, vgg16, resnet50, resnet101
V_OPTION = 'VID_N'  # one of: lasot_N, lasot_Y, VID_Y, VID_N
S_NET = 'M'
V_T = 0.7  # verifier distance threshold (V_thres)
G_V_T = 0.8  # verifier threshold for globally re-detected candidates (global_V_thres)
O_T_L = 0.3  # rpn_score < Object_thres_low
O_T_H = 0.7  # Object_thres_high < rpn_score
O_T_C = 0.6  # objectness threshold inside center_search (Object_thres_center)
if V_NET == 'resnet101':
    iteration = 131249
elif V_NET == 'resnet50':
    iteration = 65624
elif V_NET == 'mobilenet':
    iteration = 8202
v_name = 'V_%s_%s-%d'%(V_NET,V_OPTION,iteration)
pretrained_model = os.path.join(PROJECT_PATH,'Verifier/ckpt/',v_name)
if V_NET == 'resnet101':
    from Verifier.resnet101_bin import _image_to_feat
elif V_NET == 'resnet50':
    from Verifier.resnet50_bin import _image_to_feat
elif V_NET == 'mobilenet':
    from Verifier.mobilenet import _image_to_feat
elif V_NET == 'vgg16':
    from Verifier.vgg16 import _image_to_feat
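# This script wires together the three components found in this repo's layout:
#   Verifier/ : an embedding network (_image_to_feat) that scores candidate
#               crops by squared distance to the initial template feature;
#   RPN/      : a region-proposal model (build_man_model) that detects boxes
#               inside a 300x300 search crop;
#   Skim/     : a two-branch Keras model (branch_z / branch_search) that ranks
#               candidate search regions for global re-detection.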
class MobileTracker(object):
def __init__(self, vot=False, dis=False):
self.vot = vot
self.dis = dis
config = tf.ConfigProto(allow_soft_placement=True)
# config.gpu_options.per_process_gpu_memory_fraction = 0.5
config.gpu_options.allow_growth = True
self.sess = tf.Session(config=config)
self.V_image_op = tf.placeholder(tf.float32, (None, 128, 128, 3), name='V_input_image')
self.V_feat_op = _image_to_feat(self.V_image_op, is_training=False, reuse=False)
        variables = tf.global_variables()
        # Initialize all variables before restoring the Verifier weights.
        self.sess.run(tf.variables_initializer(variables, name='init'))
restorer = tf.train.Saver(variables)
restorer.restore(self.sess, pretrained_model)
config_file = PROJECT_PATH + 'RPN/ssd_mobilenet_tracking.config'
checkpoint_dir = PROJECT_PATH + 'RPN/dump'
model_config, train_config, input_config, eval_config \
= get_configs_from_pipeline_file(config_file)
model_scope = 'model'
model = build_man_model(model_config=model_config, is_training=False)
self.initConstantOp = tf.placeholder(tf.float32, [1, 1, 1, 512])
self.initFeatOp, self.initInputOp \
= build_init_graph(model, model_scope, reuse=None)
self.pre_box_tensor, self.scores_tensor, self.input_cur_image \
= build_box_predictor(model, model_scope, self.initConstantOp, reuse=None)
variables_to_restore = tf.global_variables()
restore_model(self.sess, model_scope, checkpoint_dir, variables_to_restore, V_NET)
with tf.keras.utils.CustomObjectScope({
'relu6': tf.keras.layers.LeakyReLU(0),
'DepthwiseConv2D': tf.keras.layers.DepthwiseConv2D}):
self.branch_z = tf.keras.models.load_model(
PROJECT_PATH + 'Skim/branch_z.h5',
custom_objects={"tf": tf})
self.branch_search = tf.keras.models.load_model(
PROJECT_PATH + 'Skim/branch_search.h5',
custom_objects={"tf": tf})
def init_first(self, image, region):
# region.x region.y region.width region.height
if self.vot:
gt_for_siamfc = np.array([
region.x,
region.y,
region.width,
region.height
]).astype(int)
init_gt = [
region.y,
region.x,
region.y + region.height,
region.x + region.width
] # ymin xmin ymax xmax
else:
gt_for_siamfc = np.array([
region[0],
region[1],
region[2],
region[3]
]).astype(int)
init_gt = [
region[1],
region[0],
region[1] + region[3],
region[0] + region[2]
] # ymin xmin ymax xmax
self.expand_channel = False
if image.ndim < 3:
image = np.expand_dims(image, axis=2)
image = np.repeat(image, repeats=3, axis=2)
init_img = image
self.expand_channel = True
else:
init_img = image
# (ymin,xmin,ymax,xmax)
gt_boxes = np.zeros((1, 4))
gt_boxes[0, 0] = init_gt[0] / float(init_img.shape[0])
gt_boxes[0, 1] = init_gt[1] / float(init_img.shape[1])
gt_boxes[0, 2] = init_gt[2] / float(init_img.shape[0])
gt_boxes[0, 3] = init_gt[3] / float(init_img.shape[1])
pad_x = 36.0 / 264.0 * (gt_boxes[0, 3] - gt_boxes[0, 1]) * init_img.shape[1]
pad_y = 36.0 / 264.0 * (gt_boxes[0, 2] - gt_boxes[0, 0]) * init_img.shape[0]
cx = (gt_boxes[0, 3] + gt_boxes[0, 1]) / 2.0 * init_img.shape[1]
cy = (gt_boxes[0, 2] + gt_boxes[0, 0]) / 2.0 * init_img.shape[0]
startx = gt_boxes[0, 1] * init_img.shape[1] - pad_x
starty = gt_boxes[0, 0] * init_img.shape[0] - pad_y
endx = gt_boxes[0, 3] * init_img.shape[1] + pad_x
endy = gt_boxes[0, 2] * init_img.shape[0] + pad_y
left_pad = max(0, int(-startx))
top_pad = max(0, int(-starty))
right_pad = max(0, int(endx - init_img.shape[1] + 1))
bottom_pad = max(0, int(endy - init_img.shape[0] + 1))
startx = int(startx + left_pad)
starty = int(starty + top_pad)
endx = int(endx + left_pad)
endy = int(endy + top_pad)
img1_xiaobai = init_img.copy()
if top_pad or left_pad or bottom_pad or right_pad:
r = np.pad(
img1_xiaobai[:, :, 0],
((top_pad, bottom_pad), (left_pad, right_pad)),
mode='constant', constant_values=128)
g = np.pad(
img1_xiaobai[:, :, 1],
((top_pad, bottom_pad), (left_pad, right_pad)),
mode='constant', constant_values=128)
b = np.pad(
img1_xiaobai[:, :, 2],
((top_pad, bottom_pad), (left_pad, right_pad)),
mode='constant', constant_values=128)
r = np.expand_dims(r, 2)
g = np.expand_dims(g, 2)
b = np.expand_dims(b, 2)
img1_xiaobai = np.concatenate((r, g, b), axis=2)
        # Crop the padded region around the target and resize it to the 128x128 init input.
init_img_crop = img1_xiaobai[starty:endy, startx:endx]
init_img_crop = cv2.resize(init_img_crop, (128, 128))
self.last_gt = init_gt
self.init_feature_maps = self.sess.run(
self.initFeatOp,
feed_dict={self.initInputOp: init_img_crop})
self.mean = np.reshape(np.array([122.6789, 116.6688, 104.0069]), (1, 1, 3))
ori_ymin = int(init_gt[0])
ori_xmin = int(init_gt[1])
ori_ymax = int(init_gt[2] + 1)
ori_xmax = int(init_gt[3] + 1)
unscaled_win = image[ori_ymin:ori_ymax, ori_xmin:ori_xmax]
template_image = cv2.resize(unscaled_win, (128, 128)).astype(np.float64)
template_image -= self.mean
template_image_ = template_image[np.newaxis, :]
self.template_feat = self.sess.run(
self.V_feat_op,
feed_dict={self.V_image_op: template_image_})
z_im = crop_template_Hao(init_img, gt_for_siamfc)
z_im = z_im[None, :, :, :].astype(np.float32)
self.z_feat = self.branch_z.predict(z_im)
        # Warm up the skim branch once with a dummy 256x256 search patch.
        self.branch_search.predict([self.z_feat.repeat(1, axis=0), np.random.rand(1, 256, 256, 3)])
self.V_thres = V_T
self.global_V_thres = G_V_T
        self.Object_thres_low = O_T_L  # rpn_score < Object_thres_low
        self.Object_thres_high = O_T_H  # Object_thres_high < rpn_score
self.Object_thres_center = O_T_C
        self.EXTREM = 0.02  # verifier distance below which the match is near-certain
        self.LargeDist = 100  # sentinel distance for invalid / rejected candidates
        self.k = 20  # number of top RPN proposals to examine
self.target_w = init_gt[3] - init_gt[1]
self.target_h = init_gt[2] - init_gt[0]
self.first_w = init_gt[3] - init_gt[1]
self.first_h = init_gt[2] - init_gt[0]
self.i = 0
self.startx = 0
self.starty = 0
self.SEARCH_K = 4
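    # Center-search fallback: re-run the RPN on a crop centered on the image
    # with the given (base_h, base_w) window, then verify candidates against
    # the initial template feature. Returns the caller's (ori_*) results
    # unchanged when nothing clears the objectness and verifier thresholds.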
def center_search(
self,
image_,
base_h, base_w,
ori_scores, ori_best_idx, ori_detection_box, ori_dist_min):
        # Build a (base_h, base_w) search box centered on the image.
search_gt = np.zeros((4,))
search_gt[0] = image_.shape[0] / 2.0 - base_h / 2.0
search_gt[2] = image_.shape[0] / 2.0 + base_h / 2.0
search_gt[1] = image_.shape[1] / 2.0 - base_w / 2.0
search_gt[3] = image_.shape[1] / 2.0 + base_w / 2.0
cur_img_array, win_loc1, scale1 = \
crop_search_region(image_, search_gt, 300, mean_rgb=128)
detection_box_ori1, scores1 = self.sess.run(
[self.pre_box_tensor, self.scores_tensor],
feed_dict={
self.input_cur_image: cur_img_array,
self.initConstantOp: self.init_feature_maps})
if scores1[0, 0] > self.Object_thres_center:
detection_box_ori1[:, 0] = detection_box_ori1[:, 0] * scale1[0] + win_loc1[0]
detection_box_ori1[:, 1] = detection_box_ori1[:, 1] * scale1[1] + win_loc1[1]
detection_box_ori1[:, 2] = detection_box_ori1[:, 2] * scale1[0] + win_loc1[0]
detection_box_ori1[:, 3] = detection_box_ori1[:, 3] * scale1[1] + win_loc1[1]
detection_box_ori = detection_box_ori1.copy()
# max_idx = 0
search_box1 = detection_box_ori[0]
search_box1[0] = np.clip(search_box1[0], 0, image_.shape[0] - 1)
search_box1[2] = np.clip(search_box1[2], 0, image_.shape[0] - 1)
search_box1[1] = np.clip(search_box1[1], 0, image_.shape[1] - 1)
search_box1[3] = np.clip(search_box1[3], 0, image_.shape[1] - 1)
if (int(search_box1[0]) == int(search_box1[2])
or int(search_box1[1]) == int(search_box1[3])):
dist_min = self.LargeDist
else:
unscaled_win = image_[
int(search_box1[0]):int(search_box1[2]),
int(search_box1[1]):int(search_box1[3])]
win = cv2.resize(unscaled_win, (128, 128)).astype(np.float64)
win -= self.mean
win_input = win[np.newaxis, :]
candidate_feat = self.sess.run(
self.V_feat_op,
feed_dict={self.V_image_op: win_input})
dist_min = np.sum(np.square(self.template_feat - candidate_feat))
if dist_min < self.V_thres:
best_idx = 0
scores = scores1.copy()
detection_box = detection_box_ori[best_idx]
else:
search_box1 = detection_box_ori[:self.k]
search_box = np.zeros_like(search_box1) # x1 y1 x2 y2
search_box[:, 0] = search_box1[:, 1]
search_box[:, 1] = search_box1[:, 0]
search_box[:, 2] = search_box1[:, 3]
search_box[:, 3] = search_box1[:, 2]
search_box[:, 2] = search_box[:, 2] - search_box[:, 0] # x y w h
search_box[:, 3] = search_box[:, 3] - search_box[:, 1]
search_box[:, 2] = np.maximum(search_box[:, 2], 3)
search_box[:, 3] = np.maximum(search_box[:, 3], 3)
search_box[:, 0] = np.maximum(search_box[:, 0], 0)
search_box[:, 1] = np.maximum(search_box[:, 1], 0)
search_box[:, 0] = np.minimum(
search_box[:, 0],
image_.shape[1] - search_box[:, 2] - 1)
search_box[:, 1] = np.minimum(
search_box[:, 1],
image_.shape[0] - search_box[:, 3] - 1)
ID = np.arange(self.k)
O_mask = (scores1[0, :self.k] > self.Object_thres_low)
ID_obj = ID[O_mask]
num_object = int(np.sum(O_mask))
win_input = np.zeros((num_object, 128, 128, 3))
starty = search_box[O_mask, 1]
startx = search_box[O_mask, 0]
endy = search_box[O_mask, 3] + search_box[O_mask, 1]
endx = search_box[O_mask, 2] + search_box[O_mask, 0]
for i in range(num_object):
unscaled_win = image_[
int(starty[i]):int(endy[i]),
int(startx[i]):int(endx[i])]
win_input[i] = cv2.resize(unscaled_win, (128, 128)).astype(np.float64)
win_input = win_input - self.mean.reshape((1, 1, 1, 3))
candidate_feats = self.sess.run(
self.V_feat_op,
feed_dict={self.V_image_op: win_input})
dists = np.sum(np.square(self.template_feat - candidate_feats), axis=-1)
min_idx1 = np.argmin(dists)
if (dists[min_idx1] < self.V_thres
and scores1[0, ID_obj[min_idx1]] > self.Object_thres_high):
dist_min = dists[min_idx1]
best_idx = ID_obj[min_idx1]
scores = scores1.copy()
detection_box = detection_box_ori[best_idx]
else:
return ori_scores, ori_best_idx, ori_detection_box, ori_dist_min
# detection_box = detection_box_ori[max_idx]
return scores, best_idx, detection_box, dist_min
else:
return ori_scores, ori_best_idx, ori_detection_box, ori_dist_min
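    # Per-frame update: (1) local RPN search around the previous box with an
    # IoU-gated candidate pick, (2) verifier check against the template,
    # (3) center_search fallbacks at several window sizes, and (4) skim-based
    # global re-detection over the whole image when confidence stays low.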
def track(self, image):
self.i += 1
cur_ori_img = image
cur_img_array, win_loc, scale \
= crop_search_region(cur_ori_img, self.last_gt, 300, mean_rgb=128)
detection_box_ori, scores = self.sess.run(
[self.pre_box_tensor, self.scores_tensor],
feed_dict={
self.input_cur_image: cur_img_array,
self.initConstantOp: self.init_feature_maps})
detection_box_ori[:, 0] = detection_box_ori[:, 0] * scale[0] + win_loc[0]
detection_box_ori[:, 1] = detection_box_ori[:, 1] * scale[1] + win_loc[1]
detection_box_ori[:, 2] = detection_box_ori[:, 2] * scale[0] + win_loc[0]
detection_box_ori[:, 3] = detection_box_ori[:, 3] * scale[1] + win_loc[1]
A_candis = ((detection_box_ori[:self.k, 3] - detection_box_ori[:self.k, 1])
* (detection_box_ori[:self.k, 2] - detection_box_ori[:self.k, 0]))
A_lastgt = ((self.last_gt[3] - self.last_gt[1])
* (self.last_gt[2] - self.last_gt[0]))
x1 = np.maximum(detection_box_ori[:self.k, 1], self.last_gt[1])
y1 = np.maximum(detection_box_ori[:self.k, 0], self.last_gt[0])
x2 = np.minimum(detection_box_ori[:self.k, 3], self.last_gt[3])
y2 = np.minimum(detection_box_ori[:self.k, 2], self.last_gt[2])
inter = np.maximum((x2 - x1), 0) * np.maximum((y2 - y1), 0)
IOU = inter / (A_candis + A_lastgt - inter)
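        # Temporal-consistency gate: among the top-k proposals, keep those whose
        # IoU with the previous box exceeds the threshold, then pick the one
        # with the highest RPN score (fall back to the top proposal otherwise).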
ID = np.arange(self.k)
threshold = 0.4
I_mask = IOU > threshold
ID_iou = ID[I_mask]
if np.sum(I_mask) > 0:
best_idx = ID_iou[np.argmax(scores[0, :self.k][I_mask])]
else:
best_idx = 0
search_box1 = detection_box_ori[best_idx]
search_box1[0] = np.clip(search_box1[0], 0, cur_ori_img.shape[0] - 1)
search_box1[2] = np.clip(search_box1[2], 0, cur_ori_img.shape[0] - 1)
search_box1[1] = np.clip(search_box1[1], 0, cur_ori_img.shape[1] - 1)
search_box1[3] = np.clip(search_box1[3], 0, cur_ori_img.shape[1] - 1)
if (int(search_box1[0]) == int(search_box1[2])
or int(search_box1[1]) == int(search_box1[3])):
dist_min = self.LargeDist
else:
unscaled_win = image[
int(search_box1[0]):int(search_box1[2]),
int(search_box1[1]):int(search_box1[3])]
win = cv2.resize(unscaled_win, (128, 128)).astype(np.float64)
win -= self.mean
win_input = win[np.newaxis, :]
candidate_feat = self.sess.run(
self.V_feat_op,
feed_dict={self.V_image_op: win_input})
dist_min = np.sum(np.square(self.template_feat - candidate_feat))
if dist_min > self.V_thres:
search_box1 = detection_box_ori[:self.k]
search_box = np.zeros_like(search_box1) # x1 y1 x2 y2
search_box[:, 0] = search_box1[:, 1]
search_box[:, 1] = search_box1[:, 0]
search_box[:, 2] = search_box1[:, 3]
search_box[:, 3] = search_box1[:, 2]
search_box[:, 2] = search_box[:, 2] - search_box[:, 0] # x y w h
search_box[:, 3] = search_box[:, 3] - search_box[:, 1]
search_box[:, 2] = np.maximum(search_box[:, 2], 3)
search_box[:, 3] = np.maximum(search_box[:, 3], 3)
search_box[:, 0] = np.maximum(search_box[:, 0], 0)
search_box[:, 1] = np.maximum(search_box[:, 1], 0)
search_box[:, 0] = np.minimum(
search_box[:, 0],
cur_ori_img.shape[1] - search_box[:, 2] - 1)
search_box[:, 1] = np.minimum(
search_box[:, 1],
cur_ori_img.shape[0] - search_box[:, 3] - 1)
if scores[0, 0] > self.Object_thres_low:
O_mask = (scores[0, :self.k] > self.Object_thres_low)
ID_obj = ID[O_mask]
num_object = int(np.sum(O_mask))
win_input = np.zeros((num_object, 128, 128, 3))
starty = search_box[O_mask, 1]
startx = search_box[O_mask, 0]
endy = search_box[O_mask, 3] + search_box[O_mask, 1]
endx = search_box[O_mask, 2] + search_box[O_mask, 0]
for i in range(num_object):
unscaled_win = image[
int(starty[i]):int(endy[i]),
int(startx[i]):int(endx[i])]
win_input[i] = cv2.resize(unscaled_win, (128, 128)).astype(np.float64)
win_input = win_input - self.mean.reshape((1, 1, 1, 3))
candidate_feats = self.sess.run(
self.V_feat_op,
feed_dict={self.V_image_op: win_input})
dists = np.sum(np.square(self.template_feat - candidate_feats), axis=-1)
dists1 = dists.copy()
for i in range(num_object):
if ID_obj[i] not in ID_iou:
dists1[i] = self.LargeDist # IOU < threshold
if np.min(dists1) < self.V_thres:
best_idx = ID_obj[np.argmin(dists1)]
dist_min = np.min(dists1)
elif np.min(dists) < self.V_thres:
best_idx = ID_obj[np.argmin(dists)]
dist_min = np.min(dists)
else:
dist_min = self.LargeDist
detection_box = detection_box_ori[best_idx]
if scores[0, best_idx] < self.Object_thres_low:
scores, best_idx, detection_box, dist_min \
= self.center_search(
cur_ori_img,
(self.last_gt[2] - self.last_gt[0]),
(self.last_gt[3] - self.last_gt[1]),
scores, best_idx, detection_box, dist_min)
if dist_min > self.V_thres:
scores, best_idx, detection_box, dist_min \
= self.center_search(
cur_ori_img,
self.first_h,
self.first_w,
scores, best_idx, detection_box, dist_min)
if dist_min > self.V_thres:
scores, best_idx, detection_box, dist_min \
= self.center_search(
cur_ori_img,
self.first_h / 2.0,
self.first_w / 2.0,
scores, best_idx, detection_box, dist_min)
if dist_min > self.V_thres:
scores, best_idx, detection_box, dist_min \
= self.center_search(
cur_ori_img,
self.first_h * 2.0,
self.first_w * 2.0,
scores, best_idx, detection_box, dist_min)
# print scores[0,max_idx]
if scores[0, best_idx] < self.Object_thres_low:
"""-------------------------------------------------------------------------"""
softmax_test_, pos_i = gen_search_patch_Hao(cur_ori_img, self.first_w, self.first_h)
softmax_test = softmax_test_.astype(np.float32)
batch_sz = 64
if softmax_test.shape[0] <= batch_sz:
kk = softmax_test
cls_out = self.branch_search.predict([self.z_feat.repeat(kk.shape[0], axis=0), kk]).reshape(-1)
            else:
                cls_out_list = []
                for_i = softmax_test.shape[0] // batch_sz
for jj in range(for_i):
kk = softmax_test[batch_sz * jj:batch_sz * (jj + 1)]
cls_out_list.append(
self.branch_search.predict([self.z_feat.repeat(kk.shape[0], axis=0), kk]).reshape(-1) )
if softmax_test.shape[0] % batch_sz == 0:
pass
else:
kk = softmax_test[batch_sz * (jj + 1):]
cls_out_list.append(
self.branch_search.predict([self.z_feat.repeat(kk.shape[0], axis=0), kk]).reshape(-1) )
cls_out = np.concatenate(cls_out_list)
search_rank = np.argsort(-cls_out)
pos_i = pos_i[search_rank]
cls_out = cls_out[search_rank]
"""-------------------------------------------------------------------------"""
self.SEARCH_K = np.minimum(pos_i.shape[0], self.SEARCH_K)
if self.SEARCH_K > 1:
search_num = self.SEARCH_K - 1
else:
search_num = 1
            detection_box1_all = np.zeros([search_num, 4])
            scores1_all = np.zeros([1, search_num])
for s_i in range(search_num):
search_gt = pos_i[s_i]
cur_img_array1, win_loc1, scale1 \
= crop_search_region(cur_ori_img, search_gt, 300, mean_rgb=128)
detection_box1, scores1 = self.sess.run(
[self.pre_box_tensor, self.scores_tensor],
feed_dict={
self.input_cur_image: cur_img_array1,
self.initConstantOp: self.init_feature_maps})
detection_box1[0, 0] = detection_box1[0, 0] * scale1[0] + win_loc1[0]
detection_box1[0, 1] = detection_box1[0, 1] * scale1[1] + win_loc1[1]
detection_box1[0, 2] = detection_box1[0, 2] * scale1[0] + win_loc1[0]
detection_box1[0, 3] = detection_box1[0, 3] * scale1[1] + win_loc1[1]
                scores1_all[0, s_i] = scores1[0, 0]
                detection_box1_all[s_i] = detection_box1[0].copy()
            rank_idx = np.argsort(-scores1_all).reshape(-1)
            scores1 = scores1_all[:, rank_idx]
            detection_box1 = detection_box1_all[rank_idx, :]
if scores1[0, 0] > self.Object_thres_high:
detection_box_ori = detection_box1.copy()
# max_idx = 0
search_box1 = detection_box_ori[0]
search_box1[0] = np.clip(search_box1[0], 0, cur_ori_img.shape[0] - 1)
search_box1[2] = np.clip(search_box1[2], 0, cur_ori_img.shape[0] - 1)
search_box1[1] = np.clip(search_box1[1], 0, cur_ori_img.shape[1] - 1)
search_box1[3] = np.clip(search_box1[3], 0, cur_ori_img.shape[1] - 1)
if (int(search_box1[0]) == int(search_box1[2])
or int(search_box1[1]) == int(search_box1[3])):
dist_min = self.LargeDist
else:
search_box1 = [
search_box1[1], search_box1[0],
search_box1[3] - search_box1[1],
search_box1[2] - search_box1[0]]
search_box1 = np.reshape(search_box1, (4,))
unscaled_win = image[
int(search_box1[1]):int(search_box1[3] + search_box1[1]),
int(search_box1[0]):int(search_box1[2] + search_box1[0])]
win = cv2.resize(unscaled_win, (128, 128)).astype(np.float64)
win -= self.mean
win_input = win[np.newaxis, :]
candidate_feat = self.sess.run(
self.V_feat_op,
feed_dict={self.V_image_op: win_input})
dist_min = np.sum(np.square(self.template_feat - candidate_feat))
if dist_min < self.global_V_thres:
scores = scores1.copy()
best_idx = 0
detection_box = detection_box_ori[best_idx]
                elif dist_min > self.global_V_thres and self.SEARCH_K - search_num > 0:
search_gt = pos_i[search_num]
cur_img_array1, win_loc1, scale1 \
= crop_search_region(cur_ori_img, search_gt, 300, mean_rgb=128)
detection_box1, scores1 = self.sess.run(
[self.pre_box_tensor, self.scores_tensor],
feed_dict={
self.input_cur_image: cur_img_array1,
self.initConstantOp: self.init_feature_maps})
detection_box1[0, 0] = detection_box1[0, 0] * scale1[0] + win_loc1[0]
detection_box1[0, 1] = detection_box1[0, 1] * scale1[1] + win_loc1[1]
detection_box1[0, 2] = detection_box1[0, 2] * scale1[0] + win_loc1[0]
detection_box1[0, 3] = detection_box1[0, 3] * scale1[1] + win_loc1[1]
detection_box_ori = detection_box1.copy()
# max_idx = 0
search_box1 = detection_box_ori[0]
search_box1[0] = np.clip(search_box1[0], 0, cur_ori_img.shape[0] - 1)
search_box1[2] = np.clip(search_box1[2], 0, cur_ori_img.shape[0] - 1)
search_box1[1] = np.clip(search_box1[1], 0, cur_ori_img.shape[1] - 1)
search_box1[3] = np.clip(search_box1[3], 0, cur_ori_img.shape[1] - 1)
if (int(search_box1[0]) == int(search_box1[2])
or int(search_box1[1]) == int(search_box1[3])):
dist_min = self.LargeDist
else:
search_box1 = [
search_box1[1], search_box1[0],
search_box1[3] - search_box1[1],
search_box1[2] - search_box1[0]]
search_box1 = np.reshape(search_box1, (4,))
unscaled_win = image[
int(search_box1[1]):int(search_box1[3] + search_box1[1]),
int(search_box1[0]):int(search_box1[2] + search_box1[0])]
win = cv2.resize(unscaled_win, (128, 128)).astype(np.float64)
win -= self.mean
win_input = win[np.newaxis, :]
candidate_feat = self.sess.run(
self.V_feat_op,
feed_dict={self.V_image_op: win_input})
dist_min = np.sum(np.square(self.template_feat - candidate_feat))
if dist_min < self.global_V_thres:
scores = scores1.copy()
best_idx = 0
detection_box = detection_box_ori[best_idx]
if scores[0, best_idx] < self.Object_thres_low:
x_c = (detection_box[3] + detection_box[1]) / 2.0
y_c = (detection_box[0] + detection_box[2]) / 2.0
w1 = self.last_gt[3] - self.last_gt[1]
h1 = self.last_gt[2] - self.last_gt[0]
x1 = x_c - w1 / 2.0
y1 = y_c - h1 / 2.0
x2 = x_c + w1 / 2.0
y2 = y_c + h1 / 2.0
self.last_gt = np.float32([y1, x1, y2, x2])
else:
self.last_gt = detection_box
self.target_w = detection_box[3] - detection_box[1]
self.target_h = detection_box[2] - detection_box[0]
if self.last_gt[0] < 0:
self.last_gt[0] = 0
self.last_gt[2] = self.target_h
if self.last_gt[1] < 0:
self.last_gt[1] = 0
self.last_gt[3] = self.target_w
if self.last_gt[2] > cur_ori_img.shape[0]:
self.last_gt[2] = cur_ori_img.shape[0] - 1
self.last_gt[0] = cur_ori_img.shape[0] - 1 - self.target_h
if self.last_gt[3] > cur_ori_img.shape[1]:
self.last_gt[3] = cur_ori_img.shape[1] - 1
self.last_gt[1] = cur_ori_img.shape[1] - 1 - self.target_w
self.target_w = (self.last_gt[3] - self.last_gt[1])
self.target_h = (self.last_gt[2] - self.last_gt[0])
width = self.last_gt[3] - self.last_gt[1]
height = self.last_gt[2] - self.last_gt[0]
if (scores[0, best_idx] > self.Object_thres_high
and dist_min < self.V_thres):
confidence_score = 0.99
if self.dis:
show_res(
image,
np.array(self.last_gt, dtype=np.int32),
'SPLT')
elif (scores[0, best_idx] < self.Object_thres_low
and dist_min > self.V_thres):
confidence_score = np.nan
if self.dis:
show_res(
image,
None,
'SPLT')
elif dist_min < self.EXTREM:
confidence_score = 0.99
if self.dis:
show_res(
image,
np.array(self.last_gt, dtype=np.int32),
'SPLT')
else:
confidence_score = scores[0, best_idx]
if self.dis:
show_res(
image,
np.array(self.last_gt, dtype=np.int32),
'SPLT')
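        # Note: the `vot` module (the VOT toolkit's python wrapper) is not
        # imported in this file; vot=True assumes the caller has made it
        # importable before calling track().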
if self.vot:
return vot.Rectangle(
float(self.last_gt[1]),
float(self.last_gt[0]),
float(width),
float(height)
), confidence_score
else:
return np.array([
float(self.last_gt[1]),
float(self.last_gt[0]),
float(width),
float(height)
]), confidence_score
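# Minimal usage sketch (assuming BGR frames from cv2.imread and an
# (x, y, w, h) init box, as in the demo loop below):
#   tracker = MobileTracker(vot=False, dis=False)
#   tracker.init_first(first_frame, np.array([x, y, w, h]))
#   box, confidence = tracker.track(next_frame)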
# if __name__ == "__main__":
use_vot = False
display = True
tracker = MobileTracker(vot=use_vot, dis=display)
titles = os.listdir(image_root)
titles.sort()
titles = [title for title in titles if not title.endswith("txt")]
precisions = np.zeros(len(titles))
precisions_auc = np.zeros(len(titles))
ious = np.zeros(len(titles))
lengths = np.zeros(len(titles))
speed = np.zeros(len(titles))
for title_id in range(1):  # demo: evaluate a single hardcoded sequence
    title = 'yamaha'
image_path = image_root + title + '/color/'
gt_path = image_root + title + '/groundtruth.txt'
    try:
        gt_tmp = np.loadtxt(gt_path)
    except ValueError:
        # some ground-truth files are comma-delimited
        gt_tmp = np.loadtxt(gt_path, delimiter=',')
num_frames = gt_tmp.shape[0]
gt = gt_tmp.copy()
frame_list = os.listdir(image_path)
frame_list = [frame for frame in frame_list if frame.endswith('jpg')]
frame_list.sort()
image = cv2.imread(image_path + frame_list[0])
selection = gt[0]
box_pred = np.zeros([num_frames, 4])
box_pred[0] = gt[0].copy()
tracker.init_first(image, selection)
    elapsed = 0.0  # accumulated tracking time for this sequence (seconds)
for i in range(1, num_frames):
imagefile = image_path + frame_list[i]
image = cv2.imread(imagefile)
s_t = time()
region, confidence = tracker.track(image)
        elapsed += time() - s_t
box_pred[i] = region.copy()
index1 = []
for t in range(gt.shape[0]):
if np.isnan(gt[t, 0]):
index1.append(t)
gt = np.delete(gt, index1, axis=0)
box_pred = np.delete(box_pred, index1, axis=0)
lengths[title_id], precisions[title_id], precisions_auc[title_id], ious[title_id] = compile_results(
gt.astype(np.float32), box_pred.astype(np.float32), 20)
    speed[title_id] = (num_frames - 1) * 1.0 / elapsed
    print(speed[title_id])  # tracking speed in frames per second
    print('{:0>2d} {:12s} -- Precision: {:6.2f} -- Precisions AUC: {:6.2f} -- IOU: {:6.2f}'.format(
        title_id,
        titles[title_id],
        precisions[title_id],
        precisions_auc[title_id],
        ious[title_id]))