add StrongSORT Tracker
commit b7d8b3266f (parent ffc2e99678)
93 changed files with 20230 additions and 6 deletions
feeder/trackers/deepocsort/__init__.py (Normal file, +2)

@@ -0,0 +1,2 @@
from . import args
from . import ocsort
feeder/trackers/deepocsort/args.py (Normal file, +110)

@@ -0,0 +1,110 @@
import argparse


def make_parser():
    parser = argparse.ArgumentParser("OC-SORT parameters")

    # distributed
    parser.add_argument("-b", "--batch-size", type=int, default=1, help="batch size")
    parser.add_argument("-d", "--devices", default=None, type=int, help="device for training")

    parser.add_argument("--local_rank", default=0, type=int, help="local rank for dist training")
    parser.add_argument("--num_machines", default=1, type=int, help="num of nodes for training")
    parser.add_argument("--machine_rank", default=0, type=int, help="node rank for multi-node training")

    parser.add_argument(
        "-f",
        "--exp_file",
        default=None,
        type=str,
        help="please input your experiment description file",
    )
    parser.add_argument(
        "--test",
        dest="test",
        default=False,
        action="store_true",
        help="Evaluate on the test-dev set.",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    # det args
    parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval")
    parser.add_argument("--conf", default=0.1, type=float, help="test conf")
    parser.add_argument("--nms", default=0.7, type=float, help="test nms threshold")
    parser.add_argument("--tsize", default=[800, 1440], nargs="+", type=int, help="test img size")
    parser.add_argument("--seed", default=None, type=int, help="eval seed")

    # tracking args
    parser.add_argument("--track_thresh", type=float, default=0.6, help="detection confidence threshold")
    parser.add_argument(
        "--iou_thresh",
        type=float,
        default=0.3,
        help="the IoU threshold in SORT for matching",
    )
    parser.add_argument("--min_hits", type=int, default=3, help="min hits to create a track in SORT")
    parser.add_argument(
        "--inertia",
        type=float,
        default=0.2,
        help="the weight of the VDC term in the cost matrix",
    )
    parser.add_argument(
        "--deltat",
        type=int,
        default=3,
        help="time step difference to estimate direction",
    )
    parser.add_argument("--track_buffer", type=int, default=30, help="number of frames to keep lost tracks")
    parser.add_argument(
        "--match_thresh",
        type=float,
        default=0.9,
        help="matching threshold for tracking",
    )
    parser.add_argument(
        "--gt-type",
        type=str,
        default="_val_half",
        help="suffix to find the gt annotation",
    )
    parser.add_argument("--public", action="store_true", help="use public detections")
    parser.add_argument("--asso", default="iou", help="similarity function: iou/giou/diou/ciou/ct_dist")

    # for kitti/bdd100k inference with public detections
    parser.add_argument(
        "--raw_results_path",
        type=str,
        default="exps/permatrack_kitti_test/",
        help="path to the raw tracking results from other trackers",
    )
    parser.add_argument("--out_path", type=str, help="path to save output results")
    parser.add_argument(
        "--hp",
        action="store_true",
        help="use head padding to add the missing objects during \
            initializing the tracks (offline).",
    )

    # for demo video
    parser.add_argument("--demo_type", default="image", help="demo type, e.g. image, video or webcam")
    parser.add_argument("--path", default="./videos/demo.mp4", help="path to images or video")
    parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id")
    parser.add_argument(
        "--save_result",
        action="store_true",
        help="whether to save the inference result of image/video",
    )
    parser.add_argument(
        "--device",
        default="gpu",
        type=str,
        help="device to run our model, can be either cpu or gpu",
    )
    return parser
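For reference, a minimal sketch of how this parser might be driven from a script. The import path and flag values here are illustrative, not part of the commit:

# Hypothetical usage sketch for make_parser(); flag values are made up.
from trackers.deepocsort.args import make_parser

parser = make_parser()
args = parser.parse_args(["--track_thresh", "0.6", "--asso", "giou", "--demo_type", "video"])
print(args.track_thresh, args.asso)  # 0.6 giou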
feeder/trackers/deepocsort/association.py (Normal file, +445)

@@ -0,0 +1,445 @@
import os
import pdb

import numpy as np
from scipy.special import softmax


def iou_batch(bboxes1, bboxes2):
    """
    From SORT: Computes IoU between two sets of bboxes in the form [x1,y1,x2,y2]
    """
    bboxes2 = np.expand_dims(bboxes2, 0)
    bboxes1 = np.expand_dims(bboxes1, 1)

    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
    w = np.maximum(0.0, xx2 - xx1)
    h = np.maximum(0.0, yy2 - yy1)
    wh = w * h
    o = wh / (
        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
        - wh
    )
    return o


def giou_batch(bboxes1, bboxes2):
    """
    :param bbox_p: predicted bboxes (N,4) (x1,y1,x2,y2)
    :param bbox_g: ground-truth bboxes (N,4) (x1,y1,x2,y2)
    :return:
    """
    # for details, see https://arxiv.org/pdf/1902.09630.pdf
    # ensure the predicted bbox form
    bboxes2 = np.expand_dims(bboxes2, 0)
    bboxes1 = np.expand_dims(bboxes1, 1)

    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
    w = np.maximum(0.0, xx2 - xx1)
    h = np.maximum(0.0, yy2 - yy1)
    wh = w * h
    iou = wh / (
        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
        - wh
    )

    xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])
    yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])
    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])
    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])
    wc = xxc2 - xxc1
    hc = yyc2 - yyc1
    assert (wc > 0).all() and (hc > 0).all()
    area_enclose = wc * hc
    giou = iou - (area_enclose - wh) / area_enclose
    giou = (giou + 1.0) / 2.0  # rescale from (-1,1) to (0,1)
    return giou


def diou_batch(bboxes1, bboxes2):
    """
    :param bbox_p: predicted bboxes (N,4) (x1,y1,x2,y2)
    :param bbox_g: ground-truth bboxes (N,4) (x1,y1,x2,y2)
    :return:
    """
    # for details, see https://arxiv.org/pdf/1902.09630.pdf
    # ensure the predicted bbox form
    bboxes2 = np.expand_dims(bboxes2, 0)
    bboxes1 = np.expand_dims(bboxes1, 1)

    # calculate the intersection box
    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
    w = np.maximum(0.0, xx2 - xx1)
    h = np.maximum(0.0, yy2 - yy1)
    wh = w * h
    iou = wh / (
        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
        - wh
    )

    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0
    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0
    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0
    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0

    inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2

    xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])
    yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])
    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])
    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])

    outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2
    diou = iou - inner_diag / outer_diag

    return (diou + 1) / 2.0  # rescale from (-1,1) to (0,1)


def ciou_batch(bboxes1, bboxes2):
    """
    :param bbox_p: predicted bboxes (N,4) (x1,y1,x2,y2)
    :param bbox_g: ground-truth bboxes (N,4) (x1,y1,x2,y2)
    :return:
    """
    # for details, see https://arxiv.org/pdf/1902.09630.pdf
    # ensure the predicted bbox form
    bboxes2 = np.expand_dims(bboxes2, 0)
    bboxes1 = np.expand_dims(bboxes1, 1)

    # calculate the intersection box
    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
    w = np.maximum(0.0, xx2 - xx1)
    h = np.maximum(0.0, yy2 - yy1)
    wh = w * h
    iou = wh / (
        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
        - wh
    )

    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0
    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0
    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0
    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0

    inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2

    xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])
    yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])
    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])
    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])

    outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2

    w1 = bboxes1[..., 2] - bboxes1[..., 0]
    h1 = bboxes1[..., 3] - bboxes1[..., 1]
    w2 = bboxes2[..., 2] - bboxes2[..., 0]
    h2 = bboxes2[..., 3] - bboxes2[..., 1]

    # prevent division by zero: add a one-pixel shift
    h2 = h2 + 1.0
    h1 = h1 + 1.0
    arctan = np.arctan(w2 / h2) - np.arctan(w1 / h1)
    v = (4 / (np.pi**2)) * (arctan**2)
    S = 1 - iou
    alpha = v / (S + v)
    ciou = iou - inner_diag / outer_diag - alpha * v

    return (ciou + 1) / 2.0  # rescale from (-1,1) to (0,1)


def ct_dist(bboxes1, bboxes2):
    """
    Measure the center distance between two sets of bounding boxes.
    This is a coarse implementation; we don't recommend relying on it alone
    for association, since it can be unstable and sensitive to frame rate
    and object speed.
    """
    bboxes2 = np.expand_dims(bboxes2, 0)
    bboxes1 = np.expand_dims(bboxes1, 1)

    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0
    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0
    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0
    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0

    ct_dist2 = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2

    ct_dist = np.sqrt(ct_dist2)

    # The linear rescaling is a naive version and needs more study
    ct_dist = ct_dist / ct_dist.max()
    return ct_dist.max() - ct_dist  # rescale to (0,1)


def speed_direction_batch(dets, tracks):
    tracks = tracks[..., np.newaxis]
    CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:, 1] + dets[:, 3]) / 2.0
    CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0
    dx = CX1 - CX2
    dy = CY1 - CY2
    norm = np.sqrt(dx**2 + dy**2) + 1e-6
    dx = dx / norm
    dy = dy / norm
    return dy, dx  # size: num_track x num_det


def linear_assignment(cost_matrix):
    try:
        import lap

        _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
        return np.array([[y[i], i] for i in x if i >= 0])
    except ImportError:
        from scipy.optimize import linear_sum_assignment

        x, y = linear_sum_assignment(cost_matrix)
        return np.array(list(zip(x, y)))


def associate_detections_to_trackers(detections, trackers, iou_threshold=0.3):
    """
    Assigns detections to tracked objects (both represented as bounding boxes)
    Returns 3 lists: matches, unmatched_detections and unmatched_trackers
    """
    if len(trackers) == 0:
        return (
            np.empty((0, 2), dtype=int),
            np.arange(len(detections)),
            np.empty((0, 5), dtype=int),
        )

    iou_matrix = iou_batch(detections, trackers)

    if min(iou_matrix.shape) > 0:
        a = (iou_matrix > iou_threshold).astype(np.int32)
        if a.sum(1).max() == 1 and a.sum(0).max() == 1:
            matched_indices = np.stack(np.where(a), axis=1)
        else:
            matched_indices = linear_assignment(-iou_matrix)
    else:
        matched_indices = np.empty(shape=(0, 2))

    unmatched_detections = []
    for d, det in enumerate(detections):
        if d not in matched_indices[:, 0]:
            unmatched_detections.append(d)
    unmatched_trackers = []
    for t, trk in enumerate(trackers):
        if t not in matched_indices[:, 1]:
            unmatched_trackers.append(t)

    # filter out matches with low IoU
    matches = []
    for m in matched_indices:
        if iou_matrix[m[0], m[1]] < iou_threshold:
            unmatched_detections.append(m[0])
            unmatched_trackers.append(m[1])
        else:
            matches.append(m.reshape(1, 2))
    if len(matches) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)


def compute_aw_max_metric(emb_cost, w_association_emb, bottom=0.5):
    w_emb = np.full_like(emb_cost, w_association_emb)

    for idx in range(emb_cost.shape[0]):
        inds = np.argsort(-emb_cost[idx])
        # If there are fewer than two matches, just keep the original weight
        if len(inds) < 2:
            continue
        if emb_cost[idx, inds[0]] == 0:
            row_weight = 0
        else:
            row_weight = 1 - max((emb_cost[idx, inds[1]] / emb_cost[idx, inds[0]]) - bottom, 0) / (1 - bottom)
        w_emb[idx] *= row_weight

    for idj in range(emb_cost.shape[1]):
        inds = np.argsort(-emb_cost[:, idj])
        # If there are fewer than two matches, just keep the original weight
        if len(inds) < 2:
            continue
        if emb_cost[inds[0], idj] == 0:
            col_weight = 0
        else:
            col_weight = 1 - max((emb_cost[inds[1], idj] / emb_cost[inds[0], idj]) - bottom, 0) / (1 - bottom)
        w_emb[:, idj] *= col_weight

    return w_emb * emb_cost


def associate(
    detections, trackers, iou_threshold, velocities, previous_obs, vdc_weight, emb_cost, w_assoc_emb, aw_off, aw_param
):
    if len(trackers) == 0:
        return (
            np.empty((0, 2), dtype=int),
            np.arange(len(detections)),
            np.empty((0, 5), dtype=int),
        )

    Y, X = speed_direction_batch(detections, previous_obs)
    inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1]
    inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
    inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
    diff_angle_cos = inertia_X * X + inertia_Y * Y
    diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
    diff_angle = np.arccos(diff_angle_cos)
    diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi

    valid_mask = np.ones(previous_obs.shape[0])
    valid_mask[np.where(previous_obs[:, 4] < 0)] = 0

    iou_matrix = iou_batch(detections, trackers)
    scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1)
    # iou_matrix = iou_matrix * scores  # a trick that sometimes works; we don't encourage it
    valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)

    angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
    angle_diff_cost = angle_diff_cost.T
    angle_diff_cost = angle_diff_cost * scores

    if min(iou_matrix.shape) > 0:
        a = (iou_matrix > iou_threshold).astype(np.int32)
        if a.sum(1).max() == 1 and a.sum(0).max() == 1:
            matched_indices = np.stack(np.where(a), axis=1)
        else:
            if emb_cost is None:
                emb_cost = 0
            else:
                emb_cost = emb_cost.cpu().numpy()
                emb_cost[iou_matrix <= 0] = 0
                if not aw_off:
                    emb_cost = compute_aw_max_metric(emb_cost, w_assoc_emb, bottom=aw_param)
                else:
                    emb_cost *= w_assoc_emb

            final_cost = -(iou_matrix + angle_diff_cost + emb_cost)
            matched_indices = linear_assignment(final_cost)
    else:
        matched_indices = np.empty(shape=(0, 2))

    unmatched_detections = []
    for d, det in enumerate(detections):
        if d not in matched_indices[:, 0]:
            unmatched_detections.append(d)
    unmatched_trackers = []
    for t, trk in enumerate(trackers):
        if t not in matched_indices[:, 1]:
            unmatched_trackers.append(t)

    # filter out matches with low IoU
    matches = []
    for m in matched_indices:
        if iou_matrix[m[0], m[1]] < iou_threshold:
            unmatched_detections.append(m[0])
            unmatched_trackers.append(m[1])
        else:
            matches.append(m.reshape(1, 2))
    if len(matches) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)


def associate_kitti(detections, trackers, det_cates, iou_threshold, velocities, previous_obs, vdc_weight):
    if len(trackers) == 0:
        return (
            np.empty((0, 2), dtype=int),
            np.arange(len(detections)),
            np.empty((0, 5), dtype=int),
        )

    """
    Cost from the velocity direction consistency
    """
    Y, X = speed_direction_batch(detections, previous_obs)
    inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1]
    inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
    inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
    diff_angle_cos = inertia_X * X + inertia_Y * Y
    diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
    diff_angle = np.arccos(diff_angle_cos)
    diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi

    valid_mask = np.ones(previous_obs.shape[0])
    valid_mask[np.where(previous_obs[:, 4] < 0)] = 0
    valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)

    scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1)
    angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
    angle_diff_cost = angle_diff_cost.T
    angle_diff_cost = angle_diff_cost * scores

    """
    Cost from IoU
    """
    iou_matrix = iou_batch(detections, trackers)

    """
    With multiple categories, generate the cost for category mismatch
    """
    num_dets = detections.shape[0]
    num_trk = trackers.shape[0]
    cate_matrix = np.zeros((num_dets, num_trk))
    for i in range(num_dets):
        for j in range(num_trk):
            if det_cates[i] != trackers[j, 4]:
                cate_matrix[i][j] = -1e6

    cost_matrix = -iou_matrix - angle_diff_cost - cate_matrix

    if min(iou_matrix.shape) > 0:
        a = (iou_matrix > iou_threshold).astype(np.int32)
        if a.sum(1).max() == 1 and a.sum(0).max() == 1:
            matched_indices = np.stack(np.where(a), axis=1)
        else:
            matched_indices = linear_assignment(cost_matrix)
    else:
        matched_indices = np.empty(shape=(0, 2))

    unmatched_detections = []
    for d, det in enumerate(detections):
        if d not in matched_indices[:, 0]:
            unmatched_detections.append(d)
    unmatched_trackers = []
    for t, trk in enumerate(trackers):
        if t not in matched_indices[:, 1]:
            unmatched_trackers.append(t)

    # filter out matches with low IoU
    matches = []
    for m in matched_indices:
        if iou_matrix[m[0], m[1]] < iou_threshold:
            unmatched_detections.append(m[0])
            unmatched_trackers.append(m[1])
        else:
            matches.append(m.reshape(1, 2))
    if len(matches) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
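A small self-contained check of the association helpers above (the box values are made up, and the import path is illustrative):

# Illustrative check of iou_batch + associate_detections_to_trackers; box values are made up.
import numpy as np
from trackers.deepocsort.association import iou_batch, associate_detections_to_trackers

dets = np.array([[0, 0, 10, 10, 0.9], [20, 20, 30, 30, 0.8]], dtype=float)
trks = np.array([[1, 1, 11, 11, 0.0], [50, 50, 60, 60, 0.0]], dtype=float)
print(iou_batch(dets, trks).round(2))  # pairwise IoU matrix, shape (2, 2)
m, ud, ut = associate_detections_to_trackers(dets, trks, iou_threshold=0.3)
print(m, ud, ut)  # det 0 matches trk 0; det 1 and trk 1 stay unmatched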
feeder/trackers/deepocsort/cmc.py (Normal file, +170)

@@ -0,0 +1,170 @@
import pdb
import pickle
import os

import cv2
import numpy as np


class CMCComputer:
    def __init__(self, minimum_features=10, method="sparse"):
        assert method in ["file", "sparse", "sift"]

        os.makedirs("./cache", exist_ok=True)
        self.cache_path = "./cache/affine_ocsort.pkl"
        self.cache = {}
        if os.path.exists(self.cache_path):
            with open(self.cache_path, "rb") as fp:
                self.cache = pickle.load(fp)
        self.minimum_features = minimum_features
        self.prev_img = None
        self.prev_desc = None
        self.sparse_flow_param = dict(
            maxCorners=3000,
            qualityLevel=0.01,
            minDistance=1,
            blockSize=3,
            useHarrisDetector=False,
            k=0.04,
        )
        self.file_computed = {}

        self.comp_function = None
        if method == "sparse":
            self.comp_function = self._affine_sparse_flow
        elif method == "sift":
            self.comp_function = self._affine_sift
        # Same affine arrays as the BoT-SORT CMC files
        elif method == "file":
            self.comp_function = self._affine_file
            self.file_affines = {}
            # Maps from tag name to file name
            self.file_names = {}

            # All the ablation file names
            for f_name in os.listdir("./cache/cmc_files/MOT17_ablation/"):
                # The tag that'll be passed into compute_affine, based on the image name
                tag = f_name.replace("GMC-", "").replace(".txt", "") + "-FRCNN"
                f_name = os.path.join("./cache/cmc_files/MOT17_ablation/", f_name)
                self.file_names[tag] = f_name
            for f_name in os.listdir("./cache/cmc_files/MOT20_ablation/"):
                tag = f_name.replace("GMC-", "").replace(".txt", "")
                f_name = os.path.join("./cache/cmc_files/MOT20_ablation/", f_name)
                self.file_names[tag] = f_name

            # All the test file names
            for f_name in os.listdir("./cache/cmc_files/MOTChallenge/"):
                tag = f_name.replace("GMC-", "").replace(".txt", "")
                if "MOT17" in tag:
                    tag = tag + "-FRCNN"
                # If it's an ablation one (not test), don't overwrite it
                if tag in self.file_names:
                    continue
                f_name = os.path.join("./cache/cmc_files/MOTChallenge/", f_name)
                self.file_names[tag] = f_name

    def compute_affine(self, img, bbox, tag):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        if tag in self.cache:
            A = self.cache[tag]
            return A
        mask = np.ones_like(img, dtype=np.uint8)
        if bbox.shape[0] > 0:
            bbox = np.round(bbox).astype(np.int32)
            bbox[bbox < 0] = 0
            for bb in bbox:
                mask[bb[1] : bb[3], bb[0] : bb[2]] = 0

        A = self.comp_function(img, mask, tag)
        self.cache[tag] = A

        return A

    def _load_file(self, name):
        affines = []
        with open(self.file_names[name], "r") as fp:
            for line in fp:
                tokens = [float(f) for f in line.split("\t")[1:7]]
                A = np.eye(2, 3)
                A[0, 0] = tokens[0]
                A[0, 1] = tokens[1]
                A[0, 2] = tokens[2]
                A[1, 0] = tokens[3]
                A[1, 1] = tokens[4]
                A[1, 2] = tokens[5]
                affines.append(A)
        self.file_affines[name] = affines

    def _affine_file(self, frame, mask, tag):
        name, num = tag.split(":")
        if name not in self.file_affines:
            self._load_file(name)
        if name not in self.file_affines:
            raise RuntimeError("Error loading file affines for CMC.")

        return self.file_affines[name][int(num) - 1]

    def _affine_sift(self, frame, mask, tag):
        A = np.eye(2, 3)
        detector = cv2.SIFT_create()
        kp, desc = detector.detectAndCompute(frame, mask)
        if self.prev_desc is None:
            self.prev_desc = [kp, desc]
            return A
        if desc.shape[0] < self.minimum_features or self.prev_desc[1].shape[0] < self.minimum_features:
            return A

        bf = cv2.BFMatcher(cv2.NORM_L2)
        matches = bf.knnMatch(self.prev_desc[1], desc, k=2)
        good = []
        for m, n in matches:
            if m.distance < 0.7 * n.distance:
                good.append(m)

        if len(good) > self.minimum_features:
            src_pts = np.float32([self.prev_desc[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
            dst_pts = np.float32([kp[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
            A, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts, method=cv2.RANSAC)
        else:
            print("Warning: not enough matching points")
        if A is None:
            A = np.eye(2, 3)

        self.prev_desc = [kp, desc]
        return A

    def _affine_sparse_flow(self, frame, mask, tag):
        # Initialize
        A = np.eye(2, 3)

        # find the keypoints
        keypoints = cv2.goodFeaturesToTrack(frame, mask=mask, **self.sparse_flow_param)

        # Handle the first frame
        if self.prev_img is None:
            self.prev_img = frame
            self.prev_desc = keypoints
            return A

        matched_kp, status, err = cv2.calcOpticalFlowPyrLK(self.prev_img, frame, self.prev_desc, None)
        matched_kp = matched_kp.reshape(-1, 2)
        status = status.reshape(-1)
        prev_points = self.prev_desc.reshape(-1, 2)
        prev_points = prev_points[status]
        curr_points = matched_kp[status]

        # Find the rigid matrix
        if prev_points.shape[0] > self.minimum_features:
            A, _ = cv2.estimateAffinePartial2D(prev_points, curr_points, method=cv2.RANSAC)
        else:
            print("Warning: not enough matching points")
        if A is None:
            A = np.eye(2, 3)

        self.prev_img = frame
        self.prev_desc = keypoints
        return A

    def dump_cache(self):
        with open(self.cache_path, "wb") as fp:
            pickle.dump(self.cache, fp)
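A usage sketch for the sparse-flow mode. The frames below are synthetic; real use passes consecutive BGR video frames plus the current detections, which get masked out of the motion estimate. Note that compute_affine caches by tag, so each frame needs a distinct tag:

# Illustrative CMCComputer usage with synthetic frames; real input is consecutive video frames.
import numpy as np
from trackers.deepocsort.cmc import CMCComputer

cmc = CMCComputer(method="sparse")
frame0 = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
frame1 = np.roll(frame0, 2, axis=1)  # fake a 2-pixel camera pan
boxes = np.array([[100, 100, 150, 200]], dtype=float)  # detections to mask out
A0 = cmc.compute_affine(frame0, boxes, tag="seq:1")  # first frame: identity
A1 = cmc.compute_affine(frame1, boxes, tag="seq:2")  # 2x3 affine warp estimate
print(A1.shape)  # (2, 3)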
feeder/trackers/deepocsort/configs/deepocsort.yaml (Normal file, +12)

@@ -0,0 +1,12 @@
# Trial number: 137
# HOTA, MOTA, IDF1: [55.567]
deepocsort:
  asso_func: giou
  conf_thres: 0.5122620708221085
  delta_t: 1
  det_thresh: 0
  inertia: 0.3941737016672115
  iou_thresh: 0.22136877277096445
  max_age: 50
  min_hits: 1
  use_byte: false
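One plausible way to consume this config; the loading code shown is an assumption, since this commit does not include a loader:

# Hypothetical loader for deepocsort.yaml; not part of this commit.
import yaml

with open("feeder/trackers/deepocsort/configs/deepocsort.yaml") as f:
    cfg = yaml.safe_load(f)["deepocsort"]
print(cfg["asso_func"], cfg["max_age"])  # giou 50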
feeder/trackers/deepocsort/embedding.py (Normal file, +116)

@@ -0,0 +1,116 @@
import pdb
from collections import OrderedDict
import os
import pickle

import torch
import cv2
import torchvision
import numpy as np


class EmbeddingComputer:
    def __init__(self, dataset):
        self.model = None
        self.dataset = dataset
        self.crop_size = (128, 384)
        os.makedirs("./cache/embeddings/", exist_ok=True)
        self.cache_path = "./cache/embeddings/{}_embedding.pkl"
        self.cache = {}
        self.cache_name = ""

    def load_cache(self, path):
        self.cache_name = path
        cache_path = self.cache_path.format(path)
        if os.path.exists(cache_path):
            with open(cache_path, "rb") as fp:
                self.cache = pickle.load(fp)

    def compute_embedding(self, img, bbox, tag, is_numpy=True):
        if self.cache_name != tag.split(":")[0]:
            self.load_cache(tag.split(":")[0])

        if tag in self.cache:
            embs = self.cache[tag]
            if embs.shape[0] != bbox.shape[0]:
                raise RuntimeError(
                    "ERROR: The number of cached embeddings doesn't match the "
                    "number of detections.\nWas the detector model changed? If so, delete the cache."
                )
            return embs

        if self.model is None:
            self.initialize_model()

        # Make sure the bboxes are within the image frame
        if is_numpy:
            h, w = img.shape[:2]
        else:
            h, w = img.shape[2:]
        results = np.round(bbox).astype(np.int32)
        results[:, 0] = results[:, 0].clip(0, w)
        results[:, 1] = results[:, 1].clip(0, h)
        results[:, 2] = results[:, 2].clip(0, w)
        results[:, 3] = results[:, 3].clip(0, h)

        # Generate all the crops
        crops = []
        for p in results:
            if is_numpy:
                crop = img[p[1] : p[3], p[0] : p[2]]
                crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
                crop = cv2.resize(crop, self.crop_size, interpolation=cv2.INTER_LINEAR)
                crop = torch.as_tensor(crop.astype("float32").transpose(2, 0, 1))
                crop = crop.unsqueeze(0)
            else:
                crop = img[:, :, p[1] : p[3], p[0] : p[2]]
                crop = torchvision.transforms.functional.resize(crop, self.crop_size)

            crops.append(crop)

        crops = torch.cat(crops, dim=0)

        # Create embeddings and L2-normalize them
        with torch.no_grad():
            crops = crops.cuda()
            crops = crops.half()
            embs = self.model(crops)
            embs = torch.nn.functional.normalize(embs)
            embs = embs.cpu().numpy()

        self.cache[tag] = embs
        return embs

    def initialize_model(self):
        """
        model = torchreid.models.build_model(name="osnet_ain_x1_0", num_classes=2510, loss="softmax", pretrained=False)
        sd = torch.load("external/weights/osnet_ain_ms_d_c.pth.tar")["state_dict"]
        new_state_dict = OrderedDict()
        for k, v in sd.items():
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v
        # load params
        model.load_state_dict(new_state_dict)
        model.eval()
        model.cuda()
        """
        if self.dataset == "mot17":
            path = "external/weights/mot17_sbs_S50.pth"
        elif self.dataset == "mot20":
            path = "external/weights/mot20_sbs_S50.pth"
        elif self.dataset == "dance":
            path = None
        else:
            raise RuntimeError("Need the path for a new ReID model.")

        # NOTE: FastReID is not imported anywhere in this file; it must be provided
        # by the surrounding project for initialize_model() to work.
        model = FastReID(path)
        model.eval()
        model.cuda()
        model.half()
        self.model = model

    def dump_cache(self):
        if self.cache_name:
            with open(self.cache_path.format(self.cache_name), "wb") as fp:
                pickle.dump(self.cache, fp)
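A hedged usage sketch. It assumes a CUDA GPU, the `FastReID` adaptor, and the mot17 weights hard-coded above, none of which this commit itself provides:

# Illustrative only: requires a CUDA GPU, FastReID, and the mot17 weights on disk.
import numpy as np
from trackers.deepocsort.embedding import EmbeddingComputer

embedder = EmbeddingComputer(dataset="mot17")
frame = np.zeros((1080, 1920, 3), dtype=np.uint8)      # BGR frame
boxes = np.array([[100, 200, 180, 420]], dtype=float)  # [x1, y1, x2, y2]
embs = embedder.compute_embedding(frame, boxes, tag="MOT17-02:1")
print(embs.shape)  # (1, emb_dim), rows are L2-normalized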
feeder/trackers/deepocsort/kalmanfilter.py (Normal file, +1636)

File diff suppressed because it is too large.
feeder/trackers/deepocsort/ocsort.py (Normal file, +670)

@@ -0,0 +1,670 @@
"""
This script is adapted from the SORT script by Alex Bewley alex@bewley.ai
"""
from __future__ import print_function

import pdb
import pickle

import cv2
import torch
import torchvision

import numpy as np
from .association import *
from .embedding import EmbeddingComputer
from .cmc import CMCComputer
from reid_multibackend import ReIDDetectMultiBackend


def k_previous_obs(observations, cur_age, k):
    if len(observations) == 0:
        return [-1, -1, -1, -1, -1]
    for i in range(k):
        dt = k - i
        if cur_age - dt in observations:
            return observations[cur_age - dt]
    max_age = max(observations.keys())
    return observations[max_age]


def convert_bbox_to_z(bbox):
    """
    Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
    [x,y,s,r] where x,y is the centre of the box, s is the scale/area and r is
    the aspect ratio
    """
    w = bbox[2] - bbox[0]
    h = bbox[3] - bbox[1]
    x = bbox[0] + w / 2.0
    y = bbox[1] + h / 2.0
    s = w * h  # scale is just the area
    r = w / float(h + 1e-6)
    return np.array([x, y, s, r]).reshape((4, 1))


def convert_bbox_to_z_new(bbox):
    w = bbox[2] - bbox[0]
    h = bbox[3] - bbox[1]
    x = bbox[0] + w / 2.0
    y = bbox[1] + h / 2.0
    return np.array([x, y, w, h]).reshape((4, 1))


def convert_x_to_bbox_new(x):
    x, y, w, h = x.reshape(-1)[:4]
    return np.array([x - w / 2, y - h / 2, x + w / 2, y + h / 2]).reshape(1, 4)


def convert_x_to_bbox(x, score=None):
    """
    Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
    [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
    """
    w = np.sqrt(x[2] * x[3])
    h = x[2] / w
    if score is None:
        return np.array([x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0]).reshape((1, 4))
    else:
        return np.array([x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0, score]).reshape((1, 5))


def speed_direction(bbox1, bbox2):
    cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0
    cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0
    speed = np.array([cy2 - cy1, cx2 - cx1])
    norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6
    return speed / norm


def new_kf_process_noise(w, h, p=1 / 20, v=1 / 160):
    Q = np.diag(
        ((p * w) ** 2, (p * h) ** 2, (p * w) ** 2, (p * h) ** 2, (v * w) ** 2, (v * h) ** 2, (v * w) ** 2, (v * h) ** 2)
    )
    return Q


def new_kf_measurement_noise(w, h, m=1 / 20):
    w_var = (m * w) ** 2
    h_var = (m * h) ** 2
    R = np.diag((w_var, h_var, w_var, h_var))
    return R


class KalmanBoxTracker(object):
    """
    This class represents the internal state of an individual tracked object, observed as a bbox.
    """

    count = 0

    def __init__(self, bbox, cls, delta_t=3, orig=False, emb=None, alpha=0, new_kf=False):
        """
        Initialises a tracker using an initial bounding box.
        """
        # define the constant velocity model
        if not orig:
            from .kalmanfilter import KalmanFilterNew as KalmanFilter
        else:
            from filterpy.kalman import KalmanFilter
        self.cls = cls
        self.conf = bbox[-1]
        self.new_kf = new_kf
        if new_kf:
            self.kf = KalmanFilter(dim_x=8, dim_z=4)
            self.kf.F = np.array(
                [
                    # x y w h x' y' w' h'
                    [1, 0, 0, 0, 1, 0, 0, 0],
                    [0, 1, 0, 0, 0, 1, 0, 0],
                    [0, 0, 1, 0, 0, 0, 1, 0],
                    [0, 0, 0, 1, 0, 0, 0, 1],
                    [0, 0, 0, 0, 1, 0, 0, 0],
                    [0, 0, 0, 0, 0, 1, 0, 0],
                    [0, 0, 0, 0, 0, 0, 1, 0],
                    [0, 0, 0, 0, 0, 0, 0, 1],
                ]
            )
            self.kf.H = np.array(
                [
                    [1, 0, 0, 0, 0, 0, 0, 0],
                    [0, 1, 0, 0, 0, 0, 0, 0],
                    [0, 0, 1, 0, 0, 0, 0, 0],
                    [0, 0, 0, 1, 0, 0, 0, 0],
                ]
            )
            _, _, w, h = convert_bbox_to_z_new(bbox).reshape(-1)
            self.kf.P = new_kf_process_noise(w, h)
            self.kf.P[:4, :4] *= 4
            self.kf.P[4:, 4:] *= 100
            # Process and measurement uncertainty are set in functions
            self.bbox_to_z_func = convert_bbox_to_z_new
            self.x_to_bbox_func = convert_x_to_bbox_new
        else:
            self.kf = KalmanFilter(dim_x=7, dim_z=4)
            self.kf.F = np.array(
                [
                    # x y s r x' y' s'
                    [1, 0, 0, 0, 1, 0, 0],
                    [0, 1, 0, 0, 0, 1, 0],
                    [0, 0, 1, 0, 0, 0, 1],
                    [0, 0, 0, 1, 0, 0, 0],
                    [0, 0, 0, 0, 1, 0, 0],
                    [0, 0, 0, 0, 0, 1, 0],
                    [0, 0, 0, 0, 0, 0, 1],
                ]
            )
            self.kf.H = np.array(
                [
                    [1, 0, 0, 0, 0, 0, 0],
                    [0, 1, 0, 0, 0, 0, 0],
                    [0, 0, 1, 0, 0, 0, 0],
                    [0, 0, 0, 1, 0, 0, 0],
                ]
            )
            self.kf.R[2:, 2:] *= 10.0
            self.kf.P[4:, 4:] *= 1000.0  # give high uncertainty to the unobservable initial velocities
            self.kf.P *= 10.0
            self.kf.Q[-1, -1] *= 0.01
            self.kf.Q[4:, 4:] *= 0.01
            self.bbox_to_z_func = convert_bbox_to_z
            self.x_to_bbox_func = convert_x_to_bbox

        self.kf.x[:4] = self.bbox_to_z_func(bbox)

        self.time_since_update = 0
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []
        self.hits = 0
        self.hit_streak = 0
        self.age = 0
        """
        NOTE: [-1,-1,-1,-1,-1] is a compromise placeholder for the non-observation status, matching the return of
        the function k_previous_obs. It is ugly and I do not like it. But to support generating the observation array in a
        fast and unified way, as in k_observations = np.array([k_previous_obs(...)]) below, let's bear with it for now.
        """
        # Used for OCR
        self.last_observation = np.array([-1, -1, -1, -1, -1])  # placeholder
        # Used to output the track after min_hits is reached
        self.history_observations = []
        # Used for velocity
        self.observations = dict()
        self.velocity = None
        self.delta_t = delta_t

        self.emb = emb

        self.frozen = False

    def update(self, bbox, cls):
        """
        Updates the state vector with an observed bbox.
        """
        if bbox is not None:
            self.frozen = False
            self.cls = cls
            if self.last_observation.sum() >= 0:  # there is a previous observation
                previous_box = None
                for dt in range(self.delta_t, 0, -1):
                    if self.age - dt in self.observations:
                        previous_box = self.observations[self.age - dt]
                        break
                if previous_box is None:
                    previous_box = self.last_observation
                """
                Estimate the track speed direction with observations \Delta t steps away
                """
                self.velocity = speed_direction(previous_box, bbox)
            """
            Insert new observations. This is an ugly way to maintain both self.observations
            and self.history_observations. Bear with it for the moment.
            """
            self.last_observation = bbox
            self.observations[self.age] = bbox
            self.history_observations.append(bbox)

            self.time_since_update = 0
            self.history = []
            self.hits += 1
            self.hit_streak += 1
            if self.new_kf:
                R = new_kf_measurement_noise(self.kf.x[2, 0], self.kf.x[3, 0])
                self.kf.update(self.bbox_to_z_func(bbox), R=R)
            else:
                self.kf.update(self.bbox_to_z_func(bbox))
        else:
            self.kf.update(bbox)
            self.frozen = True

    def update_emb(self, emb, alpha=0.9):
        self.emb = alpha * self.emb + (1 - alpha) * emb
        self.emb /= np.linalg.norm(self.emb)

    def get_emb(self):
        return self.emb.cpu()

    def apply_affine_correction(self, affine):
        m = affine[:, :2]
        t = affine[:, 2].reshape(2, 1)
        # For OCR
        if self.last_observation.sum() > 0:
            ps = self.last_observation[:4].reshape(2, 2).T
            ps = m @ ps + t
            self.last_observation[:4] = ps.T.reshape(-1)

        # Apply to each box in the range of the velocity computation
        for dt in range(self.delta_t, -1, -1):
            if self.age - dt in self.observations:
                ps = self.observations[self.age - dt][:4].reshape(2, 2).T
                ps = m @ ps + t
                self.observations[self.age - dt][:4] = ps.T.reshape(-1)

        # Also need to change the kf state, but it might be frozen
        self.kf.apply_affine_correction(m, t, self.new_kf)

    def predict(self):
        """
        Advances the state vector and returns the predicted bounding box estimate.
        """
        # Don't allow negative bounding boxes
        if self.new_kf:
            if self.kf.x[2] + self.kf.x[6] <= 0:
                self.kf.x[6] = 0
            if self.kf.x[3] + self.kf.x[7] <= 0:
                self.kf.x[7] = 0

            # Stop the velocity; it will be updated in the kf during OOS
            if self.frozen:
                self.kf.x[6] = self.kf.x[7] = 0
            Q = new_kf_process_noise(self.kf.x[2, 0], self.kf.x[3, 0])
        else:
            if (self.kf.x[6] + self.kf.x[2]) <= 0:
                self.kf.x[6] *= 0.0
            Q = None

        self.kf.predict(Q=Q)
        self.age += 1
        if self.time_since_update > 0:
            self.hit_streak = 0
        self.time_since_update += 1
        self.history.append(self.x_to_bbox_func(self.kf.x))
        return self.history[-1]

    def get_state(self):
        """
        Returns the current bounding box estimate.
        """
        return self.x_to_bbox_func(self.kf.x)

    def mahalanobis(self, bbox):
        """Should be run after a predict() call for accuracy."""
        return self.kf.md_for_measurement(self.bbox_to_z_func(bbox))


"""
We support multiple ways of calculating the association cost; by default
we use IoU. GIoU may have better performance in some situations. Note
that we only roughly normalize the costs of all methods to (0,1), which may
not be best practice.
"""
ASSO_FUNCS = {
    "iou": iou_batch,
    "giou": giou_batch,
    "ciou": ciou_batch,
    "diou": diou_batch,
    "ct_dist": ct_dist,
}


class OCSort(object):
    def __init__(
        self,
        model_weights,
        device,
        fp16,
        det_thresh,
        max_age=30,
        min_hits=3,
        iou_threshold=0.3,
        delta_t=3,
        asso_func="iou",
        inertia=0.2,
        w_association_emb=0.75,
        alpha_fixed_emb=0.95,
        aw_param=0.5,
        embedding_off=False,
        cmc_off=False,
        aw_off=False,
        new_kf_off=False,
        **kwargs
    ):
        """
        Sets key parameters for SORT
        """
        self.max_age = max_age
        self.min_hits = min_hits
        self.iou_threshold = iou_threshold
        self.trackers = []
        self.frame_count = 0
        self.det_thresh = det_thresh
        self.delta_t = delta_t
        self.asso_func = ASSO_FUNCS[asso_func]
        self.inertia = inertia
        self.w_association_emb = w_association_emb
        self.alpha_fixed_emb = alpha_fixed_emb
        self.aw_param = aw_param
        KalmanBoxTracker.count = 0

        self.embedder = ReIDDetectMultiBackend(weights=model_weights, device=device, fp16=fp16)
        self.cmc = CMCComputer()
        self.embedding_off = embedding_off
        self.cmc_off = cmc_off
        self.aw_off = aw_off
        self.new_kf_off = new_kf_off

    def update(self, dets, img_numpy, tag='blub'):
        """
        Params:
          dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
        Requires: this method must be called once per frame, even for frames with empty detections
        (use np.empty((0, 5)) for frames without detections).
        Returns a similar array, where the last column is the object ID.
        NOTE: The number of objects returned may differ from the number of detections provided.
        """
        xyxys = dets[:, 0:4]
        scores = dets[:, 4]
        clss = dets[:, 5]

        classes = clss.numpy()
        xyxys = xyxys.numpy()
        scores = scores.numpy()

        dets = dets[:, 0:6].numpy()
        remain_inds = scores > self.det_thresh
        dets = dets[remain_inds]
        self.height, self.width = img_numpy.shape[:2]

        # Rescale
        # scale = min(img_tensor.shape[2] / img_numpy.shape[0], img_tensor.shape[3] / img_numpy.shape[1])
        # dets[:, :4] /= scale

        # Embedding
        if self.embedding_off or dets.shape[0] == 0:
            dets_embs = np.ones((dets.shape[0], 1))
        else:
            # (Ndets x X) [512, 1024, 2048]
            # dets_embs = self.embedder.compute_embedding(img_numpy, dets[:, :4], tag)
            dets_embs = self._get_features(dets[:, :4], img_numpy)

        # CMC
        if not self.cmc_off:
            transform = self.cmc.compute_affine(img_numpy, dets[:, :4], tag)
            for trk in self.trackers:
                trk.apply_affine_correction(transform)

        trust = (dets[:, 4] - self.det_thresh) / (1 - self.det_thresh)
        af = self.alpha_fixed_emb
        # Ranges over [self.alpha_fixed_emb, 1]; goes to 1 as the detector becomes less confident
        dets_alpha = af + (1 - af) * (1 - trust)

        # get the predicted locations from the existing trackers.
        trks = np.zeros((len(self.trackers), 5))
        trk_embs = []
        to_del = []
        ret = []
        for t, trk in enumerate(trks):
            pos = self.trackers[t].predict()[0]
            trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
            if np.any(np.isnan(pos)):
                to_del.append(t)
            else:
                trk_embs.append(self.trackers[t].get_emb())
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))

        if len(trk_embs) > 0:
            trk_embs = np.vstack(trk_embs)
        else:
            trk_embs = np.array(trk_embs)

        for t in reversed(to_del):
            self.trackers.pop(t)

        velocities = np.array([trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in self.trackers])
        last_boxes = np.array([trk.last_observation for trk in self.trackers])
        k_observations = np.array([k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in self.trackers])

        """
        First round of association
        """
        # (M detections X N tracks, final score)
        if self.embedding_off or dets.shape[0] == 0 or trk_embs.shape[0] == 0:
            stage1_emb_cost = None
        else:
            stage1_emb_cost = dets_embs @ trk_embs.T
        matched, unmatched_dets, unmatched_trks = associate(
            dets,
            trks,
            self.iou_threshold,
            velocities,
            k_observations,
            self.inertia,
            stage1_emb_cost,
            self.w_association_emb,
            self.aw_off,
            self.aw_param,
        )
        for m in matched:
            self.trackers[m[1]].update(dets[m[0], :5], dets[m[0], 5])
            self.trackers[m[1]].update_emb(dets_embs[m[0]], alpha=dets_alpha[m[0]])

        """
        Second round of association by OCR
        """
        if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:
            left_dets = dets[unmatched_dets]
            left_dets_embs = dets_embs[unmatched_dets]
            left_trks = last_boxes[unmatched_trks]
            left_trks_embs = trk_embs[unmatched_trks]

            iou_left = self.asso_func(left_dets, left_trks)
            # TODO: it is better without this
            emb_cost_left = left_dets_embs @ left_trks_embs.T
            if self.embedding_off:
                emb_cost_left = np.zeros_like(emb_cost_left)
            iou_left = np.array(iou_left)
            if iou_left.max() > self.iou_threshold:
                """
                NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may
                get higher performance, especially on the MOT17/MOT20 datasets. But we keep it
                uniform here for simplicity
                """
                rematched_indices = linear_assignment(-iou_left)
                to_remove_det_indices = []
                to_remove_trk_indices = []
                for m in rematched_indices:
                    det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]
                    if iou_left[m[0], m[1]] < self.iou_threshold:
                        continue
                    self.trackers[trk_ind].update(dets[det_ind, :5], dets[det_ind, 5])
                    self.trackers[trk_ind].update_emb(dets_embs[det_ind], alpha=dets_alpha[det_ind])
                    to_remove_det_indices.append(det_ind)
                    to_remove_trk_indices.append(trk_ind)
                unmatched_dets = np.setdiff1d(unmatched_dets, np.array(to_remove_det_indices))
                unmatched_trks = np.setdiff1d(unmatched_trks, np.array(to_remove_trk_indices))

        for m in unmatched_trks:
            self.trackers[m].update(None, None)

        # create and initialise new trackers for unmatched detections
        for i in unmatched_dets:
            trk = KalmanBoxTracker(
                dets[i, :5], dets[i, 5], delta_t=self.delta_t, emb=dets_embs[i], alpha=dets_alpha[i], new_kf=not self.new_kf_off
            )
            self.trackers.append(trk)
        i = len(self.trackers)
        for trk in reversed(self.trackers):
            if trk.last_observation.sum() < 0:
                d = trk.get_state()[0]
            else:
                """
                this is optional: use either the recent observation or the kalman filter prediction;
                we didn't notice a significant difference here
                """
                d = trk.last_observation[:4]
            if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
                # +1 as the MOT benchmark requires positive IDs
                ret.append(np.concatenate((d, [trk.id + 1], [trk.cls], [trk.conf])).reshape(1, -1))
            i -= 1
            # remove dead tracklets
            if trk.time_since_update > self.max_age:
                self.trackers.pop(i)
        if len(ret) > 0:
            return np.concatenate(ret)
        return np.empty((0, 5))

    def _xywh_to_xyxy(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _get_features(self, bbox_xywh, ori_img):
        im_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            im = ori_img[y1:y2, x1:x2]
            im_crops.append(im)
        if im_crops:
            features = self.embedder(im_crops).cpu()
        else:
            features = np.array([])

        return features

    def update_public(self, dets, cates, scores):
        self.frame_count += 1

        det_scores = np.ones((dets.shape[0], 1))
        dets = np.concatenate((dets, det_scores), axis=1)

        remain_inds = scores > self.det_thresh

        cates = cates[remain_inds]
        dets = dets[remain_inds]

        trks = np.zeros((len(self.trackers), 5))
        to_del = []
        ret = []
        for t, trk in enumerate(trks):
            pos = self.trackers[t].predict()[0]
            cat = self.trackers[t].cate
            trk[:] = [pos[0], pos[1], pos[2], pos[3], cat]
            if np.any(np.isnan(pos)):
                to_del.append(t)
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.trackers.pop(t)

        velocities = np.array([trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in self.trackers])
        last_boxes = np.array([trk.last_observation for trk in self.trackers])
        k_observations = np.array([k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in self.trackers])

        matched, unmatched_dets, unmatched_trks = associate_kitti(
            dets,
            trks,
            cates,
            self.iou_threshold,
            velocities,
            k_observations,
            self.inertia,
        )

        for m in matched:
            self.trackers[m[1]].update(dets[m[0], :])

        if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:
            """
            The re-association stage by OCR.
            NOTE: at this stage, adding other strategies might continue to improve
            performance, such as the BYTE association from ByteTrack.
            """
            left_dets = dets[unmatched_dets]
            left_trks = last_boxes[unmatched_trks]
            left_dets_c = left_dets.copy()
            left_trks_c = left_trks.copy()

            iou_left = self.asso_func(left_dets_c, left_trks_c)
            iou_left = np.array(iou_left)
            det_cates_left = cates[unmatched_dets]
            trk_cates_left = trks[unmatched_trks][:, 4]
            num_dets = unmatched_dets.shape[0]
            num_trks = unmatched_trks.shape[0]
            cate_matrix = np.zeros((num_dets, num_trks))
            for i in range(num_dets):
                for j in range(num_trks):
                    if det_cates_left[i] != trk_cates_left[j]:
                        """
                        For some datasets, such as KITTI, there are multiple categories,
                        and we have to avoid associating them with each other.
                        """
                        cate_matrix[i][j] = -1e6
            iou_left = iou_left + cate_matrix
            if iou_left.max() > self.iou_threshold - 0.1:
                rematched_indices = linear_assignment(-iou_left)
                to_remove_det_indices = []
                to_remove_trk_indices = []
                for m in rematched_indices:
                    det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]
                    if iou_left[m[0], m[1]] < self.iou_threshold - 0.1:
                        continue
                    self.trackers[trk_ind].update(dets[det_ind, :])
                    to_remove_det_indices.append(det_ind)
                    to_remove_trk_indices.append(trk_ind)
                unmatched_dets = np.setdiff1d(unmatched_dets, np.array(to_remove_det_indices))
                unmatched_trks = np.setdiff1d(unmatched_trks, np.array(to_remove_trk_indices))

        for i in unmatched_dets:
            trk = KalmanBoxTracker(dets[i, :])
            trk.cate = cates[i]
            self.trackers.append(trk)
        i = len(self.trackers)

        for trk in reversed(self.trackers):
            if trk.last_observation.sum() > 0:
                d = trk.last_observation[:4]
            else:
                d = trk.get_state()[0]
            if trk.time_since_update < 1:
                if (self.frame_count <= self.min_hits) or (trk.hit_streak >= self.min_hits):
                    # id+1 as the MOT benchmark requires positive IDs
                    ret.append(np.concatenate((d, [trk.id + 1], [trk.cls], [trk.conf])).reshape(1, -1))
                if trk.hit_streak == self.min_hits:
                    # Head Padding (HP): recover the lost steps during track initialization
                    for prev_i in range(self.min_hits - 1):
                        prev_observation = trk.history_observations[-(prev_i + 2)]
                        ret.append(
                            (
                                np.concatenate(
                                    (
                                        prev_observation[:4],
                                        [trk.id + 1],
                                        [trk.cls],
                                        [trk.conf],
                                    )
                                )
                            ).reshape(1, -1)
                        )
            i -= 1
            if trk.time_since_update > self.max_age:
                self.trackers.pop(i)

        if len(ret) > 0:
            return np.concatenate(ret)
        return np.empty((0, 7))

    def dump_cache(self):
        self.cmc.dump_cache()
        self.embedder.dump_cache()
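A hedged end-to-end sketch of driving OCSort per frame. The weights file is a placeholder, and update() expects torch tensors for the detections since it calls .numpy() on them:

# Illustrative driver loop; the weights file and detection values are placeholders.
from pathlib import Path
import numpy as np
import torch
from trackers.deepocsort.ocsort import OCSort

tracker = OCSort(model_weights=Path("osnet_x0_25_msmt17.pt"),
                 device=torch.device("cuda:0"), fp16=False, det_thresh=0.4)
frame = np.zeros((720, 1280, 3), dtype=np.uint8)  # BGR frame from your video source
# detections as [x1, y1, x2, y2, score, class], one row per detection
dets = torch.tensor([[100.0, 120.0, 180.0, 300.0, 0.92, 0.0]])
tracks = tracker.update(dets, frame)  # rows: [x1, y1, x2, y2, id, cls, conf]
print(tracks)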
237
feeder/trackers/deepocsort/reid_multibackend.py
Normal file
237
feeder/trackers/deepocsort/reid_multibackend.py
Normal file
|
@ -0,0 +1,237 @@
|
|||
import torch.nn as nn
|
||||
import torch
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
from itertools import islice
|
||||
import torchvision.transforms as transforms
|
||||
import cv2
|
||||
import sys
|
||||
import torchvision.transforms as T
|
||||
from collections import OrderedDict, namedtuple
|
||||
import gdown
|
||||
from os.path import exists as file_exists
|
||||
|
||||
|
||||
from yolov8.ultralytics.yolo.utils.checks import check_requirements, check_version
|
||||
from yolov8.ultralytics.yolo.utils import LOGGER
|
||||
from trackers.strongsort.deep.reid_model_factory import (show_downloadeable_models, get_model_url, get_model_name,
|
||||
download_url, load_pretrained_weights)
|
||||
from trackers.strongsort.deep.models import build_model
|
||||
|
||||
|
||||
def check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):
|
||||
# Check file(s) for acceptable suffix
|
||||
if file and suffix:
|
||||
if isinstance(suffix, str):
|
||||
suffix = [suffix]
|
||||
for f in file if isinstance(file, (list, tuple)) else [file]:
|
||||
s = Path(f).suffix.lower() # file suffix
|
||||
if len(s):
|
||||
assert s in suffix, f"{msg}{f} acceptable suffix is {suffix}"


class ReIDDetectMultiBackend(nn.Module):
    # ReID models MultiBackend class for python inference on various backends
    def __init__(self, weights='osnet_x0_25_msmt17.pt', device=torch.device('cpu'), fp16=False):
        super().__init__()

        w = weights[0] if isinstance(weights, list) else weights
        w = Path(w)  # ensure a Path so the .suffix / .is_file() calls below also work for string inputs
        self.pt, self.jit, self.onnx, self.xml, self.engine, self.tflite = self.model_type(w)  # get backend
        self.fp16 = fp16
        self.fp16 &= self.pt or self.jit or self.engine  # FP16

        # Build transform functions
        self.device = device
        self.image_size = (256, 128)
        self.pixel_mean = [0.485, 0.456, 0.406]
        self.pixel_std = [0.229, 0.224, 0.225]
        self.transforms = []
        self.transforms += [T.Resize(self.image_size)]
        self.transforms += [T.ToTensor()]
        self.transforms += [T.Normalize(mean=self.pixel_mean, std=self.pixel_std)]
        self.preprocess = T.Compose(self.transforms)
        self.to_pil = T.ToPILImage()

        model_name = get_model_name(w)

        if w.suffix == '.pt':
            model_url = get_model_url(w)
            if not file_exists(w) and model_url is not None:
                gdown.download(model_url, str(w), quiet=False)
            elif file_exists(w):
                pass
            else:
                print(f'No URL associated to the chosen StrongSORT weights ({w}). Choose between:')
                show_downloadeable_models()
                exit()

        # Build model
        self.model = build_model(
            model_name,
            num_classes=1,
            pretrained=not (w and w.is_file()),
            use_gpu=device
        )

        if self.pt:  # PyTorch
            # populate model arch with weights
            if w and w.is_file() and w.suffix == '.pt':
                load_pretrained_weights(self.model, w)
            self.model.to(device).eval()
            self.model.half() if self.fp16 else self.model.float()
        elif self.jit:  # TorchScript
            LOGGER.info(f'Loading {w} for TorchScript inference...')
            self.model = torch.jit.load(w)
            self.model.half() if self.fp16 else self.model.float()
        elif self.onnx:  # ONNX Runtime
            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
            cuda = torch.cuda.is_available() and device.type != 'cpu'
            # check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
            import onnxruntime
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
            self.session = onnxruntime.InferenceSession(str(w), providers=providers)
        elif self.engine:  # TensorRT
            LOGGER.info(f'Loading {w} for TensorRT inference...')
            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
            if device.type == 'cpu':
                device = torch.device('cuda:0')
            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                self.model_ = runtime.deserialize_cuda_engine(f.read())
            self.context = self.model_.create_execution_context()
            self.bindings = OrderedDict()
            self.fp16 = False  # default updated below
            dynamic = False
            for index in range(self.model_.num_bindings):
                name = self.model_.get_binding_name(index)
                dtype = trt.nptype(self.model_.get_binding_dtype(index))
                if self.model_.binding_is_input(index):
                    if -1 in tuple(self.model_.get_binding_shape(index)):  # dynamic
                        dynamic = True
                        self.context.set_binding_shape(index, tuple(self.model_.get_profile_shape(0, index)[2]))
                    if dtype == np.float16:
                        self.fp16 = True
                shape = tuple(self.context.get_binding_shape(index))
                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
                self.bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
            self.dynamic = dynamic  # kept on self: forward() consults it when re-binding dynamic shapes
            self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
            batch_size = self.bindings['images'].shape[0]  # if dynamic, this is instead max batch size
        elif self.xml:  # OpenVINO
            LOGGER.info(f'Loading {w} for OpenVINO inference...')
            check_requirements(('openvino',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/
            from openvino.runtime import Core, Layout, get_batch
            ie = Core()
            if not Path(w).is_file():  # if not *.xml
                w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir
            network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
            if network.get_parameters()[0].get_layout().empty:
                network.get_parameters()[0].set_layout(Layout("NCHW"))  # standard layout; "NCWH" in the original looks like a typo
            batch_dim = get_batch(network)
            if batch_dim.is_static:
                batch_size = batch_dim.get_length()
            self.executable_network = ie.compile_model(network, device_name="CPU")  # device_name="MYRIAD" for Intel NCS2
            self.output_layer = next(iter(self.executable_network.outputs))

        elif self.tflite:
            LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                from tflite_runtime.interpreter import Interpreter, load_delegate
            except ImportError:
                import tensorflow as tf
                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
            self.interpreter = Interpreter(model_path=str(w))  # was tf.lite.Interpreter, undefined when tflite_runtime imported
            self.interpreter.allocate_tensors()
            # Get input and output tensors.
            self.input_details = self.interpreter.get_input_details()
            self.output_details = self.interpreter.get_output_details()

            # Test model on random input data.
            input_data = np.array(np.random.random_sample((1, 256, 128, 3)), dtype=np.float32)
            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)

            self.interpreter.invoke()

            # The function `get_tensor()` returns a copy of the tensor data.
            output_data = self.interpreter.get_tensor(self.output_details[0]['index'])
        else:
            print('This model framework is not supported yet!')
            exit()
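
    # Construction sketch (weight path and device are illustrative assumptions):
    #   backend = ReIDDetectMultiBackend(weights=Path('osnet_x0_25_msmt17.pt'),
    #                                    device=torch.device('cuda:0'), fp16=True)
    # The weight suffix alone (.pt / .onnx / .engine / .xml / .tflite) selects the
    # branch above; callers need no other change to switch backends.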

    @staticmethod
    def model_type(p='path/to/model.pt'):
        # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
        from trackers.reid_export import export_formats
        sf = list(export_formats().Suffix)  # export suffixes
        check_suffix(p, sf)  # checks
        types = [s in Path(p).name for s in sf]
        return types
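
    # Hypothetical illustration, assuming export_formats() lists suffixes in the
    # order (pt, torchscript, onnx, xml, engine, tflite) unpacked in __init__:
    #   model_type('osnet.onnx') -> [False, False, True, False, False, False]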

    def _preprocess(self, im_batch):
        images = []
        for element in im_batch:
            image = self.to_pil(element)
            image = self.preprocess(image)
            images.append(image)

        images = torch.stack(images, dim=0)
        images = images.to(self.device)

        return images
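
    # Note: im_batch is an iterable of HWC uint8 crops (one per detection, as in
    # warmup() below); the result is one normalized NCHW float tensor on self.device.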

    def forward(self, im_batch):
        # preprocess batch
        im_batch = self._preprocess(im_batch)

        # batch to half
        if self.fp16 and im_batch.dtype != torch.float16:
            im_batch = im_batch.half()

        # batch processing
        features = []
        if self.pt:
            features = self.model(im_batch)
        elif self.jit:  # TorchScript
            features = self.model(im_batch)
        elif self.onnx:  # ONNX Runtime
            im_batch = im_batch.cpu().numpy()  # torch to numpy
            features = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im_batch})[0]
        elif self.engine:  # TensorRT
            if self.dynamic and im_batch.shape != self.bindings['images'].shape:  # was `if True and ...`; re-bind only for dynamic engines
                i_in, i_out = (self.model_.get_binding_index(x) for x in ('images', 'output'))
                self.context.set_binding_shape(i_in, im_batch.shape)  # reshape if dynamic
                self.bindings['images'] = self.bindings['images']._replace(shape=im_batch.shape)
                self.bindings['output'].data.resize_(tuple(self.context.get_binding_shape(i_out)))
            s = self.bindings['images'].shape
            assert im_batch.shape == s, f"input size {im_batch.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs['images'] = int(im_batch.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            features = self.bindings['output'].data
        elif self.xml:  # OpenVINO
            im_batch = im_batch.cpu().numpy()  # FP32
            features = self.executable_network([im_batch])[self.output_layer]
        else:
            print('Framework not supported at the moment, we are working on it...')
            exit()

        if isinstance(features, (list, tuple)):
            return self.from_numpy(features[0]) if len(features) == 1 else [self.from_numpy(x) for x in features]
        else:
            return self.from_numpy(features)

    def from_numpy(self, x):
        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x

    def warmup(self, imgsz=[(256, 128, 3)]):
        # Warmup model by running inference once
        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.tflite
        if any(warmup_types) and self.device.type != 'cpu':
            im = [np.empty(*imgsz).astype(np.uint8)]  # input
            for _ in range(2 if self.jit else 1):
                self.forward(im)  # warmup
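
Putting the pieces together, a minimal end-to-end sketch of this backend. The weight path, import path, and dummy crops are assumptions for illustration, not part of this commit:

import numpy as np
import torch
from pathlib import Path

from trackers.deepocsort.reid_multibackend import ReIDDetectMultiBackend

# Two dummy HWC uint8 detection crops stand in for real boxes cut from a frame.
backend = ReIDDetectMultiBackend(weights=Path('osnet_x0_25_msmt17.pt'),
                                 device=torch.device('cpu'), fp16=False)
backend.warmup()  # no-op on CPU; runs a dummy forward pass on GPU backends
crops = [np.zeros((128, 64, 3), dtype=np.uint8) for _ in range(2)]
features = backend(crops)  # nn.Module __call__ -> forward(); one embedding per crop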