add StrongSORT Tracker

This commit is contained in:
Pongsatorn Kanjanasantisak 2025-08-10 01:23:09 +07:00
parent ffc2e99678
commit b7d8b3266f
93 changed files with 20230 additions and 6 deletions


@@ -0,0 +1,2 @@
from . import args
from . import ocsort


@@ -0,0 +1,110 @@
import argparse
def make_parser():
parser = argparse.ArgumentParser("OC-SORT parameters")
# distributed
parser.add_argument("-b", "--batch-size", type=int, default=1, help="batch size")
parser.add_argument("-d", "--devices", default=None, type=int, help="device for training")
parser.add_argument("--local_rank", default=0, type=int, help="local rank for dist training")
parser.add_argument("--num_machines", default=1, type=int, help="num of node for training")
parser.add_argument("--machine_rank", default=0, type=int, help="node rank for multi-node training")
parser.add_argument(
"-f",
"--exp_file",
default=None,
type=str,
help="pls input your expriment description file",
)
parser.add_argument(
"--test",
dest="test",
default=False,
action="store_true",
help="Evaluating on test-dev set.",
)
parser.add_argument(
"opts",
help="Modify config options using the command-line",
default=None,
nargs=argparse.REMAINDER,
)
# det args
parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval")
parser.add_argument("--conf", default=0.1, type=float, help="test conf")
parser.add_argument("--nms", default=0.7, type=float, help="test nms threshold")
parser.add_argument("--tsize", default=[800, 1440], nargs="+", type=int, help="test img size")
parser.add_argument("--seed", default=None, type=int, help="eval seed")
# tracking args
parser.add_argument("--track_thresh", type=float, default=0.6, help="detection confidence threshold")
parser.add_argument(
"--iou_thresh",
type=float,
default=0.3,
help="the iou threshold in Sort for matching",
)
parser.add_argument("--min_hits", type=int, default=3, help="min hits to create track in SORT")
parser.add_argument(
"--inertia",
type=float,
default=0.2,
help="the weight of VDC term in cost matrix",
)
parser.add_argument(
"--deltat",
type=int,
default=3,
help="time step difference to estimate direction",
)
parser.add_argument("--track_buffer", type=int, default=30, help="the frames for keep lost tracks")
parser.add_argument(
"--match_thresh",
type=float,
default=0.9,
help="matching threshold for tracking",
)
parser.add_argument(
"--gt-type",
type=str,
default="_val_half",
help="suffix to find the gt annotation",
)
parser.add_argument("--public", action="store_true", help="use public detection")
parser.add_argument("--asso", default="iou", help="similarity function: iou/giou/diou/ciou/ctdis")
# for kitti/bdd100k inference with public detections
parser.add_argument(
"--raw_results_path",
type=str,
default="exps/permatrack_kitti_test/",
help="path to the raw tracking results from other tracks",
)
parser.add_argument("--out_path", type=str, help="path to save output results")
parser.add_argument(
"--hp",
action="store_true",
help="use head padding to add the missing objects during \
initializing the tracks (offline).",
)
# for demo video
parser.add_argument("--demo_type", default="image", help="demo type, eg. image, video and webcam")
parser.add_argument("--path", default="./videos/demo.mp4", help="path to images or video")
parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id")
parser.add_argument(
"--save_result",
action="store_true",
help="whether to save the inference result of image/video",
)
parser.add_argument(
"--device",
default="gpu",
type=str,
help="device to run our model, can either be cpu or gpu",
)
return parser
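A minimal usage sketch for the parser above; the flag values here are illustrative, not recommended settings:

# Hypothetical invocation: parse a couple of tracking knobs and read back defaults.
args = make_parser().parse_args(["--track_thresh", "0.5", "--asso", "giou"])
print(args.track_thresh, args.iou_thresh, args.asso)  # 0.5 0.3 giou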


@@ -0,0 +1,445 @@
import os
import pdb
import numpy as np
from scipy.special import softmax
def iou_batch(bboxes1, bboxes2):
"""
From SORT: computes IoU between two sets of bboxes in the form [x1,y1,x2,y2]
"""
bboxes2 = np.expand_dims(bboxes2, 0)
bboxes1 = np.expand_dims(bboxes1, 1)
xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
wh = w * h
o = wh / (
(bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
+ (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
- wh
)
return o
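A tiny numeric sanity check for iou_batch (boxes illustrative): the second pair overlaps by half of each box, giving intersection 2 over union 6.

# Sketch: pairwise IoU of one detection against two trackers.
dets = np.array([[0, 0, 2, 2]])                 # one 2x2 box
trks = np.array([[0, 0, 2, 2], [1, 0, 3, 2]])   # identical box, half-overlapping box
print(iou_batch(dets, trks))                    # [[1.0, 0.3333]]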
def giou_batch(bboxes1, bboxes2):
"""
:param bboxes1: predicted bboxes, shape (N, 4) as (x1, y1, x2, y2)
:param bboxes2: ground-truth bboxes, shape (M, 4) as (x1, y1, x2, y2)
:return:
"""
# for details, see the GIoU paper: https://arxiv.org/pdf/1902.09630.pdf
# broadcast to an (N, M) pairwise comparison
bboxes2 = np.expand_dims(bboxes2, 0)
bboxes1 = np.expand_dims(bboxes1, 1)
xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
wh = w * h
iou = wh / (
(bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
+ (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
- wh
)
xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])
yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])
xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])
yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])
wc = xxc2 - xxc1
hc = yyc2 - yyc1
assert (wc > 0).all() and (hc > 0).all()
area_enclose = wc * hc
giou = iou - (area_enclose - wh) / area_enclose
giou = (giou + 1.0) / 2.0 # resize from (-1,1) to (0,1)
return giou
def diou_batch(bboxes1, bboxes2):
"""
:param bboxes1: predicted bboxes, shape (N, 4) as (x1, y1, x2, y2)
:param bboxes2: ground-truth bboxes, shape (M, 4) as (x1, y1, x2, y2)
:return:
"""
# for details, see the DIoU paper: https://arxiv.org/pdf/1911.08287.pdf
# broadcast to an (N, M) pairwise comparison
bboxes2 = np.expand_dims(bboxes2, 0)
bboxes1 = np.expand_dims(bboxes1, 1)
# calculate the intersection box
xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
wh = w * h
iou = wh / (
(bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
+ (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
- wh
)
centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0
centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0
centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0
centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0
inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2
xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])
yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])
xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])
yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])
outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2
diou = iou - inner_diag / outer_diag
return (diou + 1) / 2.0 # resize from (-1,1) to (0,1)
def ciou_batch(bboxes1, bboxes2):
"""
:param bboxes1: predicted bboxes, shape (N, 4) as (x1, y1, x2, y2)
:param bboxes2: ground-truth bboxes, shape (M, 4) as (x1, y1, x2, y2)
:return:
"""
# for details, see the CIoU paper: https://arxiv.org/pdf/1911.08287.pdf
# broadcast to an (N, M) pairwise comparison
bboxes2 = np.expand_dims(bboxes2, 0)
bboxes1 = np.expand_dims(bboxes1, 1)
# calculate the intersection box
xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
wh = w * h
iou = wh / (
(bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
+ (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
- wh
)
centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0
centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0
centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0
centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0
inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2
xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])
yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])
xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])
yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])
outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2
w1 = bboxes1[..., 2] - bboxes1[..., 0]
h1 = bboxes1[..., 3] - bboxes1[..., 1]
w2 = bboxes2[..., 2] - bboxes2[..., 0]
h2 = bboxes2[..., 3] - bboxes2[..., 1]
# prevent dividing over zero. add one pixel shift
h2 = h2 + 1.0
h1 = h1 + 1.0
arctan = np.arctan(w2 / h2) - np.arctan(w1 / h1)
v = (4 / (np.pi**2)) * (arctan**2)
S = 1 - iou
alpha = v / (S + v)
ciou = iou - inner_diag / outer_diag - alpha * v
return (ciou + 1) / 2.0 # resize from (-1,1) to (0,1)
def ct_dist(bboxes1, bboxes2):
"""
Measure the center distance between two sets of bounding boxes.
This is a coarse implementation; we don't recommend using it alone
for association, as it can be unstable and sensitive to frame rate
and object speed.
"""
bboxes2 = np.expand_dims(bboxes2, 0)
bboxes1 = np.expand_dims(bboxes1, 1)
centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0
centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0
centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0
centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0
ct_dist2 = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2
ct_dist = np.sqrt(ct_dist2)
# The linear rescaling is a naive version and needs more study
ct_dist = ct_dist / ct_dist.max()
return ct_dist.max() - ct_dist # resize to (0,1)
def speed_direction_batch(dets, tracks):
tracks = tracks[..., np.newaxis]
CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:, 1] + dets[:, 3]) / 2.0
CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0
dx = CX1 - CX2
dy = CY1 - CY2
norm = np.sqrt(dx**2 + dy**2) + 1e-6
dx = dx / norm
dy = dy / norm
return dy, dx # size: num_track x num_det
def linear_assignment(cost_matrix):
try:
import lap
_, x, y = lap.lapjv(cost_matrix, extend_cost=True)
return np.array([[y[i], i] for i in x if i >= 0])  # (row, col) assignment pairs
except ImportError:
from scipy.optimize import linear_sum_assignment
x, y = linear_sum_assignment(cost_matrix)
return np.array(list(zip(x, y)))
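linear_assignment solves a min-cost matching, preferring lap.lapjv and falling back to SciPy's linear_sum_assignment; both paths return (row, col) pairs. A small sketch:

# Sketch: each row is assigned its cheapest column.
cost = np.array([[0.1, 0.9], [0.8, 0.2]])
print(linear_assignment(cost))  # [[0 0] [1 1]]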
def associate_detections_to_trackers(detections, trackers, iou_threshold=0.3):
"""
Assigns detections to tracked objects (both represented as bounding boxes).
Returns 3 lists: matches, unmatched_detections and unmatched_trackers.
"""
if len(trackers) == 0:
return (
np.empty((0, 2), dtype=int),
np.arange(len(detections)),
np.empty((0, 5), dtype=int),
)
iou_matrix = iou_batch(detections, trackers)
if min(iou_matrix.shape) > 0:
a = (iou_matrix > iou_threshold).astype(np.int32)
if a.sum(1).max() == 1 and a.sum(0).max() == 1:
matched_indices = np.stack(np.where(a), axis=1)
else:
matched_indices = linear_assignment(-iou_matrix)
else:
matched_indices = np.empty(shape=(0, 2))
unmatched_detections = []
for d, det in enumerate(detections):
if d not in matched_indices[:, 0]:
unmatched_detections.append(d)
unmatched_trackers = []
for t, trk in enumerate(trackers):
if t not in matched_indices[:, 1]:
unmatched_trackers.append(t)
# filter out matched with low IOU
matches = []
for m in matched_indices:
if iou_matrix[m[0], m[1]] < iou_threshold:
unmatched_detections.append(m[0])
unmatched_trackers.append(m[1])
else:
matches.append(m.reshape(1, 2))
if len(matches) == 0:
matches = np.empty((0, 2), dtype=int)
else:
matches = np.concatenate(matches, axis=0)
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
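A sketch of the IoU-only association round above (arrays illustrative): the single detection matches the first tracker and the distant second tracker stays unmatched.

# Sketch: one detection vs. two trackers, threshold 0.3.
dets = np.array([[0, 0, 2, 2]])
trks = np.array([[0, 0, 2, 2], [10, 10, 12, 12]])
m, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks, iou_threshold=0.3)
print(m, unmatched_dets, unmatched_trks)  # [[0 0]] [] [1]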
def compute_aw_max_metric(emb_cost, w_association_emb, bottom=0.5):
w_emb = np.full_like(emb_cost, w_association_emb)
for idx in range(emb_cost.shape[0]):
inds = np.argsort(-emb_cost[idx])
# If there's less than two matches, just keep original weight
if len(inds) < 2:
continue
if emb_cost[idx, inds[0]] == 0:
row_weight = 0
else:
row_weight = 1 - max((emb_cost[idx, inds[1]] / emb_cost[idx, inds[0]]) - bottom, 0) / (1 - bottom)
w_emb[idx] *= row_weight
for idj in range(emb_cost.shape[1]):
inds = np.argsort(-emb_cost[:, idj])
# If there's less than two matches, just keep original weight
if len(inds) < 2:
continue
if emb_cost[inds[0], idj] == 0:
col_weight = 0
else:
col_weight = 1 - max((emb_cost[inds[1], idj] / emb_cost[inds[0], idj]) - bottom, 0) / (1 - bottom)
w_emb[:, idj] *= col_weight
return w_emb * emb_cost
def associate(
detections, trackers, iou_threshold, velocities, previous_obs, vdc_weight, emb_cost, w_assoc_emb, aw_off, aw_param
):
if len(trackers) == 0:
return (
np.empty((0, 2), dtype=int),
np.arange(len(detections)),
np.empty((0, 5), dtype=int),
)
Y, X = speed_direction_batch(detections, previous_obs)
inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1]
inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
diff_angle_cos = inertia_X * X + inertia_Y * Y
diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
diff_angle = np.arccos(diff_angle_cos)
diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi
valid_mask = np.ones(previous_obs.shape[0])
valid_mask[np.where(previous_obs[:, 4] < 0)] = 0
iou_matrix = iou_batch(detections, trackers)
scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1)
# iou_matrix = iou_matrix * scores  # a trick that sometimes works; we don't encourage it
valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)
angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
angle_diff_cost = angle_diff_cost.T
angle_diff_cost = angle_diff_cost * scores
if min(iou_matrix.shape) > 0:
a = (iou_matrix > iou_threshold).astype(np.int32)
if a.sum(1).max() == 1 and a.sum(0).max() == 1:
matched_indices = np.stack(np.where(a), axis=1)
else:
if emb_cost is None:
emb_cost = 0
else:
emb_cost = emb_cost.cpu().numpy()
emb_cost[iou_matrix <= 0] = 0
if not aw_off:
emb_cost = compute_aw_max_metric(emb_cost, w_assoc_emb, bottom=aw_param)
else:
emb_cost *= w_assoc_emb
final_cost = -(iou_matrix + angle_diff_cost + emb_cost)
matched_indices = linear_assignment(final_cost)
else:
matched_indices = np.empty(shape=(0, 2))
unmatched_detections = []
for d, det in enumerate(detections):
if d not in matched_indices[:, 0]:
unmatched_detections.append(d)
unmatched_trackers = []
for t, trk in enumerate(trackers):
if t not in matched_indices[:, 1]:
unmatched_trackers.append(t)
# filter out matched with low IOU
matches = []
for m in matched_indices:
if iou_matrix[m[0], m[1]] < iou_threshold:
unmatched_detections.append(m[0])
unmatched_trackers.append(m[1])
else:
matches.append(m.reshape(1, 2))
if len(matches) == 0:
matches = np.empty((0, 2), dtype=int)
else:
matches = np.concatenate(matches, axis=0)
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
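associate combines the IoU matrix, the velocity-direction-consistency (VDC) cost, and the optional embedding cost before solving the assignment. A sketch with embeddings disabled (emb_cost=None) and the VDC term masked out by the previous-observation flag:

# Sketch: detections carry a confidence column; previous_obs col 4 < 0 disables the VDC term.
dets = np.array([[0, 0, 2, 2, 0.9]])
trks = np.array([[0, 0, 2, 2, 0.0]])
velocities = np.zeros((1, 2))                  # (dy, dx) per tracker
previous_obs = np.array([[0, 0, 2, 2, -1]])
m, ud, ut = associate(dets, trks, 0.3, velocities, previous_obs, 0.2, None, 0.75, True, 0.5)
print(m)  # [[0 0]]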
def associate_kitti(detections, trackers, det_cates, iou_threshold, velocities, previous_obs, vdc_weight):
if len(trackers) == 0:
return (
np.empty((0, 2), dtype=int),
np.arange(len(detections)),
np.empty((0, 5), dtype=int),
)
"""
Cost from the velocity direction consistency
"""
Y, X = speed_direction_batch(detections, previous_obs)
inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1]
inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
diff_angle_cos = inertia_X * X + inertia_Y * Y
diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
diff_angle = np.arccos(diff_angle_cos)
diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi
valid_mask = np.ones(previous_obs.shape[0])
valid_mask[np.where(previous_obs[:, 4] < 0)] = 0
valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)
scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1)
angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
angle_diff_cost = angle_diff_cost.T
angle_diff_cost = angle_diff_cost * scores
"""
Cost from IoU
"""
iou_matrix = iou_batch(detections, trackers)
"""
With multiple categories, generate the cost for category mismatch
"""
num_dets = detections.shape[0]
num_trk = trackers.shape[0]
cate_matrix = np.zeros((num_dets, num_trk))
for i in range(num_dets):
for j in range(num_trk):
if det_cates[i] != trackers[j, 4]:
cate_matrix[i][j] = -1e6
cost_matrix = -iou_matrix - angle_diff_cost - cate_matrix
if min(iou_matrix.shape) > 0:
a = (iou_matrix > iou_threshold).astype(np.int32)
if a.sum(1).max() == 1 and a.sum(0).max() == 1:
matched_indices = np.stack(np.where(a), axis=1)
else:
matched_indices = linear_assignment(cost_matrix)
else:
matched_indices = np.empty(shape=(0, 2))
unmatched_detections = []
for d, det in enumerate(detections):
if d not in matched_indices[:, 0]:
unmatched_detections.append(d)
unmatched_trackers = []
for t, trk in enumerate(trackers):
if t not in matched_indices[:, 1]:
unmatched_trackers.append(t)
# filter out matched with low IOU
matches = []
for m in matched_indices:
if iou_matrix[m[0], m[1]] < iou_threshold:
unmatched_detections.append(m[0])
unmatched_trackers.append(m[1])
else:
matches.append(m.reshape(1, 2))
if len(matches) == 0:
matches = np.empty((0, 2), dtype=int)
else:
matches = np.concatenate(matches, axis=0)
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)


@@ -0,0 +1,170 @@
import pdb
import pickle
import os
import cv2
import numpy as np
class CMCComputer:
def __init__(self, minimum_features=10, method="sparse"):
assert method in ["file", "sparse", "sift"]
os.makedirs("./cache", exist_ok=True)
self.cache_path = "./cache/affine_ocsort.pkl"
self.cache = {}
if os.path.exists(self.cache_path):
with open(self.cache_path, "rb") as fp:
self.cache = pickle.load(fp)
self.minimum_features = minimum_features
self.prev_img = None
self.prev_desc = None
self.sparse_flow_param = dict(
maxCorners=3000,
qualityLevel=0.01,
minDistance=1,
blockSize=3,
useHarrisDetector=False,
k=0.04,
)
self.file_computed = {}
self.comp_function = None
if method == "sparse":
self.comp_function = self._affine_sparse_flow
elif method == "sift":
self.comp_function = self._affine_sift
# Use the same precomputed CMC affine files as BoT-SORT
elif method == "file":
self.comp_function = self._affine_file
self.file_affines = {}
# Maps from tag name to file name
self.file_names = {}
# All the ablation file names
for f_name in os.listdir("./cache/cmc_files/MOT17_ablation/"):
# The tag that'll be passed into compute_affine based on image name
tag = f_name.replace("GMC-", "").replace(".txt", "") + "-FRCNN"
f_name = os.path.join("./cache/cmc_files/MOT17_ablation/", f_name)
self.file_names[tag] = f_name
for f_name in os.listdir("./cache/cmc_files/MOT20_ablation/"):
tag = f_name.replace("GMC-", "").replace(".txt", "")
f_name = os.path.join("./cache/cmc_files/MOT20_ablation/", f_name)
self.file_names[tag] = f_name
# All the test file names
for f_name in os.listdir("./cache/cmc_files/MOTChallenge/"):
tag = f_name.replace("GMC-", "").replace(".txt", "")
if "MOT17" in tag:
tag = tag + "-FRCNN"
# If it's an ablation one (not test) don't overwrite it
if tag in self.file_names:
continue
f_name = os.path.join("./cache/cmc_files/MOTChallenge/", f_name)
self.file_names[tag] = f_name
def compute_affine(self, img, bbox, tag):
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
if tag in self.cache:
A = self.cache[tag]
return A
mask = np.ones_like(img, dtype=np.uint8)
if bbox.shape[0] > 0:
bbox = np.round(bbox).astype(np.int32)
bbox[bbox < 0] = 0
for bb in bbox:
mask[bb[1] : bb[3], bb[0] : bb[2]] = 0
A = self.comp_function(img, mask, tag)
self.cache[tag] = A
return A
def _load_file(self, name):
affines = []
with open(self.file_names[name], "r") as fp:
for line in fp:
tokens = [float(f) for f in line.split("\t")[1:7]]
A = np.eye(2, 3)
A[0, 0] = tokens[0]
A[0, 1] = tokens[1]
A[0, 2] = tokens[2]
A[1, 0] = tokens[3]
A[1, 1] = tokens[4]
A[1, 2] = tokens[5]
affines.append(A)
self.file_affines[name] = affines
def _affine_file(self, frame, mask, tag):
name, num = tag.split(":")
if name not in self.file_affines:
self._load_file(name)
if name not in self.file_affines:
raise RuntimeError("Error loading file affines for CMC.")
return self.file_affines[name][int(num) - 1]
def _affine_sift(self, frame, mask, tag):
A = np.eye(2, 3)
detector = cv2.SIFT_create()
kp, desc = detector.detectAndCompute(frame, mask)
if self.prev_desc is None:
self.prev_desc = [kp, desc]
return A
if desc.shape[0] < self.minimum_features or self.prev_desc[1].shape[0] < self.minimum_features:
return A
bf = cv2.BFMatcher(cv2.NORM_L2)
matches = bf.knnMatch(self.prev_desc[1], desc, k=2)
good = []
for m, n in matches:
if m.distance < 0.7 * n.distance:
good.append(m)
if len(good) > self.minimum_features:
src_pts = np.float32([self.prev_desc[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
dst_pts = np.float32([kp[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
A, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts, method=cv2.RANSAC)
else:
print("Warning: not enough matching points")
if A is None:
A = np.eye(2, 3)
self.prev_desc = [kp, desc]
return A
def _affine_sparse_flow(self, frame, mask, tag):
# Initialize
A = np.eye(2, 3)
# find the keypoints
keypoints = cv2.goodFeaturesToTrack(frame, mask=mask, **self.sparse_flow_param)
# Handle first frame
if self.prev_img is None:
self.prev_img = frame
self.prev_desc = keypoints
return A
matched_kp, status, err = cv2.calcOpticalFlowPyrLK(self.prev_img, frame, self.prev_desc, None)
matched_kp = matched_kp.reshape(-1, 2)
status = status.reshape(-1).astype(bool)  # boolean mask of successfully tracked points
prev_points = self.prev_desc.reshape(-1, 2)
prev_points = prev_points[status]
curr_points = matched_kp[status]
# Find rigid matrix
if prev_points.shape[0] > self.minimum_features:
A, _ = cv2.estimateAffinePartial2D(prev_points, curr_points, method=cv2.RANSAC)
else:
print("Warning: not enough matching points")
if A is None:
A = np.eye(2, 3)
self.prev_img = frame
self.prev_desc = keypoints
return A
def dump_cache(self):
with open(self.cache_path, "wb") as fp:
pickle.dump(self.cache, fp)
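A hedged usage sketch for the camera-motion-compensation helper above; frame_bgr and det_boxes_xyxy are placeholders for a BGR frame and an (N, 4) box array, and the tag doubles as the cache key:

# Sketch: estimate the inter-frame affine, masking out detection regions.
cmc = CMCComputer(method="sparse")
A = cmc.compute_affine(frame_bgr, det_boxes_xyxy, tag="seq01:000001")  # 2x3 affine
cmc.dump_cache()  # persists ./cache/affine_ocsort.pkl for later runs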


@@ -0,0 +1,12 @@
# Trial number: 137
# HOTA, MOTA, IDF1: [55.567]
deepocsort:
asso_func: giou
conf_thres: 0.5122620708221085
delta_t: 1
det_thresh: 0
inertia: 0.3941737016672115
iou_thresh: 0.22136877277096445
max_age: 50
min_hits: 1
use_byte: false
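These tuned values (the "Trial number" comment suggests a hyperparameter search) could be loaded and handed to the tracker; a minimal sketch, assuming PyYAML is available and the file is saved as deepocsort.yaml (a hypothetical path):

# Sketch: read the tuned hyperparameters back as a dict.
import yaml
with open("deepocsort.yaml") as fp:
    cfg = yaml.safe_load(fp)["deepocsort"]
print(cfg["asso_func"], cfg["conf_thres"])  # giou 0.5122...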


@@ -0,0 +1,116 @@
import pdb
from collections import OrderedDict
import os
import pickle
import torch
import cv2
import torchvision
import numpy as np
class EmbeddingComputer:
def __init__(self, dataset):
self.model = None
self.dataset = dataset
self.crop_size = (128, 384)
os.makedirs("./cache/embeddings/", exist_ok=True)
self.cache_path = "./cache/embeddings/{}_embedding.pkl"
self.cache = {}
self.cache_name = ""
def load_cache(self, path):
self.cache_name = path
cache_path = self.cache_path.format(path)
if os.path.exists(cache_path):
with open(cache_path, "rb") as fp:
self.cache = pickle.load(fp)
def compute_embedding(self, img, bbox, tag, is_numpy=True):
if self.cache_name != tag.split(":")[0]:
self.load_cache(tag.split(":")[0])
if tag in self.cache:
embs = self.cache[tag]
if embs.shape[0] != bbox.shape[0]:
raise RuntimeError(
"ERROR: The number of cached embeddings don't match the "
"number of detections.\nWas the detector model changed? Delete cache if so."
)
return embs
if self.model is None:
self.initialize_model()
# Make sure bbox is within image frame
if is_numpy:
h, w = img.shape[:2]
else:
h, w = img.shape[2:]
results = np.round(bbox).astype(np.int32)
results[:, 0] = results[:, 0].clip(0, w)
results[:, 1] = results[:, 1].clip(0, h)
results[:, 2] = results[:, 2].clip(0, w)
results[:, 3] = results[:, 3].clip(0, h)
# Generate all the crops
crops = []
for p in results:
if is_numpy:
crop = img[p[1] : p[3], p[0] : p[2]]
crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
crop = cv2.resize(crop, self.crop_size, interpolation=cv2.INTER_LINEAR)
crop = torch.as_tensor(crop.astype("float32").transpose(2, 0, 1))
crop = crop.unsqueeze(0)
else:
crop = img[:, :, p[1] : p[3], p[0] : p[2]]
crop = torchvision.transforms.functional.resize(crop, self.crop_size)
crops.append(crop)
crops = torch.cat(crops, dim=0)
# Create embeddings and l2 normalize them
with torch.no_grad():
crops = crops.cuda()
crops = crops.half()
embs = self.model(crops)
embs = torch.nn.functional.normalize(embs)
embs = embs.cpu().numpy()
self.cache[tag] = embs
return embs
def initialize_model(self):
"""
model = torchreid.models.build_model(name="osnet_ain_x1_0", num_classes=2510, loss="softmax", pretrained=False)
sd = torch.load("external/weights/osnet_ain_ms_d_c.pth.tar")["state_dict"]
new_state_dict = OrderedDict()
for k, v in sd.items():
name = k[7:] # remove `module.`
new_state_dict[name] = v
# load params
model.load_state_dict(new_state_dict)
model.eval()
model.cuda()
"""
if self.dataset == "mot17":
path = "external/weights/mot17_sbs_S50.pth"
elif self.dataset == "mot20":
path = "external/weights/mot20_sbs_S50.pth"
elif self.dataset == "dance":
path = None
else:
raise RuntimeError("Need the path for a new ReID model.")
# NOTE: FastReID is not imported in this file; in Deep OC-SORT it comes from
# external.adaptors.fastreid_adaptor import FastReID.
model = FastReID(path)
model.eval()
model.cuda()
model.half()
self.model = model
def dump_cache(self):
if self.cache_name:
with open(self.cache_path.format(self.cache_name), "wb") as fp:
pickle.dump(self.cache, fp)
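A usage sketch for the embedding cache above; frame_bgr and det_boxes_xyxy are placeholders, and the part of the tag before ':' selects the per-sequence cache file:

# Sketch: compute (and cache) appearance embeddings for one frame's detections.
embedder = EmbeddingComputer(dataset="mot17")
embs = embedder.compute_embedding(frame_bgr, det_boxes_xyxy, tag="MOT17-02:000001")
embedder.dump_cache()  # embs: (N, D) L2-normalized features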

File diff suppressed because it is too large


@@ -0,0 +1,670 @@
"""
This script is adapted from the SORT script by Alex Bewley alex@bewley.ai
"""
from __future__ import print_function
import pdb
import pickle
import cv2
import torch
import torchvision
import numpy as np
from .association import *
from .embedding import EmbeddingComputer
from .cmc import CMCComputer
from reid_multibackend import ReIDDetectMultiBackend
def k_previous_obs(observations, cur_age, k):
if len(observations) == 0:
return [-1, -1, -1, -1, -1]
for i in range(k):
dt = k - i
if cur_age - dt in observations:
return observations[cur_age - dt]
max_age = max(observations.keys())
return observations[max_age]
def convert_bbox_to_z(bbox):
"""
Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
[x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
the aspect ratio
"""
w = bbox[2] - bbox[0]
h = bbox[3] - bbox[1]
x = bbox[0] + w / 2.0
y = bbox[1] + h / 2.0
s = w * h # scale is just area
r = w / float(h + 1e-6)
return np.array([x, y, s, r]).reshape((4, 1))
def convert_bbox_to_z_new(bbox):
w = bbox[2] - bbox[0]
h = bbox[3] - bbox[1]
x = bbox[0] + w / 2.0
y = bbox[1] + h / 2.0
return np.array([x, y, w, h]).reshape((4, 1))
def convert_x_to_bbox_new(x):
x, y, w, h = x.reshape(-1)[:4]
return np.array([x - w / 2, y - h / 2, x + w / 2, y + h / 2]).reshape(1, 4)
def convert_x_to_bbox(x, score=None):
"""
Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
[x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
"""
w = np.sqrt(x[2] * x[3])
h = x[2] / w
if score is None:
return np.array([x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0]).reshape((1, 4))
else:
return np.array([x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0, score]).reshape((1, 5))
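A quick round-trip check of the two box parameterizations above (numbers illustrative):

# Sketch: [x1,y1,x2,y2] -> [cx,cy,s,r] -> back again.
z = convert_bbox_to_z([10, 20, 50, 100])  # cx=30, cy=60, s=3200, r=0.5
print(convert_x_to_bbox(z))               # [[ 10.  20.  50. 100.]]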
def speed_direction(bbox1, bbox2):
cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0
cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0
speed = np.array([cy2 - cy1, cx2 - cx1])
norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6
return speed / norm
def new_kf_process_noise(w, h, p=1 / 20, v=1 / 160):
Q = np.diag(
((p * w) ** 2, (p * h) ** 2, (p * w) ** 2, (p * h) ** 2, (v * w) ** 2, (v * h) ** 2, (v * w) ** 2, (v * h) ** 2)
)
return Q
def new_kf_measurement_noise(w, h, m=1 / 20):
w_var = (m * w) ** 2
h_var = (m * h) ** 2
R = np.diag((w_var, h_var, w_var, h_var))
return R
class KalmanBoxTracker(object):
"""
This class represents the internal state of individual tracked objects observed as bbox.
"""
count = 0
def __init__(self, bbox, cls, delta_t=3, orig=False, emb=None, alpha=0, new_kf=False):
"""
Initialises a tracker using initial bounding box.
"""
# define constant velocity model
if not orig:
from .kalmanfilter import KalmanFilterNew as KalmanFilter
else:
from filterpy.kalman import KalmanFilter
self.cls = cls
self.conf = bbox[-1]
self.new_kf = new_kf
if new_kf:
self.kf = KalmanFilter(dim_x=8, dim_z=4)
self.kf.F = np.array(
[
# x y w h x' y' w' h'
[1, 0, 0, 0, 1, 0, 0, 0],
[0, 1, 0, 0, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 1, 0],
[0, 0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1],
]
)
self.kf.H = np.array(
[
[1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0],
]
)
_, _, w, h = convert_bbox_to_z_new(bbox).reshape(-1)
self.kf.P = new_kf_process_noise(w, h)
self.kf.P[:4, :4] *= 4
self.kf.P[4:, 4:] *= 100
# Process and measurement uncertainty happen in functions
self.bbox_to_z_func = convert_bbox_to_z_new
self.x_to_bbox_func = convert_x_to_bbox_new
else:
self.kf = KalmanFilter(dim_x=7, dim_z=4)
self.kf.F = np.array(
[
# x y s r x' y' s'
[1, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 1, 0],
[0, 0, 1, 0, 0, 0, 1],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1],
]
)
self.kf.H = np.array(
[
[1, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0],
]
)
self.kf.R[2:, 2:] *= 10.0
self.kf.P[4:, 4:] *= 1000.0 # give high uncertainty to the unobservable initial velocities
self.kf.P *= 10.0
self.kf.Q[-1, -1] *= 0.01
self.kf.Q[4:, 4:] *= 0.01
self.bbox_to_z_func = convert_bbox_to_z
self.x_to_bbox_func = convert_x_to_bbox
self.kf.x[:4] = self.bbox_to_z_func(bbox)
self.time_since_update = 0
self.id = KalmanBoxTracker.count
KalmanBoxTracker.count += 1
self.history = []
self.hits = 0
self.hit_streak = 0
self.age = 0
"""
NOTE: [-1,-1,-1,-1,-1] is a compromise placeholder for the non-observation status, and the same
holds for the return value of k_previous_obs. It is ugly and I do not like it, but it lets us build
the observation array in a fast and unified way (see k_observations = np.array([k_previous_obs(...)]) below), so let's bear with it for now.
"""
# Used for OCR
self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder
# Used to output track after min_hits reached
self.history_observations = []
# Used for velocity
self.observations = dict()
self.velocity = None
self.delta_t = delta_t
self.emb = emb
self.frozen = False
def update(self, bbox, cls):
"""
Updates the state vector with observed bbox.
"""
if bbox is not None:
self.frozen = False
self.cls = cls
if self.last_observation.sum() >= 0:  # a previous observation exists
previous_box = None
for dt in range(self.delta_t, 0, -1):
if self.age - dt in self.observations:
previous_box = self.observations[self.age - dt]
break
if previous_box is None:
previous_box = self.last_observation
"""
Estimate the track speed direction with observations \Delta t steps away
"""
self.velocity = speed_direction(previous_box, bbox)
"""
Insert new observations. This is an ugly way to maintain both self.observations
and self.history_observations. Bear with it for the moment.
"""
self.last_observation = bbox
self.observations[self.age] = bbox
self.history_observations.append(bbox)
self.time_since_update = 0
self.history = []
self.hits += 1
self.hit_streak += 1
if self.new_kf:
R = new_kf_measurement_noise(self.kf.x[2, 0], self.kf.x[3, 0])
self.kf.update(self.bbox_to_z_func(bbox), R=R)
else:
self.kf.update(self.bbox_to_z_func(bbox))
else:
self.kf.update(bbox)
self.frozen = True
def update_emb(self, emb, alpha=0.9):
self.emb = alpha * self.emb + (1 - alpha) * emb
self.emb /= np.linalg.norm(self.emb)
def get_emb(self):
return self.emb.cpu()
def apply_affine_correction(self, affine):
m = affine[:, :2]
t = affine[:, 2].reshape(2, 1)
# For OCR
if self.last_observation.sum() > 0:
ps = self.last_observation[:4].reshape(2, 2).T
ps = m @ ps + t
self.last_observation[:4] = ps.T.reshape(-1)
# Apply to each box in the range of velocity computation
for dt in range(self.delta_t, -1, -1):
if self.age - dt in self.observations:
ps = self.observations[self.age - dt][:4].reshape(2, 2).T
ps = m @ ps + t
self.observations[self.age - dt][:4] = ps.T.reshape(-1)
# Also need to change kf state, but might be frozen
self.kf.apply_affine_correction(m, t, self.new_kf)
def predict(self):
"""
Advances the state vector and returns the predicted bounding box estimate.
"""
# Don't allow negative bounding boxes
if self.new_kf:
if self.kf.x[2] + self.kf.x[6] <= 0:
self.kf.x[6] = 0
if self.kf.x[3] + self.kf.x[7] <= 0:
self.kf.x[7] = 0
# Stop velocity, will update in kf during OOS
if self.frozen:
self.kf.x[6] = self.kf.x[7] = 0
Q = new_kf_process_noise(self.kf.x[2, 0], self.kf.x[3, 0])
else:
if (self.kf.x[6] + self.kf.x[2]) <= 0:
self.kf.x[6] *= 0.0
Q = None
self.kf.predict(Q=Q)
self.age += 1
if self.time_since_update > 0:
self.hit_streak = 0
self.time_since_update += 1
self.history.append(self.x_to_bbox_func(self.kf.x))
return self.history[-1]
def get_state(self):
"""
Returns the current bounding box estimate.
"""
return self.x_to_bbox_func(self.kf.x)
def mahalanobis(self, bbox):
"""Should be run after a predict() call for accuracy."""
return self.kf.md_for_measurement(self.bbox_to_z_func(bbox))
"""
We support multiple ways for association cost calculation, by default
we use IoU. GIoU may have better performance in some situations. Note that
we only roughly normalize the costs of all methods to (0,1), which may not be
the best practice.
"""
ASSO_FUNCS = {
"iou": iou_batch,
"giou": giou_batch,
"ciou": ciou_batch,
"diou": diou_batch,
"ct_dist": ct_dist,
}
class OCSort(object):
def __init__(
self,
model_weights,
device,
fp16,
det_thresh,
max_age=30,
min_hits=3,
iou_threshold=0.3,
delta_t=3,
asso_func="iou",
inertia=0.2,
w_association_emb=0.75,
alpha_fixed_emb=0.95,
aw_param=0.5,
embedding_off=False,
cmc_off=False,
aw_off=False,
new_kf_off=False,
**kwargs
):
"""
Sets key parameters for SORT
"""
self.max_age = max_age
self.min_hits = min_hits
self.iou_threshold = iou_threshold
self.trackers = []
self.frame_count = 0
self.det_thresh = det_thresh
self.delta_t = delta_t
self.asso_func = ASSO_FUNCS[asso_func]
self.inertia = inertia
self.w_association_emb = w_association_emb
self.alpha_fixed_emb = alpha_fixed_emb
self.aw_param = aw_param
KalmanBoxTracker.count = 0
self.embedder = ReIDDetectMultiBackend(weights=model_weights, device=device, fp16=fp16)
self.cmc = CMCComputer()
self.embedding_off = embedding_off
self.cmc_off = cmc_off
self.aw_off = aw_off
self.new_kf_off = new_kf_off
def update(self, dets, img_numpy, tag='blub'):
"""
Params:
dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections).
Returns a similar array, where the last column is the object ID.
NOTE: The number of objects returned may differ from the number of detections provided.
"""
xyxys = dets[:, 0:4]
scores = dets[:, 4]
clss = dets[:, 5]
classes = clss.numpy()
xyxys = xyxys.numpy()
scores = scores.numpy()
dets = dets[:, 0:6].numpy()
remain_inds = scores > self.det_thresh
dets = dets[remain_inds]
self.frame_count += 1  # the min_hits gating below relies on this counter advancing
self.height, self.width = img_numpy.shape[:2]
# Rescale
#scale = min(img_tensor.shape[2] / img_numpy.shape[0], img_tensor.shape[3] / img_numpy.shape[1])
#dets[:, :4] /= scale
# Embedding
if self.embedding_off or dets.shape[0] == 0:
dets_embs = np.ones((dets.shape[0], 1))
else:
# (Ndets x X) [512, 1024, 2048]
#dets_embs = self.embedder.compute_embedding(img_numpy, dets[:, :4], tag)
dets_embs = self._get_features(dets[:, :4], img_numpy)
# CMC
if not self.cmc_off:
transform = self.cmc.compute_affine(img_numpy, dets[:, :4], tag)
for trk in self.trackers:
trk.apply_affine_correction(transform)
trust = (dets[:, 4] - self.det_thresh) / (1 - self.det_thresh)
af = self.alpha_fixed_emb
# From [self.alpha_fixed_emb, 1], goes to 1 as detector is less confident
dets_alpha = af + (1 - af) * (1 - trust)
# get predicted locations from existing trackers.
trks = np.zeros((len(self.trackers), 5))
trk_embs = []
to_del = []
ret = []
for t, trk in enumerate(trks):
pos = self.trackers[t].predict()[0]
trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
if np.any(np.isnan(pos)):
to_del.append(t)
else:
trk_embs.append(self.trackers[t].get_emb())
trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
if len(trk_embs) > 0:
trk_embs = np.vstack(trk_embs)
else:
trk_embs = np.array(trk_embs)
for t in reversed(to_del):
self.trackers.pop(t)
velocities = np.array([trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in self.trackers])
last_boxes = np.array([trk.last_observation for trk in self.trackers])
k_observations = np.array([k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in self.trackers])
"""
First round of association
"""
# (M detections X N tracks, final score)
if self.embedding_off or dets.shape[0] == 0 or trk_embs.shape[0] == 0:
stage1_emb_cost = None
else:
stage1_emb_cost = dets_embs @ trk_embs.T
matched, unmatched_dets, unmatched_trks = associate(
dets,
trks,
self.iou_threshold,
velocities,
k_observations,
self.inertia,
stage1_emb_cost,
self.w_association_emb,
self.aw_off,
self.aw_param,
)
for m in matched:
self.trackers[m[1]].update(dets[m[0], :5], dets[m[0], 5])
self.trackers[m[1]].update_emb(dets_embs[m[0]], alpha=dets_alpha[m[0]])
"""
Second round of association by OCR
"""
if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:
left_dets = dets[unmatched_dets]
left_dets_embs = dets_embs[unmatched_dets]
left_trks = last_boxes[unmatched_trks]
left_trks_embs = trk_embs[unmatched_trks]
iou_left = self.asso_func(left_dets, left_trks)
# TODO: it may be better without this
emb_cost_left = left_dets_embs @ left_trks_embs.T
if self.embedding_off:
emb_cost_left = np.zeros_like(emb_cost_left)
iou_left = np.array(iou_left)
if iou_left.max() > self.iou_threshold:
"""
NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may
get a higher performance especially on MOT17/MOT20 datasets. But we keep it
uniform here for simplicity
"""
rematched_indices = linear_assignment(-iou_left)
to_remove_det_indices = []
to_remove_trk_indices = []
for m in rematched_indices:
det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]
if iou_left[m[0], m[1]] < self.iou_threshold:
continue
self.trackers[trk_ind].update(dets[det_ind, :5], dets[det_ind, 5])
self.trackers[trk_ind].update_emb(dets_embs[det_ind], alpha=dets_alpha[det_ind])
to_remove_det_indices.append(det_ind)
to_remove_trk_indices.append(trk_ind)
unmatched_dets = np.setdiff1d(unmatched_dets, np.array(to_remove_det_indices))
unmatched_trks = np.setdiff1d(unmatched_trks, np.array(to_remove_trk_indices))
for m in unmatched_trks:
self.trackers[m].update(None, None)
# create and initialise new trackers for unmatched detections
for i in unmatched_dets:
trk = KalmanBoxTracker(
dets[i, :5], dets[i, 5], delta_t=self.delta_t, emb=dets_embs[i], alpha=dets_alpha[i], new_kf=not self.new_kf_off
)
self.trackers.append(trk)
i = len(self.trackers)
for trk in reversed(self.trackers):
if trk.last_observation.sum() < 0:
d = trk.get_state()[0]
else:
"""
it is optional whether to use the recent observation or the Kalman filter
prediction; we did not notice a significant difference here
"""
d = trk.last_observation[:4]
if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
# +1 as MOT benchmark requires positive
ret.append(np.concatenate((d, [trk.id + 1], [trk.cls], [trk.conf])).reshape(1, -1))
i -= 1
# remove dead tracklet
if trk.time_since_update > self.max_age:
self.trackers.pop(i)
if len(ret) > 0:
return np.concatenate(ret)
return np.empty((0, 7))  # match the 7-column rows appended above (x1, y1, x2, y2, id, cls, conf)
def _xywh_to_xyxy(self, bbox_xywh):
x, y, w, h = bbox_xywh
x1 = max(int(x - w / 2), 0)
x2 = min(int(x + w / 2), self.width - 1)
y1 = max(int(y - h / 2), 0)
y2 = min(int(y + h / 2), self.height - 1)
return x1, y1, x2, y2
def _get_features(self, bbox_xywh, ori_img):
im_crops = []
for box in bbox_xywh:
x1, y1, x2, y2 = self._xywh_to_xyxy(box)
im = ori_img[y1:y2, x1:x2]
im_crops.append(im)
if im_crops:
features = self.embedder(im_crops).cpu()
else:
features = np.array([])
return features
def update_public(self, dets, cates, scores):
self.frame_count += 1
det_scores = np.ones((dets.shape[0], 1))
dets = np.concatenate((dets, det_scores), axis=1)
remain_inds = scores > self.det_thresh
cates = cates[remain_inds]
dets = dets[remain_inds]
trks = np.zeros((len(self.trackers), 5))
to_del = []
ret = []
for t, trk in enumerate(trks):
pos = self.trackers[t].predict()[0]
cat = self.trackers[t].cate
trk[:] = [pos[0], pos[1], pos[2], pos[3], cat]
if np.any(np.isnan(pos)):
to_del.append(t)
trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
for t in reversed(to_del):
self.trackers.pop(t)
velocities = np.array([trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in self.trackers])
last_boxes = np.array([trk.last_observation for trk in self.trackers])
k_observations = np.array([k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in self.trackers])
matched, unmatched_dets, unmatched_trks = associate_kitti(
dets,
trks,
cates,
self.iou_threshold,
velocities,
k_observations,
self.inertia,
)
for m in matched:
self.trackers[m[1]].update(dets[m[0], :])
if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:
"""
The re-association stage by OCR.
NOTE: at this stage, adding other strategies might continue to improve
the performance, such as the BYTE association from ByteTrack.
"""
left_dets = dets[unmatched_dets]
left_trks = last_boxes[unmatched_trks]
left_dets_c = left_dets.copy()
left_trks_c = left_trks.copy()
iou_left = self.asso_func(left_dets_c, left_trks_c)
iou_left = np.array(iou_left)
det_cates_left = cates[unmatched_dets]
trk_cates_left = trks[unmatched_trks][:, 4]
num_dets = unmatched_dets.shape[0]
num_trks = unmatched_trks.shape[0]
cate_matrix = np.zeros((num_dets, num_trks))
for i in range(num_dets):
for j in range(num_trks):
if det_cates_left[i] != trk_cates_left[j]:
"""
For some datasets, such as KITTI, there are multiple categories,
and we must avoid associating detections and tracks of different classes.
"""
cate_matrix[i][j] = -1e6
iou_left = iou_left + cate_matrix
if iou_left.max() > self.iou_threshold - 0.1:
rematched_indices = linear_assignment(-iou_left)
to_remove_det_indices = []
to_remove_trk_indices = []
for m in rematched_indices:
det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]
if iou_left[m[0], m[1]] < self.iou_threshold - 0.1:
continue
self.trackers[trk_ind].update(dets[det_ind, :])
to_remove_det_indices.append(det_ind)
to_remove_trk_indices.append(trk_ind)
unmatched_dets = np.setdiff1d(unmatched_dets, np.array(to_remove_det_indices))
unmatched_trks = np.setdiff1d(unmatched_trks, np.array(to_remove_trk_indices))
for i in unmatched_dets:
trk = KalmanBoxTracker(dets[i, :], cates[i])  # KalmanBoxTracker requires the class as its second argument
trk.cate = cates[i]
self.trackers.append(trk)
i = len(self.trackers)
for trk in reversed(self.trackers):
if trk.last_observation.sum() > 0:
d = trk.last_observation[:4]
else:
d = trk.get_state()[0]
if trk.time_since_update < 1:
if (self.frame_count <= self.min_hits) or (trk.hit_streak >= self.min_hits):
# id+1 as MOT benchmark requires positive
ret.append(np.concatenate((d, [trk.id + 1], [trk.cls], [trk.conf])).reshape(1, -1))
if trk.hit_streak == self.min_hits:
# Head Padding (HP): recover the lost steps during initializing the track
for prev_i in range(self.min_hits - 1):
prev_observation = trk.history_observations[-(prev_i + 2)]
ret.append(
(
np.concatenate(
(
prev_observation[:4],
[trk.id + 1],
[trk.cls],
[trk.conf],
)
)
).reshape(1, -1)
)
i -= 1
if trk.time_since_update > self.max_age:
self.trackers.pop(i)
if len(ret) > 0:
return np.concatenate(ret)
return np.empty((0, 7))
def dump_cache(self):
self.cmc.dump_cache()
self.embedder.dump_cache()
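A hedged end-to-end sketch of driving the tracker; the weights path, device, and detector loop are assumptions, and update expects dets as an (N, 6) torch tensor of [x1, y1, x2, y2, score, class] (it calls .numpy() internally):

# Sketch: one tracking step per frame.
tracker = OCSort("osnet_x0_25_msmt17.pt", device=torch.device("cuda:0"), fp16=False, det_thresh=0.5)
for frame_bgr, dets in detection_stream:      # hypothetical detector loop
    online = tracker.update(dets, frame_bgr)  # rows: x1, y1, x2, y2, id, cls, conf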


@@ -0,0 +1,237 @@
import torch.nn as nn
import torch
from pathlib import Path
import numpy as np
from itertools import islice
import torchvision.transforms as transforms
import cv2
import sys
import torchvision.transforms as T
from collections import OrderedDict, namedtuple
import gdown
from os.path import exists as file_exists
from yolov8.ultralytics.yolo.utils.checks import check_requirements, check_version
from yolov8.ultralytics.yolo.utils import LOGGER
from trackers.strongsort.deep.reid_model_factory import (show_downloadeable_models, get_model_url, get_model_name,
download_url, load_pretrained_weights)
from trackers.strongsort.deep.models import build_model
def check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):
# Check file(s) for acceptable suffix
if file and suffix:
if isinstance(suffix, str):
suffix = [suffix]
for f in file if isinstance(file, (list, tuple)) else [file]:
s = Path(f).suffix.lower() # file suffix
if len(s):
assert s in suffix, f"{msg}{f} acceptable suffix is {suffix}"
class ReIDDetectMultiBackend(nn.Module):
# ReID models MultiBackend class for python inference on various backends
def __init__(self, weights='osnet_x0_25_msmt17.pt', device=torch.device('cpu'), fp16=False):
super().__init__()
w = weights[0] if isinstance(weights, list) else weights
self.pt, self.jit, self.onnx, self.xml, self.engine, self.tflite = self.model_type(w) # get backend
self.fp16 = fp16
self.fp16 &= self.pt or self.jit or self.engine # FP16
# Build transform functions
self.device = device
self.image_size=(256, 128)
self.pixel_mean=[0.485, 0.456, 0.406]
self.pixel_std=[0.229, 0.224, 0.225]
self.transforms = []
self.transforms += [T.Resize(self.image_size)]
self.transforms += [T.ToTensor()]
self.transforms += [T.Normalize(mean=self.pixel_mean, std=self.pixel_std)]
self.preprocess = T.Compose(self.transforms)
self.to_pil = T.ToPILImage()
model_name = get_model_name(w)
if w.suffix == '.pt':
model_url = get_model_url(w)
if not file_exists(w) and model_url is not None:
gdown.download(model_url, str(w), quiet=False)
elif file_exists(w):
pass
else:
print(f'No URL associated to the chosen StrongSORT weights ({w}). Choose between:')
show_downloadeable_models()
exit()
# Build model
self.model = build_model(
model_name,
num_classes=1,
pretrained=not (w and w.is_file()),
use_gpu=device
)
if self.pt: # PyTorch
# populate model arch with weights
if w and w.is_file() and w.suffix == '.pt':
load_pretrained_weights(self.model, w)
self.model.to(device).eval()
self.model.half() if self.fp16 else self.model.float()
elif self.jit:
LOGGER.info(f'Loading {w} for TorchScript inference...')
self.model = torch.jit.load(w)
self.model.half() if self.fp16 else self.model.float()
elif self.onnx: # ONNX Runtime
LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
cuda = torch.cuda.is_available() and device.type != 'cpu'
#check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
import onnxruntime
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
self.session = onnxruntime.InferenceSession(str(w), providers=providers)
elif self.engine: # TensorRT
LOGGER.info(f'Loading {w} for TensorRT inference...')
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
if device.type == 'cpu':
device = torch.device('cuda:0')
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
logger = trt.Logger(trt.Logger.INFO)
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
self.model_ = runtime.deserialize_cuda_engine(f.read())
self.context = self.model_.create_execution_context()
self.bindings = OrderedDict()
self.fp16 = False # default updated below
self.dynamic = False  # set True below for engines with dynamic input shapes
for index in range(self.model_.num_bindings):
name = self.model_.get_binding_name(index)
dtype = trt.nptype(self.model_.get_binding_dtype(index))
if self.model_.binding_is_input(index):
if -1 in tuple(self.model_.get_binding_shape(index)):  # dynamic input
self.dynamic = True
self.context.set_binding_shape(index, tuple(self.model_.get_profile_shape(0, index)[2]))
if dtype == np.float16:
self.fp16 = True
shape = tuple(self.context.get_binding_shape(index))
im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
self.bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
batch_size = self.bindings['images'].shape[0] # if dynamic, this is instead max batch size
elif self.xml: # OpenVINO
LOGGER.info(f'Loading {w} for OpenVINO inference...')
check_requirements(('openvino',)) # requires openvino-dev: https://pypi.org/project/openvino-dev/
from openvino.runtime import Core, Layout, get_batch
ie = Core()
if not Path(w).is_file(): # if not *.xml
w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir
network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
if network.get_parameters()[0].get_layout().empty:
network.get_parameters()[0].set_layout(Layout("NCWH"))
batch_dim = get_batch(network)
if batch_dim.is_static:
batch_size = batch_dim.get_length()
self.executable_network = ie.compile_model(network, device_name="CPU") # device_name="MYRIAD" for Intel NCS2
self.output_layer = next(iter(self.executable_network.outputs))
elif self.tflite:
LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
from tflite_runtime.interpreter import Interpreter, load_delegate
except ImportError:
import tensorflow as tf
Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
self.interpreter = Interpreter(model_path=w)  # use whichever Interpreter was resolved above
self.interpreter.allocate_tensors()
# Get input and output tensors.
self.input_details = self.interpreter.get_input_details()
self.output_details = self.interpreter.get_output_details()
# Test model on random input data.
input_data = np.array(np.random.random_sample((1,256,128,3)), dtype=np.float32)
self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
self.interpreter.invoke()
# The function `get_tensor()` returns a copy of the tensor data.
output_data = self.interpreter.get_tensor(self.output_details[0]['index'])
else:
print('This model framework is not supported yet!')
exit()
@staticmethod
def model_type(p='path/to/model.pt'):
# Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
from trackers.reid_export import export_formats
sf = list(export_formats().Suffix) # export suffixes
check_suffix(p, sf) # checks
types = [s in Path(p).name for s in sf]
return types
def _preprocess(self, im_batch):
images = []
for element in im_batch:
image = self.to_pil(element)
image = self.preprocess(image)
images.append(image)
images = torch.stack(images, dim=0)
images = images.to(self.device)
return images
def forward(self, im_batch):
# preprocess batch
im_batch = self._preprocess(im_batch)
# batch to half
if self.fp16 and im_batch.dtype != torch.float16:
im_batch = im_batch.half()
# batch processing
features = []
if self.pt:
features = self.model(im_batch)
elif self.jit: # TorchScript
features = self.model(im_batch)
elif self.onnx: # ONNX Runtime
im_batch = im_batch.cpu().numpy() # torch to numpy
features = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im_batch})[0]
elif self.engine: # TensorRT
if self.dynamic and im_batch.shape != self.bindings['images'].shape:
i_in, i_out = (self.model_.get_binding_index(x) for x in ('images', 'output'))
self.context.set_binding_shape(i_in, im_batch.shape) # reshape if dynamic
self.bindings['images'] = self.bindings['images']._replace(shape=im_batch.shape)
self.bindings['output'].data.resize_(tuple(self.context.get_binding_shape(i_out)))
s = self.bindings['images'].shape
assert im_batch.shape == s, f"input size {im_batch.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
self.binding_addrs['images'] = int(im_batch.data_ptr())
self.context.execute_v2(list(self.binding_addrs.values()))
features = self.bindings['output'].data
elif self.xml: # OpenVINO
im_batch = im_batch.cpu().numpy() # FP32
features = self.executable_network([im_batch])[self.output_layer]
else:
print('Framework not supported at the moment, we are working on it...')
exit()
if isinstance(features, (list, tuple)):
return self.from_numpy(features[0]) if len(features) == 1 else [self.from_numpy(x) for x in features]
else:
return self.from_numpy(features)
def from_numpy(self, x):
return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
def warmup(self, imgsz=[(256, 128, 3)]):
# Warmup model by running inference once
warmup_types = self.pt, self.jit, self.onnx, self.engine, self.tflite
if any(warmup_types) and self.device.type != 'cpu':
im = [np.empty(*imgsz).astype(np.uint8)] # input
for _ in range(2 if self.jit else 1): #
self.forward(im) # warmup
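Finally, a usage sketch for the multi-backend extractor; the weights file is an assumption, and weights must be a Path since the loader inspects its suffix. Inputs are BGR crops as uint8 numpy arrays:

# Sketch: embed a batch of person crops with the backend picked from the file suffix.
reid = ReIDDetectMultiBackend(weights=Path('osnet_x0_25_msmt17.pt'), device=torch.device('cuda:0'), fp16=True)
reid.warmup()
feats = reid(crops)  # crops: list of HxWx3 uint8 arrays -> (N, D) feature tensor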