commit 3a5214c796
Author: lee
Date: 2024-11-27 15:37:10 +08:00

696 changed files with 56947 additions and 0 deletions

ytracking/__init__.py Normal file (0 lines)

(binary files not shown)

ytracking/models/common.py Normal file
@@ -0,0 +1,883 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Common modules
"""
import ast
import contextlib
import json
import math
import platform
import warnings
import zipfile
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path
from urllib.parse import urlparse
import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp
# Import 'ultralytics' package or install it if missing
try:
import ultralytics
    assert hasattr(ultralytics, '__version__')  # verify package is not a directory
except (ImportError, AssertionError):
import os
os.system('pip install -U ultralytics')
import ultralytics
from ytracking.ultralytics.utils.plotting import Annotator, colors, save_one_box
from ytracking.utils import TryExcept
from ytracking.utils.dataloaders import exif_transpose, letterbox
from ytracking.utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
increment_path, is_jupyter, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy,
xyxy2xywh, yaml_load)
from ytracking.utils.torch_utils import copy_attr, smart_inference_mode
def autopad(k, p=None, d=1): # kernel, padding, dilation
# Pad to 'same' shape outputs
if d > 1:
k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
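# Illustrative sanity check (values assumed, not from this commit): autopad(3) -> 1 and
# autopad(5) -> 2 keep stride-1 convs at 'same' output shape; with dilation,
# autopad(3, d=2) -> 2 because the effective kernel size grows to d * (k - 1) + 1 = 5.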
class Conv(nn.Module):
# Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
default_act = nn.SiLU() # default activation
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
super().__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
def forward(self, x):
return self.act(self.bn(self.conv(x)))
def forward_fuse(self, x):
return self.act(self.conv(x))
class DWConv(Conv):
# Depth-wise convolution
def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation
super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
class DWConvTranspose2d(nn.ConvTranspose2d):
# Depth-wise transpose convolution
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
class TransformerLayer(nn.Module):
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
def __init__(self, c, num_heads):
super().__init__()
self.q = nn.Linear(c, c, bias=False)
self.k = nn.Linear(c, c, bias=False)
self.v = nn.Linear(c, c, bias=False)
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
self.fc1 = nn.Linear(c, c, bias=False)
self.fc2 = nn.Linear(c, c, bias=False)
def forward(self, x):
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
x = self.fc2(self.fc1(x)) + x
return x
class TransformerBlock(nn.Module):
# Vision Transformer https://arxiv.org/abs/2010.11929
def __init__(self, c1, c2, num_heads, num_layers):
super().__init__()
self.conv = None
if c1 != c2:
self.conv = Conv(c1, c2)
self.linear = nn.Linear(c2, c2) # learnable position embedding
self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
self.c2 = c2
def forward(self, x):
if self.conv is not None:
x = self.conv(x)
        b, _, h, w = x.shape
        p = x.flatten(2).permute(2, 0, 1)
        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, h, w)
class Bottleneck(nn.Module):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_, c2, 3, 1, g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.SiLU()
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
class CrossConv(nn.Module):
# Cross Convolution Downsample
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, (1, k), (1, s))
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class C3(nn.Module):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x):
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
class C3x(C3):
# C3 module with cross-convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
class C3TR(C3):
# C3 module with TransformerBlock()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = TransformerBlock(c_, c_, 4, n)
class C3SPP(C3):
# C3 module with SPP()
def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = SPP(c_, c_, k)
class C3Ghost(C3):
# C3 module with GhostBottleneck()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e) # hidden channels
self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
class SPP(nn.Module):
# Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
def __init__(self, c1, c2, k=(5, 9, 13)):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
class SPPF(nn.Module):
# Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * 4, c2, 1, 1)
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
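# Note: chaining the same k=5 max-pool three times yields effective receptive fields of
# 5, 9 and 13, so SPPF(k=5) matches SPP(k=(5, 9, 13)) while re-using intermediate pools.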
class Focus(nn.Module):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
# self.contract = Contract(gain=2)
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
# return self.conv(self.contract(x))
class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
super().__init__()
c_ = c2 // 2 # hidden channels
self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
def forward(self, x):
y = self.cv1(x)
return torch.cat((y, self.cv2(y)), 1)
class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
super().__init__()
c_ = c2 // 2
self.conv = nn.Sequential(
GhostConv(c1, c_, 1, 1), # pw
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
act=False)) if s == 2 else nn.Identity()
def forward(self, x):
return self.conv(x) + self.shortcut(x)
class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
        b, c, h, w = x.size()  # assert h % s == 0 and w % s == 0, 'Indivisible gain'
s = self.gain
x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
        b, c, h, w = x.size()  # assert c % s ** 2 == 0, 'Indivisible gain'
s = self.gain
x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
class Concat(nn.Module):
# Concatenate a list of tensors along dimension
def __init__(self, dimension=1):
super().__init__()
self.d = dimension
def forward(self, x):
return torch.cat(x, self.d)
class DetectMultiBackend(nn.Module):
# YOLOv5 MultiBackend class for python inference on various backends
def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
# Usage:
# PyTorch: weights = *.pt
# TorchScript: *.torchscript
# ONNX Runtime: *.onnx
# ONNX OpenCV DNN: *.onnx --dnn
# OpenVINO: *_openvino_model
# CoreML: *.mlmodel
# TensorRT: *.engine
# TensorFlow SavedModel: *_saved_model
# TensorFlow GraphDef: *.pb
# TensorFlow Lite: *.tflite
# TensorFlow Edge TPU: *_edgetpu.tflite
# PaddlePaddle: *_paddle_model
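        # e.g. (illustrative, not from this commit): DetectMultiBackend('yolov5s.onnx')
        # takes the ONNX Runtime branch below; a '*_openvino_model' dir takes OpenVINO.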
from ytracking.models.experimental import attempt_download, attempt_load # scoped to avoid circular import
super().__init__()
w = str(weights[0] if isinstance(weights, list) else weights)
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
fp16 &= pt or jit or onnx or engine or triton # FP16
        nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCHW)
stride = 32 # default stride
cuda = torch.cuda.is_available() and device.type != 'cpu' # use CUDA
if not (pt or triton):
w = attempt_download(w) # download if not local
if pt: # PyTorch
model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
stride = max(int(model.stride.max()), 32) # model stride
names = model.module.names if hasattr(model, 'module') else model.names # get class names
model.half() if fp16 else model.float()
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
elif jit: # TorchScript
LOGGER.info(f'Loading {w} for TorchScript inference...')
extra_files = {'config.txt': ''} # model metadata
model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
model.half() if fp16 else model.float()
if extra_files['config.txt']: # load metadata dict
d = json.loads(extra_files['config.txt'],
object_hook=lambda d: {
int(k) if k.isdigit() else k: v
for k, v in d.items()})
stride, names = int(d['stride']), d['names']
elif dnn: # ONNX OpenCV DNN
LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
check_requirements('opencv-python>=4.5.4')
net = cv2.dnn.readNetFromONNX(w)
elif onnx: # ONNX Runtime
LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
import onnxruntime
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
session = onnxruntime.InferenceSession(w, providers=providers)
output_names = [x.name for x in session.get_outputs()]
meta = session.get_modelmeta().custom_metadata_map # metadata
if 'stride' in meta:
stride, names = int(meta['stride']), eval(meta['names'])
elif xml: # OpenVINO
LOGGER.info(f'Loading {w} for OpenVINO inference...')
check_requirements('openvino>=2023.0') # requires openvino-dev: https://pypi.org/project/openvino-dev/
from openvino.runtime import Core, Layout, get_batch
core = Core()
if not Path(w).is_file(): # if not *.xml
w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir
ov_model = core.read_model(model=w, weights=Path(w).with_suffix('.bin'))
if ov_model.get_parameters()[0].get_layout().empty:
ov_model.get_parameters()[0].set_layout(Layout('NCHW'))
batch_dim = get_batch(ov_model)
if batch_dim.is_static:
batch_size = batch_dim.get_length()
ov_compiled_model = core.compile_model(ov_model, device_name='AUTO') # AUTO selects best available device
stride, names = self._load_metadata(Path(w).with_suffix('.yaml')) # load metadata
elif engine: # TensorRT
LOGGER.info(f'Loading {w} for TensorRT inference...')
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
if device.type == 'cpu':
device = torch.device('cuda:0')
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
logger = trt.Logger(trt.Logger.INFO)
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
model = runtime.deserialize_cuda_engine(f.read())
context = model.create_execution_context()
bindings = OrderedDict()
output_names = []
fp16 = False # default updated below
dynamic = False
for i in range(model.num_bindings):
name = model.get_binding_name(i)
dtype = trt.nptype(model.get_binding_dtype(i))
if model.binding_is_input(i):
if -1 in tuple(model.get_binding_shape(i)): # dynamic
dynamic = True
context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
if dtype == np.float16:
fp16 = True
else: # output
output_names.append(name)
shape = tuple(context.get_binding_shape(i))
im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size
elif coreml: # CoreML
LOGGER.info(f'Loading {w} for CoreML inference...')
import coremltools as ct
model = ct.models.MLModel(w)
elif saved_model: # TF SavedModel
LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
import tensorflow as tf
keras = False # assume TF1 saved_model
model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
import tensorflow as tf
def wrap_frozen_graph(gd, inputs, outputs):
x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), []) # wrapped
ge = x.graph.as_graph_element
return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
def gd_outputs(gd):
name_list, input_list = [], []
for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
name_list.append(node.name)
input_list.extend(node.input)
return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))
gd = tf.Graph().as_graph_def() # TF GraphDef
with open(w, 'rb') as f:
gd.ParseFromString(f.read())
frozen_func = wrap_frozen_graph(gd, inputs='x:0', outputs=gd_outputs(gd))
elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
from tflite_runtime.interpreter import Interpreter, load_delegate
except ImportError:
import tensorflow as tf
                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
delegate = {
'Linux': 'libedgetpu.so.1',
'Darwin': 'libedgetpu.1.dylib',
'Windows': 'edgetpu.dll'}[platform.system()]
interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
else: # TFLite
LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
interpreter = Interpreter(model_path=w) # load TFLite model
interpreter.allocate_tensors() # allocate
input_details = interpreter.get_input_details() # inputs
output_details = interpreter.get_output_details() # outputs
# load metadata
with contextlib.suppress(zipfile.BadZipFile):
with zipfile.ZipFile(w, 'r') as model:
meta_file = model.namelist()[0]
meta = ast.literal_eval(model.read(meta_file).decode('utf-8'))
stride, names = int(meta['stride']), meta['names']
elif tfjs: # TF.js
raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported')
elif paddle: # PaddlePaddle
LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
import paddle.inference as pdi
if not Path(w).is_file(): # if not *.pdmodel
w = next(Path(w).rglob('*.pdmodel')) # get *.pdmodel file from *_paddle_model dir
weights = Path(w).with_suffix('.pdiparams')
config = pdi.Config(str(w), str(weights))
if cuda:
config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
predictor = pdi.create_predictor(config)
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
output_names = predictor.get_output_names()
elif triton: # NVIDIA Triton Inference Server
LOGGER.info(f'Using {w} as Triton Inference Server...')
check_requirements('tritonclient[all]')
            from ytracking.utils.triton import TritonRemoteModel
model = TritonRemoteModel(url=w)
nhwc = model.runtime.startswith('tensorflow')
else:
raise NotImplementedError(f'ERROR: {w} is not a supported format')
# class names
if 'names' not in locals():
names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
if names[0] == 'n01440764' and len(names) == 1000: # ImageNet
names = yaml_load(ROOT / 'data/ImageNet.yaml')['names'] # human-readable names
self.__dict__.update(locals()) # assign all variables to self
def forward(self, im, augment=False, visualize=False):
# YOLOv5 MultiBackend inference
b, ch, h, w = im.shape # batch, channel, height, width
if self.fp16 and im.dtype != torch.float16:
im = im.half() # to FP16
if self.nhwc:
im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3)
if self.pt: # PyTorch
y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
elif self.jit: # TorchScript
y = self.model(im)
elif self.dnn: # ONNX OpenCV DNN
im = im.cpu().numpy() # torch to numpy
self.net.setInput(im)
y = self.net.forward()
elif self.onnx: # ONNX Runtime
im = im.cpu().numpy() # torch to numpy
y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
elif self.xml: # OpenVINO
im = im.cpu().numpy() # FP32
y = list(self.ov_compiled_model(im).values())
elif self.engine: # TensorRT
if self.dynamic and im.shape != self.bindings['images'].shape:
i = self.model.get_binding_index('images')
self.context.set_binding_shape(i, im.shape) # reshape if dynamic
self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
for name in self.output_names:
i = self.model.get_binding_index(name)
self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
s = self.bindings['images'].shape
assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
self.binding_addrs['images'] = int(im.data_ptr())
self.context.execute_v2(list(self.binding_addrs.values()))
y = [self.bindings[x].data for x in sorted(self.output_names)]
elif self.coreml: # CoreML
im = im.cpu().numpy()
im = Image.fromarray((im[0] * 255).astype('uint8'))
# im = im.resize((192, 320), Image.BILINEAR)
y = self.model.predict({'image': im}) # coordinates are xywh normalized
if 'confidence' in y:
box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)
y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
else:
y = list(reversed(y.values())) # reversed for segmentation models (pred, proto)
elif self.paddle: # PaddlePaddle
im = im.cpu().numpy().astype(np.float32)
self.input_handle.copy_from_cpu(im)
self.predictor.run()
y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
elif self.triton: # NVIDIA Triton Inference Server
y = self.model(im)
else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
im = im.cpu().numpy()
if self.saved_model: # SavedModel
y = self.model(im, training=False) if self.keras else self.model(im)
elif self.pb: # GraphDef
y = self.frozen_func(x=self.tf.constant(im))
else: # Lite or Edge TPU
input = self.input_details[0]
int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model
if int8:
scale, zero_point = input['quantization']
im = (im / scale + zero_point).astype(np.uint8) # de-scale
self.interpreter.set_tensor(input['index'], im)
self.interpreter.invoke()
y = []
for output in self.output_details:
x = self.interpreter.get_tensor(output['index'])
if int8:
scale, zero_point = output['quantization']
x = (x.astype(np.float32) - zero_point) * scale # re-scale
y.append(x)
y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels
if isinstance(y, (list, tuple)):
return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
else:
return self.from_numpy(y)
def from_numpy(self, x):
return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
def warmup(self, imgsz=(1, 3, 640, 640)):
# Warmup model by running inference once
warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
            for _ in range(2 if self.jit else 1):
self.forward(im) # warmup
@staticmethod
def _model_type(p='path/to/model.pt'):
# Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
# types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
        from ytracking.utils.export import export_formats
        from ytracking.utils.downloads import is_url
sf = list(export_formats().Suffix) # export suffixes
if not is_url(p, check=False):
check_suffix(p, sf) # checks
url = urlparse(p) # if url may be Triton inference server
types = [s in Path(p).name for s in sf]
types[8] &= not types[9] # tflite &= not edgetpu
triton = not any(types) and all([any(s in url.scheme for s in ['http', 'grpc']), url.netloc])
return types + [triton]
@staticmethod
def _load_metadata(f=Path('path/to/meta.yaml')):
# Load metadata from meta.yaml if it exists
if f.exists():
d = yaml_load(f)
return d['stride'], d['names'] # assign stride, names
return None, None
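# Minimal usage sketch (weights path assumed; raw output is pre-NMS):
#   model = DetectMultiBackend('yolov5s.pt', device=torch.device('cpu'))
#   model.warmup(imgsz=(1, 3, 640, 640))    # dry run to allocate/compile
#   y = model(torch.zeros(1, 3, 640, 640))  # forward pass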
class AutoShape(nn.Module):
# YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
agnostic = False # NMS class-agnostic
multi_label = False # NMS multiple labels per box
classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
max_det = 1000 # maximum number of detections per image
amp = False # Automatic Mixed Precision (AMP) inference
def __init__(self, model, verbose=True):
super().__init__()
if verbose:
LOGGER.info('Adding AutoShape... ')
copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes
self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance
self.pt = not self.dmb or model.pt # PyTorch model
self.model = model.eval()
if self.pt:
m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
m.inplace = False # Detect.inplace=False for safe multithread inference
m.export = True # do not output loss values
def _apply(self, fn):
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
self = super()._apply(fn)
if self.pt:
m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
@smart_inference_mode()
def forward(self, ims, size=640, augment=False, profile=False):
# Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
# file: ims = 'data/images/zidane.jpg' # str or PosixPath
# URI: = 'https://ultralytics.com/images/zidane.jpg'
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
# PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
# numpy: = np.zeros((640,1280,3)) # HWC
# torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
# multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
dt = (Profile(), Profile(), Profile())
with dt[0]:
if isinstance(size, int): # expand
size = (size, size)
p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param
autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference
if isinstance(ims, torch.Tensor): # torch
with amp.autocast(autocast):
return self.model(ims.to(p.device).type_as(p), augment=augment) # inference
# Pre-process
n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images
shape0, shape1, files = [], [], [] # image and inference shapes, filenames
for i, im in enumerate(ims):
f = f'image{i}' # filename
if isinstance(im, (str, Path)): # filename or uri
im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
im = np.asarray(exif_transpose(im))
elif isinstance(im, Image.Image): # PIL Image
im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
files.append(Path(f).with_suffix('.jpg').name)
if im.shape[0] < 5: # image in CHW
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input
s = im.shape[:2] # HWC
shape0.append(s) # image shape
g = max(size) / max(s) # gain
shape1.append([int(y * g) for y in s])
ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] # inf shape
x = [letterbox(im, shape1, auto=False)[0] for im in ims] # pad
x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
with amp.autocast(autocast):
# Inference
with dt[1]:
y = self.model(x, augment=augment) # forward
# Post-process
with dt[2]:
y = non_max_suppression(y if self.dmb else y[0],
self.conf,
self.iou,
self.classes,
self.agnostic,
self.multi_label,
max_det=self.max_det) # NMS
for i in range(n):
scale_boxes(shape1, y[i][:, :4], shape0[i])
return Detections(ims, y, files, dt, self.names, x.shape)
class Detections:
# YOLOv5 detections class for inference results
def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
super().__init__()
d = pred[0].device # device
gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations
self.ims = ims # list of images as numpy arrays
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
self.names = names # class names
self.files = files # image filenames
self.times = times # profiling times
self.xyxy = pred # xyxy pixels
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred) # number of images (batch size)
self.t = tuple(x.t / self.n * 1E3 for x in times) # timestamps (ms)
self.s = tuple(shape) # inference BCHW shape
def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
s, crops = '', []
for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
if pred.shape[0]:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
s = s.rstrip(', ')
if show or save or render or crop:
annotator = Annotator(im, example=str(self.names))
for *box, conf, cls in reversed(pred): # xyxy, confidence, class
label = f'{self.names[int(cls)]} {conf:.2f}'
if crop:
file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
crops.append({
'box': box,
'conf': conf,
'cls': cls,
'label': label,
'im': save_one_box(box, im, file=file, save=save)})
else: # all others
annotator.box_label(box, label if labels else '', color=colors(cls))
im = annotator.im
else:
s += '(no detections)'
im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
if show:
if is_jupyter():
from IPython.display import display
display(im)
else:
im.show(self.files[i])
if save:
f = self.files[i]
im.save(save_dir / f) # save
if i == self.n - 1:
LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
if render:
self.ims[i] = np.asarray(im)
if pprint:
s = s.lstrip('\n')
return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
if crop:
if save:
LOGGER.info(f'Saved results to {save_dir}\n')
return crops
@TryExcept('Showing images is not supported in this environment')
def show(self, labels=True):
self._run(show=True, labels=labels) # show results
def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir
self._run(save=True, labels=labels, save_dir=save_dir) # save results
def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
return self._run(crop=True, save=save, save_dir=save_dir) # crop results
def render(self, labels=True):
self._run(render=True, labels=labels) # render results
return self.ims
def pandas(self):
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
new = copy(self) # return copy
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
return new
def tolist(self):
# return a list of Detections objects, i.e. 'for result in results.tolist():'
r = range(self.n) # iterable
x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
# for d in x:
# for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
# setattr(d, k, getattr(d, k)[0]) # pop out of list
return x
def print(self):
LOGGER.info(self.__str__())
def __len__(self): # override len(results)
return self.n
def __str__(self): # override print(results)
return self._run(pprint=True) # print results
def __repr__(self):
return f'YOLOv5 {self.__class__} instance\n' + self.__str__()
class Proto(nn.Module):
# YOLOv5 mask Proto module for segmentation models
def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks
super().__init__()
self.cv1 = Conv(c1, c_, k=3)
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
self.cv2 = Conv(c_, c_, k=3)
self.cv3 = Conv(c_, c2)
def forward(self, x):
return self.cv3(self.cv2(self.upsample(self.cv1(x))))
class Classify(nn.Module):
# YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self,
c1,
c2,
k=1,
s=1,
p=None,
g=1,
dropout_p=0.0): # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
super().__init__()
c_ = 1280 # efficientnet_b0 size
self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1)
self.drop = nn.Dropout(p=dropout_p, inplace=True)
self.linear = nn.Linear(c_, c2) # to x(b,c2)
def forward(self, x):
if isinstance(x, list):
x = torch.cat(x, 1)
return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
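
A minimal end-to-end sketch of the classes above, assuming a local 'yolov5s.pt' checkpoint and an example image path (both placeholders, not part of this commit):

import torch
from ytracking.models.common import AutoShape, DetectMultiBackend

backend = DetectMultiBackend('yolov5s.pt', device=torch.device('cpu'))  # load weights
model = AutoShape(backend)                 # accepts cv2/np/PIL/torch inputs, applies NMS
results = model('data/images/zidane.jpg', size=640)  # Detections instance
results.print()                            # per-class counts and timing summary
df = results.pandas().xyxy[0]              # DataFrame: xmin..ymax, confidence, class, name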


@@ -0,0 +1,111 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Experimental modules
"""
import math
import numpy as np
import torch
import torch.nn as nn
from ytracking.utils.downloads import attempt_download
class Sum(nn.Module):
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
def __init__(self, n, weight=False): # n: number of inputs
super().__init__()
self.weight = weight # apply weights boolean
self.iter = range(n - 1) # iter object
if weight:
self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights
def forward(self, x):
y = x[0] # no weight
if self.weight:
w = torch.sigmoid(self.w) * 2
for i in self.iter:
y = y + x[i + 1] * w[i]
else:
for i in self.iter:
y = y + x[i + 1]
return y
class MixConv2d(nn.Module):
# Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy
super().__init__()
n = len(k) # number of convolutions
if equal_ch: # equal c_ per group
i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(n)] # intermediate channels
else: # equal weight.numel() per group
b = [c2] + [0] * n
a = np.eye(n + 1, n, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
self.m = nn.ModuleList([
nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)])
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU()
def forward(self, x):
return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
# Ensemble of models
def __init__(self):
super().__init__()
def forward(self, x, augment=False, profile=False, visualize=False):
y = [module(x, augment, profile, visualize)[0] for module in self]
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.stack(y).mean(0) # mean ensemble
y = torch.cat(y, 1) # nms ensemble
return y, None # inference, train output
def attempt_load(weights, device=None, inplace=True, fuse=True):
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    from ytracking.models.yolo import Detect, Model
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
ckpt = torch.load(attempt_download(w), map_location='cpu') # load
ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model
# Model compatibility updates
if not hasattr(ckpt, 'stride'):
ckpt.stride = torch.tensor([32.])
if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)):
ckpt.names = dict(enumerate(ckpt.names)) # convert to dict
model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode
# Module updates
for m in model.modules():
t = type(m)
if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
m.inplace = inplace
if t is Detect and not isinstance(m.anchor_grid, list):
delattr(m, 'anchor_grid')
setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
m.recompute_scale_factor = None # torch 1.11.0 compatibility
# Return model
if len(model) == 1:
return model[-1]
# Return detection ensemble
print(f'Ensemble created with {weights}\n')
for k in 'names', 'nc', 'yaml':
setattr(model, k, getattr(model[0], k))
model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
return model
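
A short sketch of attempt_load's two return shapes, with placeholder weight paths:

# Single checkpoint -> the FP32 model itself, in eval mode; a list -> an Ensemble whose
# forward concatenates per-model predictions for NMS-style ensembling.
model = attempt_load('yolov5s.pt', device='cpu')
ensemble = attempt_load(['yolov5s.pt', 'yolov5m.pt'], device='cpu')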


@@ -0,0 +1,59 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Default anchors for COCO data
# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
- [9,11, 21,19, 17,41] # P3/8
- [43,32, 39,70, 86,64] # P4/16
- [65,131, 134,130, 120,265] # P5/32
- [282,180, 247,354, 512,387] # P6/64
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
- [28,41, 67,59, 57,141] # P3/8
- [144,103, 129,227, 270,205] # P4/16
- [209,452, 455,396, 358,812] # P5/32
- [653,922, 1109,570, 1387,1187] # P6/64
# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
- [11,11, 13,30, 29,20] # P3/8
- [30,46, 61,38, 39,92] # P4/16
- [78,80, 146,66, 79,163] # P5/32
- [149,150, 321,143, 157,303] # P6/64
- [257,402, 359,290, 524,372] # P7/128
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
- [19,22, 54,36, 32,77] # P3/8
- [70,83, 138,71, 75,173] # P4/16
- [165,159, 148,334, 375,151] # P5/32
- [334,317, 251,626, 499,474] # P6/64
- [750,326, 534,814, 1079,818] # P7/128
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
- [29,34, 81,55, 47,115] # P3/8
- [105,124, 207,107, 113,259] # P4/16
- [247,238, 222,500, 563,227] # P5/32
- [501,476, 376,939, 749,711] # P6/64
- [1126,489, 801,1222, 1618,1227] # P7/128
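
Each row above is a flat list of anchor widths and heights in pixels, three (w, h) pairs per output layer. A small sketch (values copied from anchors_p5_640) of how a row groups into pairs:

row = [10, 13, 16, 30, 33, 23]                       # anchors_p5_640, P3/8 layer
pairs = [(row[i], row[i + 1]) for i in range(0, len(row), 2)]
print(pairs)                                         # [(10, 13), (16, 30), (33, 23)]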


@@ -0,0 +1,51 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3-SPP head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, SPP, [512, [5, 9, 13]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]


@@ -0,0 +1,41 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,14, 23,27, 37,58] # P4/16
- [81,82, 135,169, 344,319] # P5/32
# YOLOv3-tiny backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [16, 3, 1]], # 0
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
[-1, 1, Conv, [32, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
[-1, 1, Conv, [512, 3, 1]],
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
]
# YOLOv3-tiny head
head:
[[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
]


@@ -0,0 +1,51 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3 head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]


@@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 BiFPN head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]


@@ -0,0 +1,42 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 FPN head
head:
[[-1, 3, C3, [1024, False]], # 10 (P5/32-large)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, C3, [512, False]], # 14 (P4/16-medium)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, C3, [256, False]], # 18 (P3/8-small)
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]


@@ -0,0 +1,54 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 2], 1, Concat, [1]], # cat backbone P2
[-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
[-1, 1, Conv, [128, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P3
[-1, 3, C3, [256, False]], # 24 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 27 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 30 (P5/32-large)
[[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
]


@@ -0,0 +1,41 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 6, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 1024 ] ],
[ -1, 1, SPPF, [ 1024, 5 ] ], # 9
]
# YOLOv5 v6.0 head with (P3, P4) outputs
head:
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 13
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 20 (P4/16-medium)
[ [ 17, 20 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4)
]


@@ -0,0 +1,56 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]


@@ -0,0 +1,67 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
[-1, 3, C3, [1280]],
[-1, 1, SPPF, [1280, 5]], # 13
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
head:
[[-1, 1, Conv, [1024, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 10], 1, Concat, [1]], # cat backbone P6
[-1, 3, C3, [1024, False]], # 17
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 21
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 25
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 29 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 26], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 32 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 22], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 35 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
[-1, 1, Conv, [1024, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P7
[-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
[[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
]


@@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 PANet head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]


@@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
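
This P6 variant extends the backbone to a stride-64 level (layers 9-11) and adds a fourth detection output. DetectionModel (see ytracking/models/yolo.py below) derives the strides from a dummy forward pass; a sketch of that arithmetic, assuming the same 256-pixel probe image as the source:

probe = 256
map_sizes = [32, 16, 8, 4]              # P3, P4, P5, P6 feature-map sizes for a 256px input
print([probe // s for s in map_sizes])  # -> [8, 16, 32, 64]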

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,49 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
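
The activation: key above is what makes this config special; parse_model() in ytracking/models/yolo.py (below) reads it and replaces the class-level default on Conv, so every Conv built afterwards inherits it. A minimal sketch of that mechanism (import path as used by tf.py in this commit):

import torch.nn as nn
from models.common import Conv

act = "nn.LeakyReLU(0.1)"     # value taken from the YAML above
Conv.default_act = eval(act)  # class attribute, so all subsequently built Conv() layers use it
conv = Conv(3, 16, k=3, s=1)
print(conv.act)               # -> LeakyReLU(negative_slope=0.1)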

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3Ghost, [128]],
[-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3Ghost, [256]],
[-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3Ghost, [512]],
[-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3Ghost, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, GhostConv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3Ghost, [512, False]], # 13
[-1, 1, GhostConv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
[-1, 1, GhostConv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
[-1, 1, GhostConv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
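
This variant swaps Conv/C3 for GhostConv/C3Ghost to cut parameters and FLOPs. A rough sketch of the GhostNet idea behind it (not the exact implementation in this commit): half the output channels come from a dense convolution, the other half from a cheap depth-wise convolution over that result:

import torch
import torch.nn as nn

class GhostConvSketch(nn.Module):
    def __init__(self, c1, c2, k=1, s=1):
        super().__init__()
        c_ = c2 // 2
        self.cv1 = nn.Conv2d(c1, c_, k, s, k // 2, bias=False)        # dense half
        self.cv2 = nn.Conv2d(c_, c_, 5, 1, 2, groups=c_, bias=False)  # cheap "ghost" half
    def forward(self, x):
        y = self.cv1(x)
        return torch.cat((y, self.cv2(y)), 1)

print(GhostConvSketch(64, 128)(torch.zeros(1, 64, 8, 8)).shape)  # -> torch.Size([1, 128, 8, 8])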

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]
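
The Segment row above passes two extra args over Detect: [nc, anchors, 32, 256] adds nm=32 mask coefficients per anchor and an npr=256-channel Proto network (see the Segment class in ytracking/models/yolo.py below). The per-anchor output width grows accordingly:

nc, nm = 80, 32
no_detect = nc + 5            # box(4) + objectness(1) + class scores
no_segment = 5 + nc + nm      # Detect outputs plus 32 mask coefficients
print(no_detect, no_segment)  # -> 85 117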

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.5 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

608
ytracking/models/tf.py Normal file
View File

@ -0,0 +1,608 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
TensorFlow, Keras and TFLite versions of YOLOv5
Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
Usage:
$ python models/tf.py --weights yolov5s.pt
Export:
$ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
"""
import argparse
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd()) # relative
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
from tensorflow import keras
from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv,
DWConvTranspose2d, Focus, autopad)
from models.experimental import MixConv2d, attempt_load
from models.yolo import Detect, Segment
from utils.activations import SiLU
from utils.general import LOGGER, make_divisible, print_args
class TFBN(keras.layers.Layer):
# TensorFlow BatchNormalization wrapper
def __init__(self, w=None):
super().__init__()
self.bn = keras.layers.BatchNormalization(
beta_initializer=keras.initializers.Constant(w.bias.numpy()),
gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
epsilon=w.eps)
def call(self, inputs):
return self.bn(inputs)
class TFPad(keras.layers.Layer):
# Pad inputs in spatial dimensions 1 and 2
def __init__(self, pad):
super().__init__()
if isinstance(pad, int):
self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
else: # tuple/list
self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
def call(self, inputs):
return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
class TFConv(keras.layers.Layer):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
# ch_in, ch_out, kernel, stride, padding, groups, activation, weights
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
# TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
# see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
conv = keras.layers.Conv2D(
filters=c2,
kernel_size=k,
strides=s,
padding='SAME' if s == 1 else 'VALID',
use_bias=not hasattr(w, 'bn'),
kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
self.act = activations(w.act) if act else tf.identity
def call(self, inputs):
return self.act(self.bn(self.conv(inputs)))
class TFDWConv(keras.layers.Layer):
# Depthwise convolution
def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
# ch_in, ch_out, kernel, stride, padding, activation, weights
super().__init__()
assert c2 % c1 == 0, f'TFDWConv() output={c2} must be a multiple of input={c1} channels'
conv = keras.layers.DepthwiseConv2D(
kernel_size=k,
depth_multiplier=c2 // c1,
strides=s,
padding='SAME' if s == 1 else 'VALID',
use_bias=not hasattr(w, 'bn'),
depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
self.act = activations(w.act) if act else tf.identity
def call(self, inputs):
return self.act(self.bn(self.conv(inputs)))
class TFDWConvTranspose2d(keras.layers.Layer):
# Depthwise ConvTranspose2d
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
# ch_in, ch_out, kernel, stride, padding, padding_out, weights
super().__init__()
assert c1 == c2, f'TFDWConvTranspose2d() output={c2} must be equal to input={c1} channels'
assert k == 4 and p1 == 1, 'TFDWConvTranspose2d() only valid for k=4 and p1=1'
weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
self.c1 = c1
self.conv = [
keras.layers.Conv2DTranspose(filters=1,
kernel_size=k,
strides=s,
padding='VALID',
output_padding=p2,
use_bias=True,
kernel_initializer=keras.initializers.Constant(weight[..., i:i + 1]),
bias_initializer=keras.initializers.Constant(bias[i])) for i in range(c1)]
def call(self, inputs):
return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
class TFFocus(keras.layers.Layer):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
# ch_in, ch_out, kernel, stride, padding, groups, activation, weights
super().__init__()
self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
# inputs = inputs / 255 # normalize 0-255 to 0-1
inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
return self.conv(tf.concat(inputs, 3))
class TFBottleneck(keras.layers.Layer):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
self.add = shortcut and c1 == c2
def call(self, inputs):
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFCrossConv(keras.layers.Layer):
# Cross Convolution
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
self.add = shortcut and c1 == c2
def call(self, inputs):
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFConv2d(keras.layers.Layer):
# Substitution for PyTorch nn.Conv2d
def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
self.conv = keras.layers.Conv2D(filters=c2,
kernel_size=k,
strides=s,
padding='VALID',
use_bias=bias,
kernel_initializer=keras.initializers.Constant(
w.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None)
def call(self, inputs):
return self.conv(inputs)
class TFBottleneckCSP(keras.layers.Layer):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
self.bn = TFBN(w.bn)
self.act = lambda x: keras.activations.swish(x)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
y1 = self.cv3(self.m(self.cv1(inputs)))
y2 = self.cv2(inputs)
return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
class TFC3(keras.layers.Layer):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
class TFC3x(keras.layers.Layer):
# C3 module with cross-convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
self.m = keras.Sequential([
TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)])
def call(self, inputs):
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
class TFSPP(keras.layers.Layer):
# Spatial pyramid pooling layer used in YOLOv3-SPP
def __init__(self, c1, c2, k=(5, 9, 13), w=None):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k]
def call(self, inputs):
x = self.cv1(inputs)
return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
class TFSPPF(keras.layers.Layer):
# Spatial pyramid pooling-Fast layer
def __init__(self, c1, c2, k=5, w=None):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')
def call(self, inputs):
x = self.cv1(inputs)
y1 = self.m(x)
y2 = self.m(y1)
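# three chained k=5 max-pools cover the same receptive fields as SPP's parallel 5/9/13 pools, at lower cost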
return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
class TFDetect(keras.layers.Layer):
# TF YOLOv5 Detect layer
def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer
super().__init__()
self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [tf.zeros(1)] * self.nl # init grid
self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
self.training = False # set to False after building model
self.imgsz = imgsz
for i in range(self.nl):
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
self.grid[i] = self._make_grid(nx, ny)
def call(self, inputs):
z = [] # inference output
x = []
for i in range(self.nl):
x.append(self.m[i](inputs[i]))
# x(bs,20,20,255) to x(bs,3,20,20,85)
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
if not self.training: # inference
y = x[i]
grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
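# decode as in Detect: xy = (2*sigmoid(t) - 0.5 + cell) * stride, wh = (2*sigmoid(t))**2 * anchor
# (the -0.5 is folded into `grid` above and the 2**2 into the `* 4` on anchor_grid)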
xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy
wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
# Normalize xywh to 0-1 to reduce calibration error
xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1)
z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), )
@staticmethod
def _make_grid(nx=20, ny=20):
# yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
# return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
class TFSegment(TFDetect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
super().__init__(nc, anchors, ch, imgsz, w)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv
self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos
self.detect = TFDetect.call
def call(self, x):
p = self.proto(x[0])
# p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0])) # (optional) full-size protos
p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160)
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p)
class TFProto(keras.layers.Layer):
def __init__(self, c1, c_=256, c2=32, w=None):
super().__init__()
self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
self.upsample = TFUpsample(None, scale_factor=2, mode='nearest')
self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
self.cv3 = TFConv(c_, c2, w=w.cv3)
def call(self, inputs):
return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
class TFUpsample(keras.layers.Layer):
# TF version of torch.nn.Upsample()
def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
super().__init__()
assert scale_factor % 2 == 0, 'scale_factor must be a multiple of 2'
self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode)
# self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
# with default arguments: align_corners=False, half_pixel_centers=False
# self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
# size=(x.shape[1] * 2, x.shape[2] * 2))
def call(self, inputs):
return self.upsample(inputs)
class TFConcat(keras.layers.Layer):
# TF version of torch.concat()
def __init__(self, dimension=1, w=None):
super().__init__()
assert dimension == 1, 'only NCHW to NHWC concat (dim 1 -> 3) is supported'
self.d = 3
def call(self, inputs):
return tf.concat(inputs, self.d)
def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m_str = m
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
except NameError:
pass
n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [
nn.Conv2d, Conv, DWConv, DWConvTranspose2d, Bottleneck, SPP, SPPF, MixConv2d, Focus, CrossConv,
BottleneckCSP, C3, C3x]:
c1, c2 = ch[f], args[0]
c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3, C3x]:
args.insert(2, n)
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
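# unlike yolo.parse_model, ch here keeps the input-channels entry at index 0, hence the x + 1 offset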
elif m in [Detect, Segment]:
args.append([ch[x + 1] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, 8)
args.append(imgsz)
else:
c2 = ch[f]
tf_m = eval('TF' + m_str.replace('nn.', ''))
m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
else tf_m(*args, w=model.model[i]) # module
torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum(x.numel() for x in torch_m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
ch.append(c2)
return keras.Sequential(layers), sorted(save)
class TFModel:
# TF YOLOv5 model
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg) as f:
self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
# Define model
if nc and nc != self.yaml['nc']:
LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
self.yaml['nc'] = nc # override yaml value
self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
def predict(self,
inputs,
tf_nms=False,
agnostic_nms=False,
topk_per_class=100,
topk_all=100,
iou_thres=0.45,
conf_thres=0.25):
y = [] # outputs
x = inputs
for m in self.model.layers:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
x = m(x) # run
y.append(x if m.i in self.savelist else None) # save output
# Add TensorFlow NMS
if tf_nms:
boxes = self._xywh2xyxy(x[0][..., :4])
probs = x[0][:, :, 4:5]
classes = x[0][:, :, 5:]
scores = probs * classes
if agnostic_nms:
nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
else:
boxes = tf.expand_dims(boxes, 2)
nms = tf.image.combined_non_max_suppression(boxes,
scores,
topk_per_class,
topk_all,
iou_thres,
conf_thres,
clip_boxes=False)
return (nms, )
return x # output [1,6300,85] = [xywh, conf, class0, class1, ...]
# x = x[0] # [x(1,6300,85), ...] to x(6300,85)
# xywh = x[..., :4] # x(6300,4) boxes
# conf = x[..., 4:5] # x(6300,1) confidences
# cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
# return tf.concat([conf, cls, xywh], 1)
@staticmethod
def _xywh2xyxy(xywh):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
class AgnosticNMS(keras.layers.Layer):
# TF Agnostic NMS
def call(self, input, topk_all, iou_thres, conf_thres):
# wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
input,
fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
name='agnostic_nms')
@staticmethod
def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS
boxes, classes, scores = x
class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
scores_inp = tf.reduce_max(scores, -1)
selected_inds = tf.image.non_max_suppression(boxes,
scores_inp,
max_output_size=topk_all,
iou_threshold=iou_thres,
score_threshold=conf_thres)
selected_boxes = tf.gather(boxes, selected_inds)
padded_boxes = tf.pad(selected_boxes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
mode='CONSTANT',
constant_values=0.0)
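# pad every image's detections to a fixed topk_all length so tf.map_fn sees uniform output shapes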
selected_scores = tf.gather(scores_inp, selected_inds)
padded_scores = tf.pad(selected_scores,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode='CONSTANT',
constant_values=-1.0)
selected_classes = tf.gather(class_inds, selected_inds)
padded_classes = tf.pad(selected_classes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode='CONSTANT',
constant_values=-1.0)
valid_detections = tf.shape(selected_inds)[0]
return padded_boxes, padded_scores, padded_classes, valid_detections
def activations(act=nn.SiLU):
# Returns TF activation from input PyTorch activation
if isinstance(act, nn.LeakyReLU):
return lambda x: keras.activations.relu(x, alpha=0.1)
elif isinstance(act, nn.Hardswish):
return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
elif isinstance(act, (nn.SiLU, SiLU)):
return lambda x: keras.activations.swish(x)
else:
raise Exception(f'no matching TensorFlow activation found for PyTorch activation {act}')
def representative_dataset_gen(dataset, ncalib=100):
# Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
im = np.transpose(img, [1, 2, 0])
im = np.expand_dims(im, axis=0).astype(np.float32)
im /= 255
yield [im]
if n >= ncalib:
break
def run(
weights=ROOT / 'yolov5s.pt', # weights path
imgsz=(640, 640), # inference size h,w
batch_size=1, # batch size
dynamic=False, # dynamic batch size
):
# PyTorch model
im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
model = attempt_load(weights, device=torch.device('cpu'), inplace=True, fuse=False)
_ = model(im) # inference
model.info()
# TensorFlow model
im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
_ = tf_model.predict(im) # inference
# Keras model
im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
keras_model.summary()
LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')
def parse_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--dynamic', action='store_true', help='dynamic batch size')
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
run(**vars(opt))
if __name__ == '__main__':
opt = parse_opt()
main(opt)

391
ytracking/models/yolo.py Normal file
View File

@ -0,0 +1,391 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
YOLO-specific modules
Usage:
$ python models/yolo.py --cfg yolov5s.yaml
"""
import argparse
import contextlib
import os
import platform
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
if platform.system() != 'Windows':
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import * # noqa
from models.experimental import * # noqa
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
from utils.plots import feature_visualization
from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device,
time_sync)
try:
import thop # for FLOPs computation
except ImportError:
thop = None
class Detect(nn.Module):
# YOLOv5 Detect head for detection models
stride = None # strides computed during build
dynamic = False # force grid reconstruction
export = False # export mode
def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
super().__init__()
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid
self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid
self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.inplace = inplace # use inplace ops (e.g. slice assignment)
def forward(self, x):
z = [] # inference output
for i in range(self.nl):
x[i] = self.m[i](x[i]) # conv
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
if not self.training: # inference
if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
if isinstance(self, Segment): # (boxes + masks)
xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
else: # Detect (boxes only)
xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf), 4)
z.append(y.view(bs, self.na * nx * ny, self.no))
return x if self.training else (torch.cat(z, 1), ) if self.export else (torch.cat(z, 1), x)
def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, '1.10.0')):
d = self.anchors[i].device
t = self.anchors[i].dtype
shape = 1, self.na, ny, nx, 2 # grid shape
y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
yv, xv = torch.meshgrid(y, x, indexing='ij') if torch_1_10 else torch.meshgrid(y, x) # indexing='ij' needs torch>=1.10; fallback keeps older torch compatible
grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
return grid, anchor_grid
class Segment(Detect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
super().__init__(nc, anchors, ch, inplace)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.proto = Proto(ch[0], self.npr, self.nm) # protos
self.detect = Detect.forward
def forward(self, x):
p = self.proto(x[0])
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
class BaseModel(nn.Module):
# YOLOv5 base model
def forward(self, x, profile=False, visualize=False):
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_once(self, x, profile=False, visualize=False):
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
self._profile_one_layer(m, x, dt)
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
return x
def _profile_one_layer(self, m, x, dt):
c = m == self.model[-1] # is final layer, copy input as inplace fix
o = thop.profile(m, inputs=(x.copy() if c else x, ), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs
t = time_sync()
for _ in range(10):
m(x.copy() if c else x)
dt.append((time_sync() - t) * 100)
if m == self.model[0]:
LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
if c:
LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
LOGGER.info('Fusing layers... ')
for m in self.model.modules():
if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, 'bn') # remove batchnorm
m.forward = m.forward_fuse # update forward
self.info()
return self
def info(self, verbose=False, img_size=640): # print model information
model_info(self, verbose, img_size)
def _apply(self, fn):
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
self = super()._apply(fn)
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
class DetectionModel(BaseModel):
# YOLOv5 detection model
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg, encoding='ascii', errors='ignore') as f:
self.yaml = yaml.safe_load(f) # model dict
# Define model
ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
if nc and nc != self.yaml['nc']:
LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
self.yaml['nc'] = nc # override yaml value
if anchors:
LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
self.yaml['anchors'] = round(anchors) # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
self.names = [str(i) for i in range(self.yaml['nc'])] # default names
self.inplace = self.yaml.get('inplace', True)
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
s = 256 # 2x min stride
m.inplace = self.inplace
forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward
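# 256px probe -> feature maps of 32/16/8 (and 4 for P6 models), i.e. strides [8, 16, 32(, 64)]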
check_anchor_order(m)
m.anchors /= m.stride.view(-1, 1, 1)
self.stride = m.stride
self._initialize_biases() # only run once
# Init weights, biases
initialize_weights(self)
self.info()
LOGGER.info('')
def forward(self, x, augment=False, profile=False, visualize=False):
if augment:
return self._forward_augment(x) # augmented inference, None
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_augment(self, x):
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self._forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi = self._descale_pred(yi, fi, si, img_size)
y.append(yi)
y = self._clip_augmented(y) # clip augmented tails
return torch.cat(y, 1), None # augmented inference, train
def _descale_pred(self, p, flips, scale, img_size):
# de-scale predictions following augmented inference (inverse operation)
if self.inplace:
p[..., :4] /= scale # de-scale
if flips == 2:
p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
elif flips == 3:
p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
else:
x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
if flips == 2:
y = img_size[0] - y # de-flip ud
elif flips == 3:
x = img_size[1] - x # de-flip lr
p = torch.cat((x, y, wh, p[..., 4:]), -1)
return p
def _clip_augmented(self, y):
# Clip YOLOv5 augmented inference tails
nl = self.model[-1].nl # number of detection layers (P3-P5)
g = sum(4 ** x for x in range(nl)) # grid points
e = 1 # exclude layer count
i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices
y[0] = y[0][:, :-i] # large
i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
y[-1] = y[-1][:, i:] # small
return y
def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
# https://arxiv.org/abs/1708.02002 section 3.3
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum()) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
class SegmentationModel(DetectionModel):
# YOLOv5 segmentation model
def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
super().__init__(cfg, ch, nc, anchors)
class ClassificationModel(BaseModel):
# YOLOv5 classification model
def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index
super().__init__()
self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
def _from_detection_model(self, model, nc=1000, cutoff=10):
# Create a YOLOv5 classification model from a YOLOv5 detection model
if isinstance(model, DetectMultiBackend):
model = model.model # unwrap DetectMultiBackend
model.model = model.model[:cutoff] # backbone
m = model.model[-1] # last layer
ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels # ch into module
c = Classify(ch, nc) # Classify()
c.i, c.f, c.type = m.i, m.f, 'models.common.Classify' # index, from, type
model.model[-1] = c # replace
self.model = model.model
self.stride = model.stride
self.save = []
self.nc = nc
def _from_yaml(self, cfg):
# Create a YOLOv5 classification model from a *.yaml file
self.model = None
def parse_model(d, ch): # model_dict, input_channels(3)
# Parse a YOLOv5 model.yaml dictionary
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
if act:
Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
LOGGER.info(f"{colorstr('activation:')} {act}") # print
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
with contextlib.suppress(NameError):
args[j] = eval(a) if isinstance(a, str) else a # eval strings
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in {
Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
c1, c2 = ch[f], args[0]
if c2 != no: # if not output
c2 = make_divisible(c2 * gw, 8)
args = [c1, c2, *args[1:]]
if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
args.insert(2, n) # number of repeats
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[x] for x in f)
# TODO: channel, gw, gd
elif m in {Detect, Segment}:
args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, 8)
elif m is Contract:
c2 = ch[f] * args[0] ** 2
elif m is Expand:
c2 = ch[f] // args[0] ** 2
else:
c2 = ch[f]
m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum(x.numel() for x in m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--profile', action='store_true', help='profile model speed')
parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
opt = parser.parse_args()
opt.cfg = check_yaml(opt.cfg) # check YAML
print_args(vars(opt))
device = select_device(opt.device)
# Create model
im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
model = Model(opt.cfg).to(device)
# Options
if opt.line_profile: # profile layer by layer
model(im, profile=True)
elif opt.profile: # profile forward-backward
results = profile(input=im, ops=[model], n=3)
elif opt.test: # test all models
for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
try:
_ = Model(cfg)
except Exception as e:
print(f'Error in {cfg}: {e}')
else: # report fused model summary
model.fuse()
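
A minimal usage sketch for the classes above (the cfg path is hypothetical; any of the detection YAML configs in this commit would do):

import torch
from models.yolo import DetectionModel

model = DetectionModel(cfg='models/yolov5s.yaml', ch=3, nc=80)  # hypothetical config path
model.eval()
with torch.no_grad():
    pred, feats = model(torch.zeros(1, 3, 640, 640))  # eval mode returns (concatenated preds, per-level maps)
print(pred.shape)  # -> torch.Size([1, 25200, 85]) for a 640px P5 model with nc=80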

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,49 @@
# YOLOv5 requirements
# Usage: pip install -r requirements.txt
# Base ------------------------------------------------------------------------
gitpython>=3.1.30
matplotlib>=3.3
numpy>=1.22.2
opencv-python>=4.1.1
Pillow>=7.1.2
psutil # system resources
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
thop>=0.1.1 # FLOPs computation
torch>=1.8.0 # see https://pytorch.org/get-started/locally (recommended)
torchvision>=0.9.0
tqdm>=4.64.0
ultralytics>=8.0.147
# protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012
# Logging ---------------------------------------------------------------------
# tensorboard>=2.4.1
# clearml>=1.2.0
# comet
# Plotting --------------------------------------------------------------------
pandas>=1.1.4
seaborn>=0.11.0
# Export ----------------------------------------------------------------------
# coremltools>=6.0 # CoreML export
# onnx>=1.10.0 # ONNX export
# onnx-simplifier>=0.4.1 # ONNX simplifier
# nvidia-pyindex # TensorRT export
# nvidia-tensorrt # TensorRT export
# scikit-learn<=1.1.2 # CoreML quantization
# tensorflow>=2.4.0 # TF exports (-cpu, -aarch64, -macos)
# tensorflowjs>=3.9.0 # TF.js export
# openvino-dev>=2023.0 # OpenVINO export
# Deploy ----------------------------------------------------------------------
setuptools>=65.5.1 # Snyk vulnerability fix
# tritonclient[all]~=2.24.0
# Extras ----------------------------------------------------------------------
# ipython # interactive notebook
# mss # screenshots
# albumentations>=1.0.3
# pycocotools>=2.0.6 # COCO mAP

159
ytracking/track_.py Normal file
View File

@ -0,0 +1,159 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
import argparse
import csv
import os
import platform
import sys
from pathlib import Path
import glob
import numpy as np
import pickle
import torch
from tools.config import cfg
sys.path.append('./ytracking')
from models.common import DetectMultiBackend
from utils.dataloaders import LoadImages
from utils.general import (LOGGER, Profile, check_img_size, check_requirements, colorstr, cv2,
increment_path, non_max_suppression, scale_boxes, strip_optimizer)
from utils.torch_utils import select_device, smart_inference_mode
'''Integrated tracking module; outputs tracking-result files (.npy)'''
# from ultralytics.engine.results import Boxes # Results
# from ultralytics.utils import IterableSimpleNamespace, yaml_load
from tracking.utils.plotting import Annotator, colors
from tracking.utils import Boxes, IterableSimpleNamespace, yaml_load
from tracking.trackers import BOTSORT, BYTETracker
from tracking.utils.showtrack import drawtracks
import time
def init_trackers(tracker_yaml=None, bs=1):
"""
Initialize trackers for object tracking during prediction.
"""
# tracker_yaml = r"./tracking/trackers/cfg/botsort.yaml"
TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
cfg = IterableSimpleNamespace(**yaml_load(tracker_yaml))
trackers = []
for _ in range(bs):
tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
trackers.append(tracker)
return trackers
@smart_inference_mode()
def run(
# weights=cfg.tracking_model, # model path or triton URL
Model, # model path or triton URL
source=None, # file/dir/URL/glob/screen/0(webcam)
project=r'./runs/detect', # save results to project/name
tracker_yaml=cfg.botsort,
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
bs=1, # batch_size
save_img=True, # save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
line_thickness=3, # bounding box thickness (pixels)
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
):
if source is None:
raise ValueError("Have to provide --source argument")
# Load model
# device = select_device(device)
# model = DetectMultiBackend(weights, device=device, dnn=dnn, fp16=half)
if Model is None:
raise ValueError("Have to provide --model argument")
model = Model.yoloModel
print(model.stride, model.names, model.pt)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Run inference
model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup
##============================= create save_dir, where detection/tracking images are stored
source = str(source)
save_dir = Path(project) / Path(source).stem
# Dataloader
seen, dt = 0, (Profile(), Profile(), Profile())
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
## ================================================= create the tracker object
tracker = init_trackers(tracker_yaml, bs)[0]
track_boxes = np.empty((0, 9), dtype=np.float32)
features_dict = {}
frameid_img = {}
for path, im, im0s, vid_cap, s in dataset:
# img preprocess
with dt[0]:
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
with dt[1]:
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred = model(im, augment=augment, visualize=visualize)
# NMS
with dt[2]:
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
# Process predictions
for i, det in enumerate(pred): # per image
seen += 1
frameid_img[seen] = im0s.copy()
p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
s += '%gx%g ' % im.shape[2:] # print string
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
# boxes_and_imgs.append((det.cpu().numpy(), im0, frame))
## ================================================================ written by WQG
det_tracking = Boxes(det, im0.shape).cpu().numpy()
tracks = tracker.update(det_tracking, im0)
if len(tracks):
tracks[:, 7] = seen
track_boxes = np.concatenate([track_boxes, tracks], axis=0)
feat_dict = {int(x.idx): x.curr_feat for x in tracker.tracked_stracks if x.is_activated}
frame_id = tracks[0, 7]
features_dict.update({int(frame_id): feat_dict})
return track_boxes, features_dict, frameid_img
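# A minimal usage sketch of run()'s outputs (model_wrapper is a hypothetical
# object exposing .yoloModel). track_boxes columns are
# [x1, y1, x2, y2, track_id, score, cls, frame_index, box_id], and
# features_dict maps frame_index -> {box_id: reid feature}:
#
#   track_boxes, features_dict, frameid_img = run(Model=model_wrapper,
#                                                 source='video.mp4')
#   for tid in np.unique(track_boxes[:, 4]).astype(int):
#       tboxes = track_boxes[track_boxes[:, 4] == tid]  # one track's boxes
#       feats = [features_dict[int(f)][int(b)]          # its reid features
#                for f, b in tboxes[:, 7:9]
#                if int(f) in features_dict and int(b) in features_dict[int(f)]]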
def main():
ROOT = Path(Path.cwd())
check_requirements(ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
optdict = {'weights': r"D:/Project/ieemoo-ai/tools/ckpts/best_158734_cls11_noaug10.pt",
'source': r"D:/Project/ieemoo-ai/testdata/88.mp4",
}
run(**optdict)
if __name__ == '__main__':
main()

226
ytracking/track_.py.bak Normal file
View File

@ -0,0 +1,226 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
import argparse
import csv
import os
import platform
import sys
from pathlib import Path
import glob
import numpy as np
import pickle
import torch
# =============================================================================
# FILE = Path(__file__).resolve()
# ROOT = FILE.parents[0] # YOLOv5 root directory
# if str(ROOT) not in sys.path:
# sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
# =============================================================================
sys.path.append('./ytracking')
from models.common import DetectMultiBackend
from utils.dataloaders import LoadImages
from utils.general import (LOGGER, Profile, check_img_size, check_requirements, colorstr, cv2,
increment_path, non_max_suppression, scale_boxes, strip_optimizer)
from utils.torch_utils import select_device, smart_inference_mode
'''Integrated tracking module; outputs tracking-result files (.npy)'''
# from ultralytics.engine.results import Boxes # Results
# from ultralytics.utils import IterableSimpleNamespace, yaml_load
from tracking.utils.plotting import Annotator, colors
from tracking.utils import Boxes, IterableSimpleNamespace, yaml_load
from tracking.trackers import BOTSORT, BYTETracker
from tracking.utils.showtrack import drawtracks
# tracker_yaml = r"./tracking/trackers/cfg/botsort.yaml"
def init_trackers(tracker_yaml=None, bs=1):
"""
Initialize trackers for object tracking during prediction.
"""
# tracker_yaml = r"./tracking/trackers/cfg/botsort.yaml"
TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
cfg = IterableSimpleNamespace(**yaml_load(tracker_yaml))
trackers = []
for _ in range(bs):
tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
trackers.append(tracker)
return trackers
@smart_inference_mode()
def run(
weights=r"D:/Project/ieemoo-ai/tools/ckpts/best_158734_cls11_noaug10.pt", # model path or triton URL
source=r"D:/Project/ieemoo-ai/testdata/88.mp4", # file/dir/URL/glob/screen/0(webcam)
project=r'./runs/detect', # save results to project/name
name='exp', # save results to project/name
tracker_yaml="D:/Project/ieemoo-ai/ytracking/tracking/trackers/cfg/botsort.yaml",
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
bs=1, # batch_size
save_txt=False, # save results to *.txt
save_img=True, # save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
exist_ok=False, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
):
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Run inference
model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup
##============================= create save_dir, where detection/tracking images are stored
source = str(source)
save_dir = Path(project) / Path(source).stem
if save_dir.exists():
print(Path(source).stem)
# return
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
else:
save_dir.mkdir(parents=True, exist_ok=True)
# Dataloader
seen, dt = 0, (Profile(), Profile(), Profile())
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
## ================================================= create the tracker object
tracker = init_trackers(tracker_yaml, bs)[0]
track_boxes = np.empty((0, 9), dtype=np.float32)
features_dict = {}
for path, im, im0s, vid_cap, s in dataset:
# img preprocess
with dt[0]:
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
with dt[1]:
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred = model(im, augment=augment, visualize=visualize)
# NMS
with dt[2]:
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
# Process predictions
for i, det in enumerate(pred): # per image
seen += 1
p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
im0_ant = im0.copy()
p = Path(p) # to Path
save_path = str(save_dir / p.name) # im.jpg
s += '%gx%g ' % im.shape[2:] # print string
annotator = Annotator(im0_ant, line_width=line_thickness, example=str(names)) if save_img else None
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
# boxes_and_imgs.append((det.cpu().numpy(), im0, frame))
## ================================================================ written by WQG
det_tracking = Boxes(det, im0.shape).cpu().numpy()
tracks = tracker.update(det_tracking, im0)
if len(tracks):
track_boxes = np.concatenate([track_boxes, tracks], axis=0)
feat_dict = {int(x.idx): x.curr_feat for x in tracker.tracked_stracks if x.is_activated}
frame_id = tracks[0, 7]
features_dict.update({int(frame_id): feat_dict})
if annotator is not None:
for *xyxy, id, conf, cls, fid, bid in reversed(tracks):
name = ('' if id == -1 else f'id:{int(id)} ') + names[int(cls)]
label = None if hide_labels else (name if hide_conf else f'{name} {conf:.2f}')
if id >= 0 and cls == 0:
color = colors(int(cls), True)
elif id >= 0 and cls != 0:
color = colors(int(id), True)
else:
color = colors(19, True) # 19 is the last element of the color palette
annotator.box_label(xyxy, label, color=color)
# Save tracking image
if annotator is not None:
save_path_img, ext = os.path.splitext(save_path)
imgpath = save_path_img + f"_{dataset.frame}.png"
cv2.imwrite(str(imgpath), annotator.result())
# Print time (inference-only)
LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
## ======================================================================== written by WQG
''' track_boxes: Array, [x1, y1, x2, y2, track_id, score, cls, frame_index, box_id] '''
if save_img:
filename = os.path.split(save_path_img)[-1]
'''====== save in './run/detect/' ======'''
imgshow = drawtracks(track_boxes)
showpath_1 = save_path_img + "_show.png"
cv2.imwrite(showpath_1, imgshow)
'''====== save tracks data ======'''
tracks_dir = Path('D:/Project/ieemoo-ai/ytracking/tracking/tracking/data/tracks/')
if not tracks_dir.exists():
tracks_dir.mkdir(parents=True, exist_ok=True)
tracks_path = tracks_dir.joinpath(filename + ".npy")
np.save(tracks_path, track_boxes)
'''====== save reid features data ======'''
feats_dir = Path('D:/Project/ieemoo-ai/ytracking/tracking/data/trackfeats/')
if not feats_dir.exists():
feats_dir.mkdir(parents=True, exist_ok=True)
feats_path = feats_dir.joinpath(f'{filename}.pkl')
with open(feats_path, 'wb') as file:
pickle.dump(features_dict, file)
# Print results
t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image
LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(weights) # update model (to fix SourceChangeWarning)
def main():
ROOT = Path(Path.cwd())
check_requirements(ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
optdict = {'weights': r"D:/Project/ieemoo-ai/tools/ckpts/best_158734_cls11_noaug10.pt",
'source': r"D:/Project/ieemoo-ai/testdata/88.mp4",
}
run(**optdict)
if __name__ == '__main__':
main()

View File

Binary file not shown.

View File

Binary file not shown.


Binary file not shown.


Binary file not shown.


View File

@ -0,0 +1,357 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:16:01 2024
@author: ym
"""
import numpy as np
import cv2
from pathlib import Path
from scipy.spatial.distance import cdist
from ytracking.tracking.utils.mergetrack import track_equal_track
# curpath = Path(__file__).resolve().parents[0]
from tools.config import cfg
class MoveState:
"""商品运动状态标志"""
Static = 0
DownWard = 1
UpWard = 2
FreeMove = 3
HandHborder = 4
Unknown = -1
class ShoppingCart:
def __init__(self, bboxes):
self.bboxes = bboxes
self.loadrate = self.load_rate()
def load_rate(self):
bboxes = self.bboxes
fid = min(bboxes[:, 7])
idx = bboxes[:, 7] == fid
boxes = bboxes[idx]
temp = np.zeros(self.incart.shape, np.uint8)
for i in range(boxes.shape[0]):
x1, y1, x2, y2, tid = boxes[i, 0:5]
cv2.rectangle(temp, (int(x1), int(y1)), (int(x2), int(y2)), 255, cv2.FILLED)
'''1. AND: filter out interference outside the cart border'''
loadstate = cv2.bitwise_and(self.incart, temp)
'''2. XOR: obtain the filled region inside the cart'''
# loadstate = cv2.bitwise_xor(self.incart, temp1)
num_loadstate = cv2.countNonZero(loadstate)
num_incart = cv2.countNonZero(self.incart)
loadrate = num_loadstate / (num_incart+0.01)
# edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png", cv2.IMREAD_GRAYSCALE)
# cv2.imwrite(f"./test/temp.png", cv2.add(temp, edgeline))
# cv2.imwrite(f"./test/incart.png", cv2.add(self.incart, edgeline))
# cv2.imwrite(f"./test/loadstate.png", cv2.add(loadstate, edgeline))
return loadrate
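# A minimal usage sketch with synthetic masks (assumes the incart template is
# a binary image whose cart interior is 255):
#
#   incart = np.zeros((1280, 1024), np.uint8)
#   cv2.rectangle(incart, (200, 400), (824, 1200), 255, cv2.FILLED)
#   box = np.zeros(incart.shape, np.uint8)
#   cv2.rectangle(box, (300, 500), (500, 700), 255, cv2.FILLED)
#   rate = cv2.countNonZero(cv2.bitwise_and(incart, box)) / cv2.countNonZero(incart)
#   # rate ~= 0.08: the box fills about 8% of the cart interior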
@property
def incart(self):
# img = cv2.imread(curpath/'cart_tempt'/'back_incart.png', cv2.IMREAD_GRAYSCALE)
img = cv2.imread(cfg.incart, cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
@property
def outcart(self):
# img = cv2.imread(curpath/'cart_tempt'/'back_outcart.png', cv2.IMREAD_GRAYSCALE)
img = cv2.imread(cfg.outcart, cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
@property
def cartedge(self):
# img = cv2.imread(curpath/'cart_tempt'/'back_cartedge.png', cv2.IMREAD_GRAYSCALE)
img = cv2.imread(cfg.cartedge, cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
class Track:
'''Abstract base class; not meant to be instantiated directly'''
def __init__(self, boxes, imgshape=(1024, 1280)):
'''
boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
'''
self.boxes = boxes
self.tid = int(boxes[0, 4])
self.cls = int(boxes[0, 6])
self.frnum = boxes.shape[0]
self.imgBorder = False
self.imgshape = imgshape
self.state = MoveState.Unknown
'''Start and end frame IDs of the track'''
self.start_fid = int(np.min(boxes[:, 7]))
self.end_fid = int(np.max(boxes[:, 7]))
# Can be implemented in subclasses as needed, to reduce computation in sequential processing
self.compute_cornpoints()
self.compute_cornpts_feats()
''' Some computations based on (x, y, w, h) (deprecated).
Displacement between the last frame and the first frame:
vshift: positive = downward, negative = upward
hshift: positive = item moving toward the image center, negative = toward the cart borders
'''
x0, y0 = (boxes[:,0] + boxes[:,2])/2, (boxes[:, 1] + boxes[:, 3])/2
mw, mh = np.mean(boxes[:, 2]-boxes[:, 0]), np.mean((boxes[:, 3]-boxes[:, 1]))
self.mwh = np.mean((mw, mh))
self.Area = mw * mh
self.vshift = y0[-1] - y0[0]
self.hshift = abs(x0[0]-self.imgshape[0]/2) - abs(x0[-1]-self.imgshape[0]/2)
# self.boxmean = [np.mean(self.boxes[:, k]) for k in range(4)]
# self.mwh = np.mean(self.boxmean[2:])
# self.Area = boxes[:,2] * boxes[:,3]
# self.vshift = boxes[-1, 1] - boxes[0, 1]
# self.hshift = abs(boxes[0, 0]-self.imgshape[0]/2) - abs(boxes[-1, 0]-self.imgshape[0]/2)
def compute_cornpoints(self):
'''
cornpoints has 10 entries: the (x, y) coordinates of 5 points
(center, top_left, top_right, bottom_left, bottom_right)
'''
boxes = self.boxes
cornpoints = np.zeros((self.frnum, 10))
cornpoints[:,0] = (boxes[:, 0] + boxes[:, 2]) / 2
cornpoints[:,1] = (boxes[:, 1] + boxes[:, 3]) / 2
cornpoints[:,2], cornpoints[:,3] = boxes[:, 0], boxes[:, 1]
cornpoints[:,4], cornpoints[:,5] = boxes[:, 2], boxes[:, 1]
cornpoints[:,6], cornpoints[:,7] = boxes[:, 0], boxes[:, 3]
cornpoints[:,8], cornpoints[:,9] = boxes[:, 2], boxes[:, 3]
self.cornpoints = cornpoints
def compute_cornpts_feats(self):
'''
Per-keypoint trajectory features: per-step lengths, total length,
maximum pairwise point distance, and minimum-area bounding rectangle.
'''
trajectory = []
trajlens = []
trajdist = []
trajrects = []
for k in range(5):
# diff_xy2 = np.power(np.diff(self.cornpoints[:, 2*k:2*(k+1)], axis = 0), 2)
# trajlen = np.sum(np.sqrt(np.sum(diff_xy2, axis = 1)))
X = self.cornpoints[:, 2*k:2*(k+1)]
traj = np.linalg.norm(np.diff(X, axis=0), axis=1)
trajectory.append(traj)
trajlen = np.sum(traj)
trajlens.append(trajlen)
ptdist = np.max(cdist(X, X))
trajdist.append(ptdist)
rect = cv2.minAreaRect(X.astype(np.int64))
trajrects.append(rect)
self.trajectory = trajectory
self.trajlens = trajlens
self.trajdist = trajdist
self.trajrects = trajrects
def trajfeature(self):
'''
Compute trajectory features for two cases (box edges inside vs. on the image border):
- trajmin: the minimum-length trajectory
- trajlen_min: the minimum trajectory length
- trajdist_min: the minimum trajectory point-to-point (Euclidean) extent
'''
idx1 = self.trajlens.index(max(self.trajlens))
trajmax = self.trajectory[idx1]
trajlen_max = self.trajlens[idx1]
trajdist_max = self.trajdist[idx1]
if not self.isCornpoint:
idx2 = self.trajlens.index(min(self.trajlens))
trajmin = self.trajectory[idx2]
trajlen_min = self.trajlens[idx2]
trajdist_min = self.trajdist[idx2]
else:
trajmin = self.trajectory[0]
trajlen_min = self.trajlens[0]
trajdist_min = self.trajdist[0]
'''min / max trajectory length; the smaller the ratio, the smaller the motion'''
trajlen_rate = trajlen_min/(trajlen_max+0.0001)
'''min trajectory Euclidean extent / mean box scale'''
trajdist_rate = trajdist_min/(self.mwh+0.0001)
self.trajmin = trajmin
self.trajmax = trajmax
self.feature = [trajlen_min, trajlen_max,
trajdist_min, trajdist_max,
trajlen_rate, trajdist_rate]
class doTracks:
def __init__(self, bboxes, features_dict):
self.bboxes = bboxes
self.features_dict = features_dict
self.frameid = set(bboxes[:, 7])
self.trackid = set(bboxes[:, 4])
self.lboxes = self.array2list()
'''Classify the elements of self.tracks into the corresponding lists'''
self.Static = []
self.DownWard = []
self.UpWard = []
self.FreeMove = []
self.Hands = []
self.Kids = []
self.HandHborder = []
self.Disruptors = []
self.Residual = []
self.Merged = []
def array2list(self):
'''
Convert bboxes into a list of per-track box arrays
bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
Return
lboxes: a list whose elements share the same track_id, in (x1, y1, x2, y2) format
[x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
'''
track_ids = set(self.bboxes[:, 4])
lboxes = []
for t_id in track_ids:
# print(f"The ID is: {t_id}")
idx = np.where(self.bboxes[:, 4] == t_id)[0]
box = self.bboxes[idx, :]
lboxes.append(box)
return lboxes
def similarity(self):
nt = len(self.tracks)
similar_dict = {}
if nt >= 2:
for i in range(nt):
for j in range(i, nt):
tracka = self.tracks[i]
trackb = self.tracks[j]
similar = self.feat_similarity(tracka, trackb)
similar_dict.update({(tracka.tid, trackb.tid): similar})
return similar_dict
def feat_similarity(self, tracka, trackb, metric='cosine'):
boxes_a, boxes_b = tracka.boxes, trackb.boxes
na, nb = tracka.boxes.shape[0], trackb.boxes.shape[0]
feata, featb = [], []
for i in range(na):
fid, bid = tracka.boxes[i, 7:9].astype(int)  # dict keys are ints
feata.append(self.features_dict[fid][bid])
for i in range(nb):
fid, bid = trackb.boxes[i, 7:9].astype(int)
featb.append(self.features_dict[fid][bid])
feata = np.asarray(feata, dtype=np.float32)
featb = np.asarray(featb, dtype=np.float32)
similarity_matrix = 1-np.maximum(0.0, cdist(feata, featb, metric))
feata_m = np.mean(feata, axis=0)[None, :]
featb_m = np.mean(featb, axis=0)[None, :]
simi_ab = 1 - cdist(feata_m, featb_m, metric)
print(f'tid {int(boxes_a[0, 4])} vs {int(boxes_b[0, 4])}: {simi_ab[0][0]}')
# return np.max(similarity_matrix)
return simi_ab
def merge_tracks_loop(self, alist):
na, nb = len(alist), 0
while na!=nb:
na = len(alist)
alist = self.merge_tracks(alist)
nb = len(alist)
return alist
def base_merge_tracks(self, Residual):
"""
Merge targets that have different IDs but may be the same product
"""
mergedTracks = []
alist = [t for t in Residual]
while alist:
atrack = alist[0]
cur_list = []
cur_list.append(atrack)
alist.pop(0)
blist = [b for b in alist]
alist = []
for btrack in blist:
if track_equal_track(atrack, btrack, self.features_dict):
cur_list.append(btrack)
else:
alist.append(btrack)
mergedTracks.append(cur_list)
return mergedTracks
@staticmethod
def join_tracks(tlista, tlistb):
"""Combine two lists of stracks into a single one."""
exists = {}
res = []
for t in tlista:
exists[t.tid] = 1
res.append(t)
for t in tlistb:
tid = t.tid
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
@staticmethod
def sub_tracks(tlista, tlistb):
track_ids_b = {t.tid for t in tlistb}
return [t for t in tlista if t.tid not in track_ids_b]

View File

@ -0,0 +1,204 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:36:31 2024
@author: ym
"""
import numpy as np
from ytracking.tracking.dotrack.dotracks import doTracks, ShoppingCart
from ytracking.tracking.dotrack.track_back import backTrack
class doBackTracks(doTracks):
def __init__(self, bboxes, features_dict):
super().__init__(bboxes, features_dict)
self.tracks = [backTrack(b) for b in self.lboxes]
# self.similar_dict = self.similarity()
self.shopcart = ShoppingCart(bboxes)
# =============================================================================
# def array2list(self):
# ''' 0, 1, 2, 3, 4, 5, 6, 7, 8
# bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# lboxes[x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# '''
#
# track_ids = set(self.bboxes[:, 4])
# lboxes = []
# for t_id in track_ids:
# idx = np.where(self.bboxes[:, 4] == t_id)[0]
# box = self.bboxes[idx, :]
#
# x = (box[:, 0] + box[:, 2]) / 2
# y = (box[:, 1] + box[:, 3]) / 2
#
# # box: [x, y, w, h, track_id, score, cls, frame_index]
# box[:, 2] = box[:, 2] - box[:, 0]
# box[:, 3] = box[:, 3] - box[:, 1]
# box[:, 0] = x
# box[:, 1] = y
#
# lboxes.append(box)
#
#
# return lboxes
# =============================================================================
def classify(self):
'''
Classify the elements in tracks
'''
tracks = self.tracks
shopcart = self.shopcart
# Extract hand frame_ids and associate them with the frame_ids of moving targets
hand_tracks = [t for t in tracks if t.cls==0]
self.Hands.extend(hand_tracks)
tracks = self.sub_tracks(tracks, hand_tracks)
# Extract kid tracks and compute their state: left, right, incart
kid_tracks = [t for t in tracks if t.cls==9]
kid_states = [self.kid_state(t) for t in kid_tracks]
self.Kids = [x for x in zip(kid_tracks, kid_states)]
tracks = self.sub_tracks(tracks, kid_tracks)
'''Static case 1: minimum relative keypoint motion trajectory < 0.2 (this metric tends to run large)
feature = [trajlen_min, trajlen_max,
trajdist_min, trajdist_max,
trajlen_rate, trajdist_rate]
'''
track1 = [t for t in tracks if t.feature[5] < 0.2
or t.feature[3] < 120
]
'''Static case 2: the target starts out static; relax the minimum relative keypoint trajectory to < 0.5'''
track2 = [t for t in tracks if t.static_index.size > 0
and t.static_index[0, 0] <= 2
and t.feature[5] < 0.5]
'''Static case 3: the target is static in both its initial and final states'''
track3 = [t for t in tracks if t.static_index.shape[0] >= 2
and t.static_index[0, 0] <= 2
and t.static_index[-1, 1] >= t.frnum-3]
track12 = self.join_tracks(track1, track2)
'''Extract the static tracks'''
static_tracks = self.join_tracks(track12, track3)
self.Static.extend(static_tracks)
'''Tracks left after removing static targets'''
tracks = self.sub_tracks(tracks, static_tracks)
'''Moving distractor targets outside the cart boundary'''
track4 = [t for t in tracks if self.isouttrack(t)]
tracks = self.sub_tracks(tracks, track4)
'''Iterative track merging'''
# merged_tracks = self.merge_tracks(tracks)
merged_tracks = self.merge_tracks_loop(tracks)
self.Residual = merged_tracks
def merge_tracks(self, Residual):
"""
Merge targets that have different IDs but may be the same product
"""
mergedTracks = self.base_merge_tracks(Residual)
oldtracks, newtracks = [], []
for tracklist in mergedTracks:
if len(tracklist) > 1:
boxes = np.empty((0, 9), dtype=np.float32)
for i, track in enumerate(tracklist):
if i==0: ntid, ncls=track.boxes[0, 4], track.boxes[0, 6]
iboxes = track.boxes.copy()
iboxes[:, 4], iboxes[:, 6] = ntid, ncls
boxes = np.concatenate((boxes, iboxes), axis=0)
oldtracks.append(track)
fid_indices = np.argsort(boxes[:, 7])
boxes_fid = boxes[fid_indices]
newtracks.append(backTrack(boxes_fid))
elif len(tracklist) == 1:
oldtracks.append(tracklist[0])
newtracks.append(tracklist[0])
redu = self.sub_tracks(Residual, oldtracks)
merged = self.join_tracks(redu, newtracks)
return merged
def kid_state(self, track):
left_dist = track.cornpoints[:, 2]
right_dist = 1024 - track.cornpoints[:, 4]
if np.sum(left_dist<30)/track.frnum>0.8 and np.sum(right_dist>512)/track.frnum>0.7:
kidstate = "left"
elif np.sum(left_dist>512)/track.frnum>0.7 and np.sum(right_dist<30)/track.frnum>0.8:
kidstate = "right"
else:
kidstate = "incart"
return kidstate
def hand_association(self):
"""
Analyze the association between products and hands
"""
pass
def isouttrack(self, track):
return track.posState <= 1
def isuptrack(self, track):
Flag = False
return Flag
def isdowntrack(self, track):
Flag = False
return Flag
def isfreetrack(self, track):
Flag = False
return Flag

View File

@ -0,0 +1,190 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:38:20 2024
@author: ym
"""
import numpy as np
from ytracking.tracking.utils.mergetrack import track_equal_track
from .dotracks import doTracks
from .track_front import frontTrack
class doFrontTracks(doTracks):
def __init__(self, bboxes, features_dict):
super().__init__(bboxes, features_dict)
self.tracks = [frontTrack(b) for b in self.lboxes]
def classify(self):
'''Classify the elements in tracks'''
tracks = self.tracks
'''Extract hand tracks'''
hand_tracks = [t for t in tracks if t.cls==0]
for htrack in hand_tracks:
htrack.extract_hand_features()
self.Hands.extend(hand_tracks)
tracks = self.sub_tracks(tracks, hand_tracks)
'''Extract kid tracks'''
kid_tracks = [t for t in tracks if t.cls==9]
tracks = self.sub_tracks(tracks, kid_tracks)
'''Static tracks'''
static_tracks = [t for t in tracks if t.frnum>1 and t.is_Static()]
'''Tracks left after removing static targets'''
tracks = self.sub_tracks(tracks, static_tracks)
'''Iterative track merging'''
merged_tracks = self.merge_tracks_loop(tracks)
tracks = [t for t in merged_tracks if t.frnum > 1]
for gtrack in tracks:
# print(f"Goods ID:{gtrack.tid}")
for htrack in hand_tracks:
if self.is_associate_with_hand(htrack, gtrack):
gtrack.hands.append(htrack)
freemoved_tracks = [t for t in tracks if t.is_FreeMove()]
tracks = self.sub_tracks(tracks, freemoved_tracks)
self.Residual = tracks
def is_associate_with_hand(self, htrack, gtrack):
'''Criteria for associating a hand track with a product track:
a. their motion-frame indices intersect
b. IoU is greater than 0 on every intersecting frame
'''
assert htrack.cls==0 and gtrack.cls!=0 and gtrack.cls!=9, 'Track cls is invalid!'
hboxes = np.empty(shape=(0, 9), dtype = float)
gboxes = np.empty(shape=(0, 9), dtype = float)
# start, end are inclusive indices, hence the slice start:(end+1)
for start, end in htrack.dynamic_y2:
hboxes = np.concatenate((hboxes, htrack.boxes[start:end+1, :]), axis=0)
for start, end in gtrack.dynamic_y1:
gboxes = np.concatenate((gboxes, gtrack.boxes[start:end+1, :]), axis=0)
hfids, gfids = hboxes[:, 7], gboxes[:, 7]
fids = set(hfids).intersection(set(gfids))
if len(fids)==0:
return False
# print(f"Goods ID: {gtrack.tid}, Hand ID: {htrack.tid}")
ious = []
for f in fids:
h = np.where(hfids==f)[0][0]
g = np.where(gfids==f)[0][0]
x11, y11, x12, y12 = hboxes[h, 0:4]
x21, y21, x22, y22 = gboxes[g, 0:4]
x1, y1 = max((x11, x21)), max((y11, y21))
x2, y2 = min((x12, x22)), min((y12, y22))
inter = (x2 - x1).clip(0) * (y2 - y1).clip(0)
area1 = (x12 - x11) * (y12 - y11)
area2 = (x22 - x21) * (y22 - y21)
iou = inter / (area1 + area2 - inter + 1e-6)
if iou>0:
ious.append(iou)
return len(ious) > 0
def merge_tracks(self, Residual):
"""
Merge targets that have different IDs but may be the same product
"""
# =============================================================================
# mergedTracks = []
# alist = [t for t in Residual]
# while alist:
# atrack = alist[0]
# cur_list = []
# cur_list.append(atrack)
# alist.pop(0)
#
# blist = [b for b in alist]
# alist = []
# for btrack in blist:
# if track_equal_track(atrack, btrack, self.features_dict):
# cur_list.append(btrack)
# else:
# alist.append(btrack)
#
# mergedTracks.append(cur_list)
# =============================================================================
mergedTracks = self.base_merge_tracks(Residual)
oldtracks, newtracks = [], []
for tracklist in mergedTracks:
if len(tracklist) > 1:
boxes = np.empty((0, 9), dtype=np.float32)
for i, track in enumerate(tracklist):
if i==0: ntid, ncls=track.boxes[0, 4], track.boxes[0, 6]
iboxes = track.boxes.copy()
iboxes[:, 4], iboxes[:, 6] = ntid, ncls
boxes = np.concatenate((boxes, iboxes), axis=0)
oldtracks.append(track)
fid_indices = np.argsort(boxes[:, 7])
boxes_fid = boxes[fid_indices]
newtracks.append(frontTrack(boxes_fid))
elif len(tracklist) == 1:
oldtracks.append(tracklist[0])
newtracks.append(tracklist[0])
redu = self.sub_tracks(Residual, oldtracks)
merged = self.join_tracks(redu, newtracks)
return merged
# =============================================================================
# def array2list(self):
# '''
# 将 bboxes 变换为 track 列表
# bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# Return
# lboxes列表列表中元素具有同一 track_idx1y1x2y2 格式
# [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# '''
# track_ids = set(self.bboxes[:, 4])
# lboxes = []
# for t_id in track_ids:
# # print(f"The ID is: {t_id}")
# idx = np.where(self.bboxes[:, 4] == t_id)[0]
# box = self.bboxes[idx, :]
#
# lboxes.append(box)
#
# return lboxes
# =============================================================================

View File

@ -0,0 +1,304 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:28:47 2024
@author: ym
"""
import cv2
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
from .dotracks import MoveState, Track
from tools.config import cfg
class backTrack(Track):
# boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# 0, 1, 2, 3, 4, 5, 6, 7, 8
def __init__(self, boxes, imgshape=(1024, 1280)):
super().__init__(boxes, imgshape)
'''(cent_x, cent_y, mean_w, mean_h)'''
'''Trajectory features of 5 keypoints (center, top-left, top-right, bottom-left, bottom-right)'''
self.compute_cornpts_feats()
'''Depends on: self.cornpoints'''
self.isCornpoint = self.isimgborder()
'''Depends on self.cornpoints and self.isCornpoint; defines the trajectory features:
self.trajectory, self.trajmin, self.trajlens, self.trajdist
self.feature = [trajlen_min, trajlen_max,
trajdist_min, trajdist_max,
trajlen_rate, trajdist_rate] '''
self.trajfeature()
'''Frame indices of static points'''
self.static_index = self.compute_static_fids()
'''Frame indices of moving points (bounded by the static frames on either side)'''
self.moving_index = self.compute_moving_fids()
'''Depends on self.cornpoints; defines 4 product-position variables:
self.Cent_isIncart, self.LB_isIncart, self.RB_isIncart
self.posState = self.Cent_isIncart+self.LB_isIncart+self.RB_isIncart'''
self.PositionState()
'''self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates'''
self.compute_ious_feat()
# self.PCA()
# =============================================================================
# def compute_cornpoints(self):
# '''
# cornpoints 共10项分别是个点的坐标值x, y
# (center, top_left, top_right, bottom_left, bottom_right)
# '''
# boxes = self.boxes
# cornpoints = np.zeros((self.frnum, 10))
# cornpoints[:,0], cornpoints[:,1] = boxes[:, 0], boxes[:, 1]
# cornpoints[:,2], cornpoints[:,3] = boxes[:, 0] - boxes[:, 2]/2, boxes[:, 1] - boxes[:, 3]/2
# cornpoints[:,4], cornpoints[:,5] = boxes[:, 0] + boxes[:, 2]/2, boxes[:, 1] - boxes[:, 3]/2
# cornpoints[:,6], cornpoints[:,7] = boxes[:, 0] - boxes[:, 2]/2, boxes[:, 1] + boxes[:, 3]/2
# cornpoints[:,8], cornpoints[:,9] = boxes[:, 0] + boxes[:, 2]/2, boxes[:, 1] + boxes[:, 3]/2
#
# self.cornpoints = cornpoints
# =============================================================================
def isimgborder(self, BoundPixel=10, BoundThresh=0.3):
x1, y1 = self.cornpoints[:,2], self.cornpoints[:,3],
x2, y2 = self.cornpoints[:,8], self.cornpoints[:,9]
cont1 = sum(abs(x1)<BoundPixel) / self.frnum > BoundThresh
cont2 = sum(abs(y1)<BoundPixel) / self.frnum > BoundThresh
cont3 = sum(abs(x2-self.imgshape[0])<BoundPixel) / self.frnum > BoundThresh
cont4 = sum(abs(y2-self.imgshape[1])<BoundPixel) / self.frnum > BoundThresh
cont = cont1 or cont2 or cont3 or cont4
isCornpoint = False
if cont:
isCornpoint = True
return isCornpoint
def PositionState(self, camerType="back"):
'''
camerType: back, 后置摄像头
front, 前置摄像头
'''
if camerType=="front":
incart = cv2.imread(cfg.incart, cv2.IMREAD_GRAYSCALE)
else:
incart = cv2.imread(cfg.incart_ftmp, cv2.IMREAD_GRAYSCALE)
xc, yc = self.cornpoints[:,0].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,1].clip(0,self.imgshape[1]-1).astype(np.int64)
x1, y1 = self.cornpoints[:,6].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,7].clip(0,self.imgshape[1]-1).astype(np.int64)
x2, y2 = self.cornpoints[:,8].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,9].clip(0,self.imgshape[1]-1).astype(np.int64)
# print(self.tid)
Cent_inCartnum = np.count_nonzero(incart[(yc, xc)])
LB_inCartnum = np.count_nonzero(incart[(y1, x1)])
RB_inCartnum = np.count_nonzero(incart[(y2, x2)])
self.Cent_isIncart = False
self.LB_isIncart = False
self.RB_isIncart = False
if Cent_inCartnum: self.Cent_isIncart = True
if LB_inCartnum: self.LB_isIncart = True
if RB_inCartnum: self.RB_isIncart = True
self.posState = self.Cent_isIncart+self.LB_isIncart+self.RB_isIncart
def PCA(self):
self.pca = PCA()
X = self.cornpoints[:, 0:2]
self.pca.fit(X)
def compute_ious_feat(self):
'''Outputs:
self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates
Where:
box stream: the trajectory map formed by all boxes of the track, split into three parts: incart, outcart, cartboarder
incart_iou, outcart_iou, cartboarder_iou: IoU of each part with the box stream
incart_iou = 0: the track is outside the cart
outcart_iou = 0: the track is inside the cart (it may also have been placed in via the bottom-left or bottom-right corner)
maxbox_iou, minbox_iou: IoU of the largest / smallest box with the box stream; the smaller their difference (the closer to 1), the smaller the track's motion
incartrates: per-box IoU with incart over time; rising values indicate placing in, falling values indicate taking out
'''
incart = cv2.imread(cfg.incart, cv2.IMREAD_GRAYSCALE)
outcart = cv2.imread(cfg.outcart, cv2.IMREAD_GRAYSCALE)
cartboarder = cv2.imread(cfg.cartboarder, cv2.IMREAD_GRAYSCALE)
incartrates = []
temp = np.zeros(incart.shape, np.uint8)
maxarea, minarea = 0, self.imgshape[0]*self.imgshape[1]
for i in range(self.frnum):
# x, y, w, h = self.boxes[i, 0:4]
x = (self.boxes[i, 2] + self.boxes[i, 0]) / 2  # box center
w = self.boxes[i, 2] - self.boxes[i, 0]        # full box width
y = (self.boxes[i, 3] + self.boxes[i, 1]) / 2
h = self.boxes[i, 3] - self.boxes[i, 1]        # full box height
if w*h > maxarea: maxarea = w*h
if w*h < minarea: minarea = w*h
cv2.rectangle(temp, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), 255, cv2.FILLED)
temp1 = np.zeros(incart.shape, np.uint8)
cv2.rectangle(temp1, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), 255, cv2.FILLED)
temp2 = cv2.bitwise_and(incart, temp1)
inrate = cv2.countNonZero(temp2)/(w*h)  # in-cart fraction of this box
incartrates.append(inrate)
isincart = cv2.bitwise_and(incart, temp)
isoutcart = cv2.bitwise_and(outcart, temp)
iscartboarder = cv2.bitwise_and(cartboarder, temp)
num_temp = cv2.countNonZero(temp)
num_incart = cv2.countNonZero(isincart)
num_outcart = cv2.countNonZero(isoutcart)
num_cartboarder = cv2.countNonZero(iscartboarder)
incart_iou = num_incart/num_temp
outcart_iou = num_outcart/num_temp
cartboarder_iou = num_cartboarder/num_temp
maxbox_iou = maxarea/num_temp
minbox_iou = minarea/num_temp
self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates
def compute_static_fids(self, thresh1 = 12, thresh2 = 3):
'''
Compute the (start_frame_id, end_frame_id) pairs of the relatively static points of the track's trajectory
thresh1: pixel threshold for whether the target center is static between adjacent frames
thresh2: number of consecutive frames the target must stay static
'''
BoundPixel = 8
x1, y1 = self.cornpoints[:,2], self.cornpoints[:,3],
x2, y2 = self.cornpoints[:,8], self.cornpoints[:,9]
cont1 = sum(abs(x1)<BoundPixel) > 3
# cont2 = sum(abs(y1)<BoundPixel) > 3
cont3 = sum(abs(x2-self.imgshape[0])<BoundPixel) > 3
# cont4 = sum(abs(y2-self.imgshape[1])<BoundPixel) > 3
cont = not(cont1 or cont3)
## ============== next step: enable the center point, and pick the corner point with the smallest motion as the reference point
static_index = []
if self.frnum>=2 and cont:
x1 = self.boxes[1:,7]
x2 = [i for i in range(int(min(x1)), int(max(x1)+1))]
dist_adjc = np.interp(x2, x1, self.trajmin)
# dist_adjc = self.trajmin
static_thresh = (dist_adjc < thresh1)[:, None].astype(np.uint8)
static_cnts, _ = cv2.findContours(static_thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
for cnt in static_cnts:
_, start, _, num = cv2.boundingRect(cnt)
end = start + num
if num <= thresh2:
continue
static_index.append((start, end))
# =============================================================================
# '''========= outputting frame ids here is not ideal ========='''
# static_fids = []
# for i in range(len(static_index)):
# i1, i2 = static_index[i]
# fid1, fid2 = boxes[i1, 7], boxes[i2, 7]
# static_fids.append([fid1, fid2])
# static_fids = np.array(static_fids)
# =============================================================================
static_index = np.array(static_index)
if static_index.size:
indx = np.argsort(static_index[:, 0])
static_index = static_index[indx]
return static_index
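# The findContours call above treats the thresholded 1-D displacement as a
# one-column binary image and extracts runs of static frames. An equivalent
# pure-NumPy sketch of the same run detection (illustrative):
#
#   static = (dist_adjc < thresh1).astype(int)
#   edges = np.flatnonzero(np.diff(np.r_[0, static, 0]))
#   runs = edges.reshape(-1, 2)                       # (start, end) per run
#   runs = runs[(runs[:, 1] - runs[:, 0]) > thresh2]  # drop short runs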
def compute_moving_fids(self, thresh1 = 12, thresh2 = 3):
'''
Compute the (start_frame_id, end_frame_id) pairs of the moving points of the track's trajectory
thresh1: pixel threshold for whether the target center is moving between adjacent frames
thresh2: number of consecutive frames the target must keep moving
Goals:
1. compute the trajectory direction
2. compute the association with hand motion
'''
moving_index = []
if self.frnum>=2:
x1 = self.boxes[1:,7]
x2 = [i for i in range(int(min(x1)), int(max(x1)+1))]
dist_adjc = np.interp(x2, x1, self.trajmin)
moving_thresh = (dist_adjc >= thresh1)[:, None].astype(np.uint8)
moving_cnts, _ = cv2.findContours(moving_thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
for cnt in moving_cnts:
_, start, _, num = cv2.boundingRect(cnt)
if num < thresh2:
continue
end = start + num
moving_index.append((start, end))
# =============================================================================
# '''========= outputting frame ids here is not ideal ========='''
# moving_fids = []
# for i in range(len(moving_index)):
# i1, i2 = moving_index[i]
# fid1, fid2 = boxes[i1, 7], boxes[i2, 7]
# moving_fids.append([fid1, fid2])
# moving_fids = np.array(moving_fids)
# =============================================================================
moving_index = np.array(moving_index)
if moving_index.size:
indx = np.argsort(moving_index[:, 0])
moving_index = moving_index[indx]
return moving_index
def compute_distance(self):
pass
def move_start_fid(self):
pass
def move_end_fid(self):
pass

View File

@ -0,0 +1,275 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:33:01 2024
@author: ym
"""
import numpy as np
from sklearn.cluster import KMeans
from .dotracks import MoveState, Track
class frontTrack(Track):
# boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# 0, 1, 2, 3, 4, 5, 6, 7, 8
def __init__(self, boxes, imgshape=(1024, 1280)):
super().__init__(boxes, imgshape)
self.hands = []
self.mwh = np.mean((self.boxes[:, 2] - self.boxes[:, 0]) *
(self.boxes[:, 3] - self.boxes[:, 1]))
'''Trajectory features of 5 keypoints (center, top-left, top-right, bottom-left, bottom-right)'''
self.compute_cornpts_feats()
self.CART_HIGH_THRESH1 = imgshape[1]/2.98
# if self.tid==10:
# print(f"ID: {self.tid}")
'''Static-state intervals of y1 and y2; the values are axis-0 indices into boxes, not frame indices'''
self.static_y1, self.dynamic_y1 = self.compute_static_fids(boxes[:, 1])
self.static_y2, self.dynamic_y2 = self.compute_static_fids(boxes[:, 3])
self.isCornpoint = self.is_left_right_cornpoint()
self.isBotmpoint = self.is_bottom_cornpoint()
self.trajfeature()
'''Hand-state analysis'''
self.HAND_STATIC_THRESH = 100
self.CART_POSIT_0 = 430
self.CART_POSIT_1 = 620
def is_left_right_cornpoint(self):
''' Based on all(boxes):
the bottom-left corner of the boxes coincides with the image's bottom-left corner, or
the bottom-right corner of the boxes coincides with the image's bottom-right corner
'''
x1, y1 = self.boxes[:, 0], self.boxes[:, 1]
x2, y2 = self.boxes[:, 2], self.boxes[:, 3]
# Left-Bottom cornpoint
condt1 = all(x1 < 5) and all(y2 > self.imgshape[1]-5)
# Right-Bottom cornpoint
condt2 = all(x2 > self.imgshape[0]-5) and all(y2 > self.imgshape[1]-5)
condt = condt1 or condt2
return condt
def is_edge_cornpoint(self):
'''Based on all(boxes): whether the boxes overlap the image's left/right edges'''
x1, x2 = self.boxes[:, 0], self.boxes[:, 2]
condt = all(x1 < 3) or all(x2 > self.imgshape[0]-3)
return condt
def is_bottom_cornpoint(self):
'''Based on all(boxes): whether the boxes overlap the image's bottom edge'''
condt = all(self.boxes[:, 3] > self.imgshape[1]-20)
return condt
def compute_static_fids(self, y, STATIC_THRESH = 8):
'''
For the front camera, y is usually the box's y1 coordinate, and the product should be restricted to inside the cart.
inputs
y: 1D array
parameters
STATIC_THRESH: threshold below which the trajectory counts as static
outputs
(start, end) index pairs into y where the successive differences stay below STATIC_THRESH
ranges = [(start, end),
(start, end),
...]
'''
# print(f"The ID is: {self.tid}")
det_y = np.diff(y, axis=0)
ranges, rangex = [], []
static_indices = np.where(np.abs(det_y) < STATIC_THRESH)[0]
if len(static_indices) == 0:
rangex.append((0, len(y)-1))
return ranges, rangex
start_index = static_indices[0]
for i in range(1, len(static_indices)):
if static_indices[i] != static_indices[i-1] + 1:
ranges.append((start_index, static_indices[i-1] + 1))
start_index = static_indices[i]
ranges.append((start_index, static_indices[-1] + 1))
if len(ranges) == 0:
rangex.append((0, len(y)-1))
return ranges, rangex
idx1, idx2 = ranges[0][0], ranges[-1][1]
if idx1 != 0:
rangex.append((0, idx1))
# The final stage of the trajectory is in motion
for k in range(1, len(ranges)):
index1 = ranges[k-1][1]
index2 = ranges[k][0]
rangex.append((index1, index2))
if idx2 != len(y)-1:
rangex.append((idx2, len(y)-1))
return ranges, rangex
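# A worked toy example (illustrative) with STATIC_THRESH = 8:
#
#   y = np.array([100, 102, 103, 300, 301, 302, 500])
#   ranges, rangex = self.compute_static_fids(y)
#   # ranges -> [(0, 2), (3, 5)]  static index intervals of y
#   # rangex -> [(2, 3), (5, 6)]  the moving gaps between / after them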
def is_Static(self):
assert self.frnum > 1, "the number of boxes must be greater than 1"
# print(f"The ID is: {self.tid}")
# Hand and kid targets are not considered
if self.cls == 0 or self.cls == 9:
return False
# All boxes have y2 = 1280 (on the bottom edge)
if self.isBotmpoint:
return True
boxes = self.boxes
y0 = (boxes[:, 1]+boxes[:, 3])/2
## Net vertical displacement
sum_y0 = y0[-1] - y0[0]
sum_y1 = boxes[-1, 1]-boxes[0, 1]
sum_y2 = boxes[-1, 3]-boxes[0, 3]
# Some special cases that need consideration
isbottom = max(boxes[:, 3]) > 1280-3
istop = min(boxes[:, 1]) < 3
isincart = min(y0) > self.CART_HIGH_THRESH1
uncert = abs(sum_y1)<100 and abs(sum_y2)<100
'''Initial condition: the product center point lies inside the cart'''
condt0 = max((boxes[:, 1]+boxes[:, 3])/2) > self.CART_HIGH_THRESH1
'''Condition 1: net vertical motion (via y1 or y2) describing the product trajectory length. Possible situations:
(1). the detection box may coincide with the image's top/bottom edges,
(2). the top or bottom edge may jitter
'''
if isbottom and istop:
condt1 = abs(sum_y0) < 300
elif isbottom: # y2 on the bottom edge: use y1 to characterize motion
condt1 = sum_y1 > -120 and abs(sum_y0)<80 # with a bottom point moving upward, the threshold is below 100
elif istop: # y1 on the top edge: use y2 to characterize motion
condt1 = abs(sum_y2) < 100
else:
condt1 = (abs(sum_y1) < 30 or abs(sum_y2)<30)
'''Condition 2: the trajectory is static at both its start and its end; judged from the static intervals of y1
a. the product is inside the cart,
b. the starting and ending phases of the detection box are both static
c. static segment length > 3'''
condt2 = False
if len(self.static_y1)>=2:
condt_s0 = self.static_y1[0][0]==0 and self.static_y1[0][1] - self.static_y1[0][0] >= 3
condt_s1 = self.static_y1[-1][1]==self.frnum-1 and self.static_y1[-1][1] - self.static_y1[-1][0] >= 3
condt2 = condt_s0 and condt_s1 and isincart
condt = condt0 and (condt1 or condt2)
return condt
def extract_hand_features(self):
self.isHandStatic = False
x0 = (self.boxes[:, 0] + self.boxes[:, 2]) / 2
y0 = (self.boxes[:, 1] + self.boxes[:, 3]) / 2
handXy = np.stack((x0, y0), axis=-1)
handMaxY0 = np.max(y0)
handCenter = np.array([(max(x0)+min(x0))/2, (max(y0)+min(y0))/2])
handMaxDist = np.max(np.linalg.norm(handXy - handCenter))
if handMaxDist < self.HAND_STATIC_THRESH:
self.isHandStatic = True
return
def is_Upward(self):
'''Determine whether the product is taken out'''
print(f"The ID is: {self.tid}")
def is_FreeMove(self):
if self.frnum == 1:
return True
# print(f"The ID is: {self.tid}")
y0 = (self.boxes[:, 1] + self.boxes[:, 3]) / 2
det_y0 = np.diff(y0, axis=0)
sum_y0 = y0[-1] - y0[0]
'''Case 1: the center point moves downward'''
## Initial condition: the product is first detected inside the cart
condt0 = y0[0] > self.CART_HIGH_THRESH1
condt_a = False
## Condition 1: the product starts out static (the static criterion should be somewhat strict)
condt11, condt12 = False, False
if len(self.static_y1)>0:
condt11 = self.static_y1[0][0]==0 and self.static_y1[0][1] - self.static_y1[0][0] >= 5
if len(self.static_y2)>0:
condt12 = self.static_y2[0][0]==0 and self.static_y2[0][1] - self.static_y2[0][0] >= 5
# Condition 2: the product center moves downward
condt2 = y0[-1] > y0[0]
# Combined judgment a
condt_a = condt0 and (condt11 or condt12) and condt2
'''Case 2: the center point moves upward'''
## The product center moves upward, but there is no associated hand track and it is not a left/right edge point
condt_b = condt0 and len(self.hands)==0 and y0[-1] < y0[0] and (not self.is_edge_cornpoint())
'''Case 3: the product stays inside the cart but its motion has no dominant direction'''
## Center inside the cart; the net vertical displacement is smaller than the sum of the two largest absolute frame-to-frame differences, i.e. no dominant motion direction
condt_c = False
if self.frnum > 3:
condt_c = all(y0>self.CART_HIGH_THRESH1) and \
(abs(sum_y0) < sum(np.sort(np.abs(det_y0))[::-1][:2])-1)
condt = (condt_a or condt_b or condt_c) and self.cls!=0
return condt

View File

@ -0,0 +1,254 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 20 17:33:00 2023
@author: ym
"""
import sys
import cv2
import os
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from pathlib import Path
from datetime import datetime
# ================= used for importing ultralytics
# sys.path.append(r"D:\DeepLearning\yolov5_track")
# from utils.proBoxes import boxes_add_fid
from ytracking.tracking.utils.plotting import boxing_img # , Annotator, colors,
from ytracking.tracking.utils.gen import Profile
from ytracking.tracking.utils.drawtracks import draw5points, drawTrack, drawtracefeat, plot_frameID_y2, drawFeatures, \
draw_all_trajectories
from ytracking.tracking.utils import Boxes, IterableSimpleNamespace, yaml_load
from ytracking.tracking.trackers import BOTSORT, BYTETracker
sys.path.append("ytracking/tracking/")
from dotrack.dotracks_back import doBackTracks
from dotrack.dotracks_front import doFrontTracks
# from utils.mergetrack import track_equal_track
# from utils.basetrack import MoveState, ShoppingCart, doTracks
def init_tracker(tracker_yaml=None, bs=1):
"""
Initialize tracker for object tracking during prediction.
"""
TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
cfg = IterableSimpleNamespace(**yaml_load(tracker_yaml))
tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
return tracker
def have_tracked_front():
'''Front-camera trajectory processing. Tracking has already been run; only the tracking results are analyzed.'''
featdir = r"./data/trackfeats"
npydir = r"./data/tracks"
k = 0
gt = Profile()
for filename in os.listdir(npydir):
# filename = "084501222314_20240108-143651_front.npy"
if not filename.find("front") >= 0: continue
file, ext = os.path.splitext(filename)
fpath = os.path.join(npydir, filename)
featpath = os.path.join(featdir, file + '.pkl')
bboxes = np.load(fpath)
features_dict = np.load(featpath, allow_pickle=True)
with gt:
vts = doFrontTracks(bboxes, features_dict)
vts.classify()
plt = plot_frameID_y2(vts)
plt.savefig(f'./result/{file}_y2.png')
plt.close()
print(file + f" need time: {gt.dt:.2f}s")
# edgeline = cv2.imread("./shopcart/cart_tempt/board_ftmp_line.png")
# draw_all_trajectories(vts, edgeline, save_dir, filename)
# k += 1
# if k == 1:
# break
def have_tracked_back():
'''Rear-camera trajectory processing. Tracking has already been run; only the tracking results are analyzed.'''
featdir = r"./data/trackfeats"
npydir = r"./data/tracks"
k = 0
alltracks = []
gt = Profile()
for filename in os.listdir(npydir):
# filename = "084501222314_20240108-143656_back.npy" # "加购_55.npy"
if not filename.find("back") >= 0: continue
t1 = time.time()
file, ext = os.path.splitext(filename)
fpath = os.path.join(npydir, filename)
featpath = os.path.join(featdir, file + '.pkl')
# try:
bboxes = np.load(fpath)
features_dict = np.load(featpath, allow_pickle=True)
with gt:
vts = doBackTracks(bboxes, features_dict)
vts.classify()
# vts.merge_tracks()
print(file + f" need time: {gt.dt:.2f}s")
edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
draw_all_trajectories(vts, edgeline, save_dir, filename)
alltracks.append(vts)
# except Exception as e:
# # print(str(e))
# pass
# print(file+" need time: {:.2f}s".format(time.time()-t1))
k += 1
if k == 1:
break
if len(alltracks):
drawFeatures(alltracks, save_dir)
def tracking(vboxes):
tracker_yaml = r"./trackers/cfg/botsort.yaml"
tracker = init_tracker(tracker_yaml)
tboxes = []
images = []
track_boxes = np.empty((0, 9), dtype=np.float32)
features_dict = {}
'''==================== run tracking ======================='''
for det, img, frame in vboxes:
# needs to be re-sorted by frame_id
det_tracking = Boxes(det).cpu().numpy()
H, W = img.shape[:2]
imgs = []
for d in range(np.size(det, 0)):
tlbr = det[d, :4].astype(np.int_)
tlbr[0] = max(0, tlbr[0])
tlbr[1] = max(0, tlbr[1])
tlbr[2] = min(W - 1, tlbr[2])
tlbr[3] = min(H - 1, tlbr[3])
patch = img[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2], :]
patch = patch[:, :, ::-1] # the original code reads with PIL.Image (RGB); OpenCV is BGR
imgs.append(patch)
tracks = tracker.update(det_tracking, imgs)
if len(tracks):
track_boxes = np.concatenate([track_boxes, tracks], axis=0)
feat_dict = {int(x.idx): x.curr_feat for x in tracker.tracked_stracks if x.is_activated}
frame_id = tracks[0, 7]
features_dict.update({int(frame_id): feat_dict})
# det = tracks[:, :-1]
# tboxes.append((det, frame))
imgx = boxing_img(tracks, img)
images.append((imgx, frame))
# bboxes = boxes_add_fid(tboxes)
vts = doBackTracks(track_boxes, features_dict)
vts.classify()
return vts, images
def do_tracking():
pkldir = r"./data/boxes_imgs"
k = 0
save_result = True
alltracks = []
gt = Profile()
for filename in os.listdir(pkldir):
filename = "加购_18.pkl"
file, _ = os.path.splitext(filename)
vboxes = []
##================================ load the detection data
with open(pkldir + f'/{filename}', 'rb') as f:
vboxes = pickle.load(f)
assert len(vboxes) > 0
with gt:
vts, images = tracking(vboxes)
alltracks.append(vts)
print(file + f" need time: {gt.dt * 1E3:.1f}ms")
##================================ save images, video, track-trajectory
if save_result:
curdir = imgdir.joinpath(file)
if not curdir.exists():
curdir.mkdir(parents=True, exist_ok=True)
vidpath = str(curdir.joinpath(file).with_suffix('.mp4'))
fps, w, h = 30, images[0][0].shape[1], images[0][0].shape[0]
vidwriter = cv2.VideoWriter(vidpath, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
for img, frame in images:
imgpath = curdir.joinpath(file + f"_{frame}.png")
cv2.imwrite(str(imgpath), img)
vidwriter.write(img)
vidwriter.release()
edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
draw_all_trajectories(vts, edgeline, save_dir, filename)
k += 1
if k == 1:
break
drawFeatures(alltracks, save_dir)
def have_tracked(bboxes, features_dict, camera_id):
if camera_id == '0':
vts = doBackTracks(bboxes, features_dict)
vts.classify()
elif camera_id == '1':
vts = doFrontTracks(bboxes, features_dict)
vts.classify()
else:
raise ValueError("have no camera_id")
return vts
if __name__ == "__main__":
now = datetime.now()
time_string = now.strftime("%Y%m%d%H%M%S")[:8]
# save_dir = Path(f'./result/{time_string}_traj/')
# if not save_dir.exists():
# save_dir.mkdir(parents=True, exist_ok=True)
save_dir = Path(f'./result/')
mode = "merge" ## "merge": 已完成跟踪处理, "other": 未执行跟踪处理
if mode == "merge":
# have_tracked_back()
have_tracked_front()
else:
'''save location for videos and images produced by do_tracking()'''
imgdir = Path(f'./result/{time_string}_imgs/')
if not imgdir.exists():
imgdir.mkdir(parents=True, exist_ok=True)
do_tracking()

View File

@ -0,0 +1,173 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 23 11:04:48 2024
@author: ym
"""
import numpy as np
import cv2
from scipy.spatial.distance import cdist
# from trackers.utils import matching
def readDict(boxes, feat_dicts):
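# Gather per-box ReID features: boxes[:, 7] is the frame index (fid) and
# boxes[:, 8] is the box index (bid) within that frame, as stored by the tracker.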
feat = []
for i in range(boxes.shape[0]):
tid, fid, bid = int(boxes[i, 4]), int(boxes[i, 7]), int(boxes[i, 8])
feat.append(feat_dicts[fid][bid])
# img = feat_dicts[fid][f'{bid}_img']
# cv2.imwrite(f'./result/imgs/{tid}_{fid}_{bid}.png', img)
return np.asarray(feat, dtype=np.float32)
def track_equal_track(atrack, btrack, feat_dicts):
# boxes: [x, y, w, h, track_id, score, cls, frame_index, box_index]
aboxes = atrack.boxes
bboxes = btrack.boxes
''' 1. Check whether the two tracks overlap in time '''
afids = aboxes[:, 7].astype(np.int_)
bfids = bboxes[:, 7].astype(np.int_)
# intersection of the two tracks' frame indices
interfid = set(afids).intersection(set(bfids))
# alternatively, test disjointness directly (True means no common frames):
# interfid = set(afids).isdisjoint(set(bfids))
''' 2. Spatial IoU between the two tracks '''
alabel = np.array([0] * afids.size, dtype=np.int_)
blabel = np.array([1] * bfids.size, dtype=np.int_)
label = np.concatenate((alabel, blabel), axis=0)
fids = np.concatenate((afids, bfids), axis=0)
indices = np.argsort(fids)
idx_pair = []
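# Walk the pooled boxes in frame order and collect (a_idx, b_idx) pairs in which two
# consecutive entries come from different tracks in adjacent frames; the IoU of each
# such pair is computed below.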
for i in range(len(indices)-1):
idx1, idx2 = indices[i], indices[i+1]
if label[idx1] != label[idx2] and fids[idx2] - fids[idx1] == 1:
if label[idx1] == 0:
a_idx = idx1
b_idx = idx2-alabel.size
else:
a_idx = idx2
b_idx = idx1-alabel.size
idx_pair.append((a_idx, b_idx))
ious = []
for a, b in idx_pair:
abox, bbox = aboxes[a, :], bboxes[b, :]
xa1, ya1 = abox[0] - abox[2]/2, abox[1] - abox[3]/2
xa2, ya2 = abox[0] + abox[2]/2, abox[1] + abox[3]/2
xb1, yb1 = bbox[0] - bbox[2]/2, bbox[1] - bbox[3]/2
xb2, yb2 = bbox[0] + bbox[2]/2, bbox[1] + bbox[3]/2
inter = (np.minimum(xb2, xa2) - np.maximum(xb1, xa1)).clip(0) * \
(np.minimum(yb2, ya2) - np.maximum(yb1, ya1)).clip(0)
# Union Area
box1_area = abox[2] * abox[3]
box2_area = bbox[2] * bbox[3]
union = box1_area + box2_area - inter + 1e-6
ious.append(inter/union)
''' 3. Appearance-feature similarity between the two tracks '''
afeat = readDict(aboxes, feat_dicts)
bfeat = readDict(bboxes, feat_dicts)
feat = np.concatenate((afeat, bfeat), axis=0)
emb_simil = 1-np.maximum(0.0, cdist(feat, feat, 'cosine'))
emb_ = 1-cdist(np.mean(afeat, axis=0)[None, :], np.mean(bfeat, axis=0)[None, :], 'cosine')
cont1 = len(interfid) == 0 # no temporal overlap between the two tracks
cont2 = all(iou > 0.5 for iou in ious) # adjacent-frame boxes overlap spatially
cont3 = emb_[0, 0] > 0.75 # mean appearance features are similar
cont = cont1 and cont2 and cont3
return cont
def track_equal_str(atrack, btrack):
if atrack == btrack:
return True
else:
return False
def merge_track(Residual):
out_list = []
alist = [t for t in Residual]
while alist:
atrack = alist[0]
cur_list = []
cur_list.append(atrack)
alist.pop(0)
blist = [b for b in alist]
alist = []
for btrack in blist:
if track_equal_str(atrack, btrack):
cur_list.append(btrack)
else:
alist.append(btrack)
out_list.append(cur_list)
return out_list
def main():
Residual = ['a', 'b', 'c', 'd', 'a', 'b', 'c', 'b', 'c', 'd']
out_list = merge_track(Residual)
print(Residual)
print(out_list)
if __name__ == "__main__":
main()
# =============================================================================
# for i, atrack in enumerate(input_list):
# cur_list = []
# cur_list.append(atrack)
# del input_list[i]
#
# for j, btrack in enumerate(input_list):
# if track_equal(atrack, btrack):
# cur_list.append(btrack)
# del input_list[j]
#
# out_list.append(cur_list)
# =============================================================================

View File

@ -0,0 +1,94 @@
# Tracker
## Supported Trackers
- [x] ByteTracker
- [x] BoT-SORT
## Usage
### python interface:
You can use the Python interface to track objects using the YOLO model.
```python
from ultralytics import YOLO
model = YOLO("yolov8n.pt") # or a segmentation model .i.e yolov8n-seg.pt
model.track(
source="video/streams",
stream=True,
tracker="botsort.yaml", # or 'bytetrack.yaml'
show=True,
)
```
You can get the IDs of the tracked objects using the following code:
```python
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
for result in model.track(source="video.mp4"):
print(
result.boxes.id.cpu().numpy().astype(int)
) # this will print the IDs of the tracked objects in the frame
```
If you run the tracker on a folder of images, or loop over video frames yourself, pass the `persist` parameter to tell the model the frames are related, so the same objects keep the same IDs. Without it, a new tracker object is created on every call and the IDs change from frame to frame.
```python
import cv2
from ultralytics import YOLO
cap = cv2.VideoCapture("video.mp4")
model = YOLO("yolov8n.pt")
while True:
ret, frame = cap.read()
if not ret:
break
results = model.track(frame, persist=True)
boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
ids = results[0].boxes.id.cpu().numpy().astype(int)
for box, id in zip(boxes, ids):
cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
cv2.putText(
frame,
f"Id {id}",
(box[0], box[1]),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 0, 255),
2,
)
cv2.imshow("frame", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
```
## Change tracker parameters
You can change the tracker parameters by editing the tracker config file (e.g. `botsort.yaml`), which is located in the `ultralytics/cfg/trackers` folder.
## Command Line Interface (CLI)
You can also use the command line interface to track objects using the YOLO model.
```bash
yolo detect track source=... tracker=...
yolo segment track source=... tracker=...
yolo pose track source=... tracker=...
```
By default, trackers will use the configuration in `ultralytics/cfg/trackers`. We also support using a modified tracker config file. Please refer to the tracker config files in `ultralytics/cfg/trackers`.
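For example, you can point the tracker at a modified copy of the default config. A minimal sketch, assuming `custom_botsort.yaml` is a copy of `botsort.yaml` with edited thresholds:
```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
model.track(source="video.mp4", tracker="custom_botsort.yaml", show=True)
```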
## Contribute to Our Trackers Section
Are you proficient in multi-object tracking and have successfully implemented or adapted a tracking algorithm with Ultralytics YOLO? We invite you to contribute to our Trackers section! Your real-world applications and solutions could be invaluable for users working on tracking tasks.
By contributing to this section, you help expand the scope of tracking solutions available within the Ultralytics YOLO framework, adding another layer of functionality and utility for the community.
To initiate your contribution, please refer to our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for comprehensive instructions on submitting a Pull Request (PR) 🛠️. We are excited to see what you bring to the table!
Together, let's enhance the tracking capabilities of the Ultralytics YOLO ecosystem 🙏!

View File

@ -0,0 +1,10 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from .bot_sort import BOTSORT
from .byte_tracker import BYTETracker
from .track import register_tracker
__all__ = 'register_tracker', 'BOTSORT', 'BYTETracker' # allow simpler import

View File

@ -0,0 +1,71 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from collections import OrderedDict
import numpy as np
class TrackState:
"""Enumeration of possible object tracking states."""
New = 0
Tracked = 1
Lost = 2
Removed = 3
class BaseTrack:
"""Base class for object tracking, handling basic track attributes and operations."""
_count = 0
track_id = 0
is_activated = False
state = TrackState.New
history = OrderedDict()
features = []
curr_feature = None
score = 0
start_frame = 0
frame_id = 0
time_since_update = 0
# Multi-camera
location = (np.inf, np.inf)
@property
def end_frame(self):
"""Return the last frame ID of the track."""
return self.frame_id
@staticmethod
def next_id():
"""Increment and return the global track ID counter."""
BaseTrack._count += 1
return BaseTrack._count
def activate(self, *args):
"""Activate the track with the provided arguments."""
raise NotImplementedError
def predict(self):
"""Predict the next state of the track."""
raise NotImplementedError
def update(self, *args, **kwargs):
"""Update the track with new observations."""
raise NotImplementedError
def mark_lost(self):
"""Mark the track as lost."""
self.state = TrackState.Lost
def mark_removed(self):
"""Mark the track as removed."""
self.state = TrackState.Removed
@staticmethod
def reset_id():
"""Reset the global track ID counter."""
BaseTrack._count = 0

View File

@ -0,0 +1,198 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from collections import deque
import numpy as np
from .basetrack import TrackState
from .byte_tracker import BYTETracker, STrack
from .utils import matching
# from .utils.gmc import GMC
from .utils.kalman_filter import KalmanFilterXYWH
from .reid.reid_interface import ReIDInterface
# from .reid.config import config
from tools.config import config
class BOTrack(STrack):
shared_kalman = KalmanFilterXYWH()
def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
"""Initialize YOLOv8 object with temporal parameters, such as feature history, alpha and current features."""
super().__init__(tlwh, score, cls)
self.smooth_feat = None
self.curr_feat = None
if feat is not None:
self.update_features(feat)
self.features = deque([], maxlen=feat_history)
self.alpha = 0.9
def update_features(self, feat):
"""Update features vector and smooth it using exponential moving average."""
feat /= np.linalg.norm(feat)
self.curr_feat = feat
if self.smooth_feat is None:
self.smooth_feat = feat
else:
self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def predict(self):
"""Predicts the mean and covariance using Kalman filter."""
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[6] = 0
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
def re_activate(self, new_track, frame_id, new_id=False):
"""Reactivates a track with updated features and optionally assigns a new ID."""
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
super().re_activate(new_track, frame_id, new_id)
def update(self, new_track, frame_id):
"""Update the YOLOv8 instance with new track and frame ID."""
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
super().update(new_track, frame_id)
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[:2] -= ret[2:] / 2
return ret
@staticmethod
def multi_predict(stracks):
"""Predicts the mean and covariance of multiple object tracks using shared Kalman filter."""
if len(stracks) <= 0:
return
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
for i, st in enumerate(stracks):
if st.state != TrackState.Tracked:
multi_mean[i][6] = 0
multi_mean[i][7] = 0
multi_mean, multi_covariance = BOTrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
stracks[i].mean = mean
stracks[i].covariance = cov
def convert_coords(self, tlwh):
"""Converts Top-Left-Width-Height bounding box coordinates to X-Y-Width-Height format."""
return self.tlwh_to_xywh(tlwh)
@staticmethod
def tlwh_to_xywh(tlwh):
"""Convert bounding box to format `(center x, center y, width,
height)`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
return ret
class BOTSORT(BYTETracker):
def __init__(self, args, frame_rate=30):
"""Initialize YOLOv8 object with ReID module and GMC algorithm."""
super().__init__(args, frame_rate)
# ReID module
self.proximity_thresh = args.proximity_thresh
self.appearance_thresh = args.appearance_thresh
if args.with_reid:
# custom ReID feature encoder
self.encoder = ReIDInterface(config)
# self.gmc = GMC(method=args.gmc_method) # commented by WQG
def get_kalmanfilter(self):
"""Returns an instance of KalmanFilterXYWH for object tracking."""
return KalmanFilterXYWH()
def init_track(self, dets, scores, cls, imgs):
"""Initialize track with detections, scores, and classes."""
if len(dets) == 0:
return []
if self.args.with_reid and self.encoder is not None:
features_keep = self.encoder.inference(imgs, dets)
return [BOTrack(xyxy, s, c, f) for (xyxy, s, c, f) in zip(dets, scores, cls, features_keep)] # detections
else:
return [BOTrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] # detections
def get_dists(self, tracks, detections):
"""Get distances between tracks and detections using IoU and (optionally) ReID embeddings."""
dists = matching.iou_distance(tracks, detections)
# proximity_thresh should be set large: ReID features are ignored only when two boxes are far apart
dists_mask = (dists > self.proximity_thresh)
# TODO: mot20
# if not self.args.mot20:
dists = matching.fuse_score(dists, detections)
if self.args.with_reid and self.encoder is not None:
emb_dists = matching.embedding_distance(tracks, detections) / 2.0
emb_dists[emb_dists > self.appearance_thresh] = 1.0
emb_dists[dists_mask] = 1.0
dists = np.minimum(dists, emb_dists)
return dists
def get_dists_1(self, tracks, detections):
"""Get distances between tracks and detections using IoU and (optionally) ReID embeddings."""
iou_dists = matching.iou_distance(tracks, detections)
iou_dists_mask = (iou_dists>0.9)
iou_dists = matching.fuse_score(iou_dists, detections)
weight = 0.4 # weight for the commented-out linear-fusion alternative below
if self.args.with_reid and self.encoder is not None:
emb_dists = matching.embedding_distance(tracks, detections)
'''============ two strategies for fusing iou_dists and emb_dists ==========='''
'''1. A ReID similarity threshold: below it, two box crops cannot belong to the same
object, so a reasonable, trustworthy threshold must be chosen.
2. The IoU term is only a weak constraint, so the iou_dists veto threshold should be set large.
'''
emb_dists_mask = (emb_dists > 0.85)
iou_dists[emb_dists_mask] = 1
emb_dists[iou_dists_mask] = 1
dists = np.minimum(iou_dists, emb_dists)
'''Alternative: weighted linear fusion of the two costs'''
# dists = (1-weight)*iou_dists + weight*emb_dists
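# Numeric sketch of the veto logic above (hypothetical values): a pair with
# iou_dist = 0.95 (> 0.9) gets its appearance cost raised to 1, and a pair with
# emb_dist = 0.90 (> 0.85) gets its IoU cost raised to 1; np.minimum then keeps a
# small cost only when both cues agree the match is plausible.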
else:
dists = iou_dists.copy()
return dists
def multi_predict(self, tracks):
"""Predict and track multiple objects with YOLOv8 model."""
BOTrack.multi_predict(tracks)
def get_result(self):
'''written by WQG'''
activate_tracks = np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.idx]
for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
track_features = []
if self.args.with_reid and self.encoder is not None:
track_features = np.asarray([x.curr_feat for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
return (activate_tracks, track_features)

View File

@ -0,0 +1,424 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import numpy as np
from .basetrack import BaseTrack, TrackState
from .utils import matching
from .utils.kalman_filter import KalmanFilterXYAH
def dists_update(dists, strack_pool, detections):
'''written by WQG'''
if len(strack_pool) and len(detections):
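# Raise the cost of cross-class (track, detection) pairs to at least 1 so that
# linear assignment never matches a detection to a track of a different class.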
# alabel = np.array([int(stack.cls) if int(stack.cls)==0 or int(stack.cls)==9 else -1 for stack in strack_pool])
# blabel = np.array([int(stack.cls) if int(stack.cls)==0 or int(stack.cls)==9 else -1 for stack in detections])
alabel = np.array([int(stack.cls) for stack in strack_pool])
blabel = np.array([int(stack.cls) for stack in detections])
amlabel = np.expand_dims(alabel, axis=1).repeat(len(detections),axis=1)
bmlabel = np.expand_dims(blabel, axis=0).repeat(len(strack_pool),axis=0)
dist_label = 1 - (bmlabel == amlabel)
dists = np.where(dists > dist_label, dists, dist_label)
return dists
class STrack(BaseTrack):
shared_kalman = KalmanFilterXYAH()
def __init__(self, tlwh, score, cls):
"""wait activate."""
self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
self.kalman_filter = None
self.mean, self.covariance = None, None
self.is_activated = False
self.score = score
self.tracklet_len = 0
self.cls = cls
self.idx = tlwh[-1]
def predict(self):
"""Predicts mean and covariance using Kalman filter."""
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
@staticmethod
def multi_predict(stracks):
"""Perform multi-object predictive tracking using Kalman filter for given stracks."""
if len(stracks) <= 0:
return
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
for i, st in enumerate(stracks):
if st.state != TrackState.Tracked:
multi_mean[i][7] = 0
multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
stracks[i].mean = mean
stracks[i].covariance = cov
@staticmethod
def multi_gmc(stracks, H=np.eye(2, 3)):
"""Update state tracks positions and covariances using a homography matrix."""
if len(stracks) > 0:
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
R = H[:2, :2]
R8x8 = np.kron(np.eye(4, dtype=float), R)
t = H[:2, 2]
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
mean = R8x8.dot(mean)
mean[:2] += t
cov = R8x8.dot(cov).dot(R8x8.transpose())
stracks[i].mean = mean
stracks[i].covariance = cov
def activate(self, kalman_filter, frame_id):
"""Start a new tracklet."""
self.kalman_filter = kalman_filter
self.track_id = self.next_id()
self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh))
self.tracklet_len = 0
self.state = TrackState.Tracked
if frame_id == 1:
self.is_activated = True
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
"""Reactivates a previously lost track with a new detection."""
self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
self.convert_coords(new_track.tlwh))
self.tracklet_len = 0
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
self.cls = new_track.cls
self.idx = new_track.idx
def update(self, new_track, frame_id):
"""
Update a matched track
:type new_track: STrack
:type frame_id: int
:return:
"""
self.frame_id = frame_id
self.tracklet_len += 1
new_tlwh = new_track.tlwh
self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
self.convert_coords(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.score = new_track.score
self.cls = new_track.cls
self.idx = new_track.idx
def convert_coords(self, tlwh):
"""Convert a bounding box's top-left-width-height format to its x-y-angle-height equivalent."""
return self.tlwh_to_xyah(tlwh)
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
@property
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
@staticmethod
def tlbr_to_tlwh(tlbr):
"""Converts top-left bottom-right format to top-left width height format."""
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
def tlwh_to_tlbr(tlwh):
"""Converts tlwh bounding box format to tlbr format."""
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
"""Return a string representation of the BYTETracker object with start and end frames and track ID."""
return f'OT_{self.track_id}_({self.start_frame}-{self.end_frame})'
class BYTETracker:
def __init__(self, args, frame_rate=30):
"""Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
self.tracked_stracks = [] # type: list[STrack]
self.lost_stracks = [] # type: list[STrack]
self.removed_stracks = [] # type: list[STrack]
self.frame_id = 0
self.args = args
self.max_time_lost = int(frame_rate / 30.0 * args.track_buffer)
self.kalman_filter = self.get_kalmanfilter()
self.reset_id()
def update(self, results, img=None):
"""Updates object tracker with new detections and returns tracked object bounding boxes."""
self.frame_id += 1
activated_stracks = []
refind_stracks = []
lost_stracks = []
removed_stracks = []
scores = results.conf
bboxes = results.xyxy
# Add index
bboxes = np.concatenate([bboxes, np.arange(len(bboxes)).reshape(-1, 1)], axis=-1)
cls = results.cls
remain_inds = scores > self.args.track_high_thresh
inds_low = scores > self.args.track_low_thresh
inds_high = scores < self.args.track_high_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
cls_keep = cls[remain_inds]
cls_second = cls[inds_second]
detections = self.init_track(dets, scores_keep, cls_keep, img)
# Add newly detected tracklets to tracked_stracks
unconfirmed = []
tracked_stracks = [] # type: list[STrack]
for track in self.tracked_stracks:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_stracks.append(track)
# Step 2: First association, with high score detection boxes
strack_pool = self.joint_stracks(tracked_stracks, self.lost_stracks)
# Predict the current location with KF
self.multi_predict(strack_pool)
# ============================================================= GMC is unnecessary here (WQG)
# if hasattr(self, 'gmc') and img is not None:
# warp = self.gmc.apply(img, dets)
# STrack.multi_gmc(strack_pool, warp)
# STrack.multi_gmc(unconfirmed, warp)
# =============================================================================
dists = self.get_dists_1(strack_pool, detections)
'''written by WQG for different class'''
dists = dists_update(dists, strack_pool, detections)
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh)
for itracked, idet in matches:
track = strack_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_stracks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
# Step 3: Second association, with low score detection boxes
# association the untrack to the low score detections
detections_second = self.init_track(dets_second, scores_second, cls_second, img)
r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
# TODO
dists = matching.iou_distance(r_tracked_stracks, detections_second)
'''written by WQG for different class'''
dists = dists_update(dists, r_tracked_stracks, detections_second)
matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_stracks[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_stracks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
for it in u_track:
track = r_tracked_stracks[it]
if track.state != TrackState.Lost:
track.mark_lost()
lost_stracks.append(track)
# Deal with unconfirmed tracks, usually tracks with only one beginning frame
detections = [detections[i] for i in u_detection]
dists = self.get_dists_1(unconfirmed, detections)
'''written by WQG for different class'''
dists = dists_update(dists, unconfirmed, detections)
matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_stracks.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_stracks.append(track)
# Step 4: Init new stracks
for inew in u_detection:
track = detections[inew]
if track.score < self.args.new_track_thresh:
continue
track.activate(self.kalman_filter, self.frame_id)
activated_stracks.append(track)
# Step 5: Update state
for track in self.lost_stracks:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_stracks.append(track)
self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
self.tracked_stracks = self.joint_stracks(self.tracked_stracks, activated_stracks)
self.tracked_stracks = self.joint_stracks(self.tracked_stracks, refind_stracks)
self.lost_stracks = self.sub_stracks(self.lost_stracks, self.tracked_stracks)
self.lost_stracks.extend(lost_stracks)
self.lost_stracks = self.sub_stracks(self.lost_stracks, self.removed_stracks)
self.tracked_stracks, self.lost_stracks = self.remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
self.removed_stracks.extend(removed_stracks)
if len(self.removed_stracks) > 1000:
self.removed_stracks = self.removed_stracks[-999:] # clip remove stracks to 1000 maximum
return np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.frame_id, x.idx]
for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
def get_result(self):
'''written by WQG'''
# =============================================================================
# activate_tracks = np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.idx]
# for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
#
# track_features = []
# =============================================================================
tracks = []
feats = []
for t in self.tracked_stracks:
if t.is_activated:
track = t.tlbr.tolist() + [t.track_id, t.score, t.cls, t.idx]
feat = getattr(t, 'curr_feat', t.curr_feature) # read the feature from the track object t, not from the list
tracks.append(track)
feats.append(feat)
tracks = np.asarray(tracks, dtype=np.float32)
return (tracks, feats)
def get_kalmanfilter(self):
"""Returns a Kalman filter object for tracking bounding boxes."""
return KalmanFilterXYAH()
def init_track(self, dets, scores, cls, img=None):
"""Initialize object tracking with detections and scores using STrack algorithm."""
return [STrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] if len(dets) else [] # detections
def get_dists(self, tracks, detections):
"""Calculates the distance between tracks and detections using IOU and fuses scores."""
dists = matching.iou_distance(tracks, detections)
# TODO: mot20
# if not self.args.mot20:
dists = matching.fuse_score(dists, detections)
return dists
def multi_predict(self, tracks):
"""Returns the predicted tracks using the YOLOv8 network."""
STrack.multi_predict(tracks)
def reset_id(self):
"""Resets the ID counter of STrack."""
STrack.reset_id()
@staticmethod
def joint_stracks(tlista, tlistb):
"""Combine two lists of stracks into a single one."""
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
@staticmethod
def sub_stracks(tlista, tlistb):
"""DEPRECATED CODE in https://github.com/ultralytics/ultralytics/pull/1890/
stracks = {t.track_id: t for t in tlista}
for t in tlistb:
tid = t.track_id
if stracks.get(tid, 0):
del stracks[tid]
return list(stracks.values())
"""
track_ids_b = {t.track_id for t in tlistb}
return [t for t in tlista if t.track_id not in track_ids_b]
@staticmethod
def remove_duplicate_stracks(stracksa, stracksb):
"""Remove duplicate stracks with non-maximum IOU distance."""
pdist = matching.iou_distance(stracksa, stracksb)
pairs = np.where(pdist < 0.15)
dupa, dupb = [], []
for p, q in zip(*pairs):
timep = stracksa[p].frame_id - stracksa[p].start_frame
timeq = stracksb[q].frame_id - stracksb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(stracksa) if i not in dupa]
resb = [t for i, t in enumerate(stracksb) if i not in dupb]
return resa, resb
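# Usage sketch (assumed arg names follow the tracker yaml files in trackers/cfg):
# from types import SimpleNamespace
# args = SimpleNamespace(track_high_thresh=0.5, track_low_thresh=0.1, new_track_thresh=0.6,
# track_buffer=30, match_thresh=0.8)
# tracker = BYTETracker(args, frame_rate=30)
# online = tracker.update(results) # results: Boxes-like object with .conf, .xyxy, .cls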

View File

@ -0,0 +1,18 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT
tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
new_track_thresh: 0.6 # threshold to initialize a new track when a detection matches no existing tracks
track_buffer: 30 # buffer to calculate the time when to remove tracks
match_thresh: 0.8 # threshold for matching tracks
# min_box_area: 10 # threshold for min box areas (for tracker evaluation, not used for now)
# mot20: False # for tracker evaluation(not used for now)
# BoT-SORT settings
gmc_method: sparseOptFlow # method of global motion compensation
# ReID model related thresh (not supported yet)
proximity_thresh: 0.5
appearance_thresh: 0.25
with_reid: True

View File

@ -0,0 +1,11 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack
tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
new_track_thresh: 0.6 # threshold to initialize a new track when a detection matches no existing tracks
track_buffer: 30 # buffer to calculate the time when to remove tracks
match_thresh: 0.8 # threshold for matching tracks
# min_box_area: 10 # threshold for min box areas (for tracker evaluation, not used for now)
# mot20: False # for tracker evaluation(not used for now)

View File

@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 19 16:15:35 2024
@author: ym
"""

View File

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 19 14:01:46 2024
@author: ym
"""
import torch
import os
# import torchvision.transforms as T
class Config:
# network settings
backbone = 'resnet18' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
batch_size = 8
embedding_size = 256
img_size = 224
current_path = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(current_path, "ckpts", "resnet18_1220", "best.pth") # os.path.join keeps the path portable
# model_path = "./trackers/reid/ckpts/resnet18_1220/best.pth"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# =============================================================================
# metric = 'arcface' # [cosface, arcface]
# drop_ratio = 0.5
#
# # training settings
# checkpoints = "checkpoints/Mobilev3Large_1225" # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
# restore = False
#
# test_model = "./checkpoints/resnet18_1220/best.pth"
#
#
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# pin_memory = True # if memory is large, set it True to speed up a bit
# num_workers = 4 # dataloader
# =============================================================================
config = Config()

View File

@ -0,0 +1,83 @@
import torch
import torch.nn as nn
from torch.nn import init
class Flatten(nn.Module):
def forward(self, x):
return x.view(x.shape[0], -1)
class ChannelAttention(nn.Module):
def __init__(self, channel, reduction=16, num_layers=3):
super(ChannelAttention, self).__init__()
self.avgpool = nn.AdaptiveAvgPool2d(1)
gate_channels = [channel]
gate_channels += [channel // reduction] * num_layers
gate_channels += [channel]
self.ca = nn.Sequential()
self.ca.add_module('flatten', Flatten())
for i in range(len(gate_channels) - 2):
# unique names are required: repeated add_module('') calls would overwrite each other
self.ca.add_module(f'fc{i}', nn.Linear(gate_channels[i], gate_channels[i + 1]))
self.ca.add_module(f'bn{i}', nn.BatchNorm1d(gate_channels[i + 1]))
self.ca.add_module(f'relu{i}', nn.ReLU())
self.ca.add_module('fc_out', nn.Linear(gate_channels[-2], gate_channels[-1]))
def forward(self, x):
res = self.avgpool(x)
res = self.ca(res)
res = res.unsqueeze(-1).unsqueeze(-1).expand_as(x)
return res
class SpatialAttention(nn.Module):
def __init__(self, channel, reduction=16, num_lay=3, dilation=2):
super(SpatialAttention, self).__init__()
self.sa = nn.Sequential()
self.sa.add_module('conv_reduce', nn.Conv2d(kernel_size=1, in_channels=channel, out_channels=channel // reduction))
self.sa.add_module('bn_reduce', nn.BatchNorm2d(num_features=channel // reduction))
self.sa.add_module('relu_reduce', nn.ReLU())
for i in range(num_lay):
# dilated 3x3 convs; padding == dilation keeps the spatial size unchanged
self.sa.add_module(f'conv{i}', nn.Conv2d(kernel_size=3,
in_channels=channel // reduction,
out_channels=channel // reduction,
padding=dilation,
dilation=dilation))
self.sa.add_module(f'bn{i}', nn.BatchNorm2d(channel // reduction))
self.sa.add_module(f'relu{i}', nn.ReLU())
self.sa.add_module('conv_out', nn.Conv2d(channel // reduction, 1, kernel_size=1))
def forward(self, x):
res = self.sa(x)
res = res.expand_as(x)
return res
class BAMblock(nn.Module):
def __init__(self, channel=512, reduction=16, dia_val=2):
super(BAMblock, self).__init__()
self.ca = ChannelAttention(channel, reduction)
self.sa = SpatialAttention(channel, reduction, dilation=dia_val)
self.sigmoid = nn.Sigmoid()
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
b, c, _, _ = x.size()
sa_out = self.sa(x)
ca_out = self.ca(x)
weight = self.sigmoid(sa_out + ca_out)
out = (1 + weight) * x
return out
if __name__ == "__main__":
# smoke test: BAM should preserve the input shape
x = torch.randn(2, 512, 14, 14)
print(BAMblock(512)(x).shape)

Some files were not shown because too many files have changed in this diff.