commit 3a5214c796
Author: lee
Date: 2024-11-27 15:37:10 +08:00

696 changed files with 56947 additions and 0 deletions

ytracking/__init__.py Normal file (0 lines)

(binary files not shown)

ytracking/models/common.py Normal file
@@ -0,0 +1,883 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Common modules
"""
import ast
import contextlib
import json
import math
import platform
import warnings
import zipfile
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path
from urllib.parse import urlparse
import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp
# Import 'ultralytics' package or install it if missing
try:
import ultralytics
    assert hasattr(ultralytics, '__version__')  # verify package is not a directory
except (ImportError, AssertionError):
import os
os.system('pip install -U ultralytics')
import ultralytics
from ytracking.ultralytics.utils.plotting import Annotator, colors, save_one_box
from ytracking.utils import TryExcept
from ytracking.utils.dataloaders import exif_transpose, letterbox
from ytracking.utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
increment_path, is_jupyter, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy,
xyxy2xywh, yaml_load)
from ytracking.utils.torch_utils import copy_attr, smart_inference_mode
def autopad(k, p=None, d=1): # kernel, padding, dilation
# Pad to 'same' shape outputs
if d > 1:
k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
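# Illustrative sanity check (values assumed, not from this commit): autopad(3) -> 1 and
# autopad(5) -> 2 keep stride-1 convs at 'same' output shape; with dilation,
# autopad(3, d=2) -> 2 because the effective kernel size grows to d * (k - 1) + 1 = 5.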
class Conv(nn.Module):
# Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
default_act = nn.SiLU() # default activation
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
super().__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
def forward(self, x):
return self.act(self.bn(self.conv(x)))
def forward_fuse(self, x):
return self.act(self.conv(x))
class DWConv(Conv):
# Depth-wise convolution
def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation
super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
class DWConvTranspose2d(nn.ConvTranspose2d):
# Depth-wise transpose convolution
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
class TransformerLayer(nn.Module):
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
def __init__(self, c, num_heads):
super().__init__()
self.q = nn.Linear(c, c, bias=False)
self.k = nn.Linear(c, c, bias=False)
self.v = nn.Linear(c, c, bias=False)
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
self.fc1 = nn.Linear(c, c, bias=False)
self.fc2 = nn.Linear(c, c, bias=False)
def forward(self, x):
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
x = self.fc2(self.fc1(x)) + x
return x
class TransformerBlock(nn.Module):
# Vision Transformer https://arxiv.org/abs/2010.11929
def __init__(self, c1, c2, num_heads, num_layers):
super().__init__()
self.conv = None
if c1 != c2:
self.conv = Conv(c1, c2)
self.linear = nn.Linear(c2, c2) # learnable position embedding
self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
self.c2 = c2
def forward(self, x):
if self.conv is not None:
x = self.conv(x)
        b, _, h, w = x.shape
        p = x.flatten(2).permute(2, 0, 1)
        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, h, w)
class Bottleneck(nn.Module):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_, c2, 3, 1, g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.SiLU()
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
class CrossConv(nn.Module):
# Cross Convolution Downsample
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, (1, k), (1, s))
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class C3(nn.Module):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x):
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
class C3x(C3):
# C3 module with cross-convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
class C3TR(C3):
# C3 module with TransformerBlock()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = TransformerBlock(c_, c_, 4, n)
class C3SPP(C3):
# C3 module with SPP()
def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = SPP(c_, c_, k)
class C3Ghost(C3):
# C3 module with GhostBottleneck()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e) # hidden channels
self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
class SPP(nn.Module):
# Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
def __init__(self, c1, c2, k=(5, 9, 13)):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
class SPPF(nn.Module):
# Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * 4, c2, 1, 1)
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
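# Note: chaining the same k=5 max-pool three times yields effective receptive fields of
# 5, 9 and 13, so SPPF(k=5) matches SPP(k=(5, 9, 13)) while re-using intermediate pools.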
class Focus(nn.Module):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
# self.contract = Contract(gain=2)
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
# return self.conv(self.contract(x))
class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
super().__init__()
c_ = c2 // 2 # hidden channels
self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
def forward(self, x):
y = self.cv1(x)
return torch.cat((y, self.cv2(y)), 1)
class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
super().__init__()
c_ = c2 // 2
self.conv = nn.Sequential(
GhostConv(c1, c_, 1, 1), # pw
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
act=False)) if s == 2 else nn.Identity()
def forward(self, x):
return self.conv(x) + self.shortcut(x)
class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
        b, c, h, w = x.size()  # assert h % s == 0 and w % s == 0, 'Indivisible gain'
s = self.gain
x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
        b, c, h, w = x.size()  # assert c % s ** 2 == 0, 'Indivisible gain'
s = self.gain
x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
class Concat(nn.Module):
# Concatenate a list of tensors along dimension
def __init__(self, dimension=1):
super().__init__()
self.d = dimension
def forward(self, x):
return torch.cat(x, self.d)
class DetectMultiBackend(nn.Module):
# YOLOv5 MultiBackend class for python inference on various backends
def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
# Usage:
# PyTorch: weights = *.pt
# TorchScript: *.torchscript
# ONNX Runtime: *.onnx
# ONNX OpenCV DNN: *.onnx --dnn
# OpenVINO: *_openvino_model
# CoreML: *.mlmodel
# TensorRT: *.engine
# TensorFlow SavedModel: *_saved_model
# TensorFlow GraphDef: *.pb
# TensorFlow Lite: *.tflite
# TensorFlow Edge TPU: *_edgetpu.tflite
# PaddlePaddle: *_paddle_model
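        # e.g. (illustrative, not from this commit): DetectMultiBackend('yolov5s.onnx')
        # takes the ONNX Runtime branch below; a '*_openvino_model' dir takes OpenVINO.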
from ytracking.models.experimental import attempt_download, attempt_load # scoped to avoid circular import
super().__init__()
w = str(weights[0] if isinstance(weights, list) else weights)
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
fp16 &= pt or jit or onnx or engine or triton # FP16
        nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCHW)
stride = 32 # default stride
cuda = torch.cuda.is_available() and device.type != 'cpu' # use CUDA
if not (pt or triton):
w = attempt_download(w) # download if not local
if pt: # PyTorch
model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
stride = max(int(model.stride.max()), 32) # model stride
names = model.module.names if hasattr(model, 'module') else model.names # get class names
model.half() if fp16 else model.float()
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
elif jit: # TorchScript
LOGGER.info(f'Loading {w} for TorchScript inference...')
extra_files = {'config.txt': ''} # model metadata
model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
model.half() if fp16 else model.float()
if extra_files['config.txt']: # load metadata dict
d = json.loads(extra_files['config.txt'],
object_hook=lambda d: {
int(k) if k.isdigit() else k: v
for k, v in d.items()})
stride, names = int(d['stride']), d['names']
elif dnn: # ONNX OpenCV DNN
LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
check_requirements('opencv-python>=4.5.4')
net = cv2.dnn.readNetFromONNX(w)
elif onnx: # ONNX Runtime
LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
import onnxruntime
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
session = onnxruntime.InferenceSession(w, providers=providers)
output_names = [x.name for x in session.get_outputs()]
meta = session.get_modelmeta().custom_metadata_map # metadata
if 'stride' in meta:
stride, names = int(meta['stride']), eval(meta['names'])
elif xml: # OpenVINO
LOGGER.info(f'Loading {w} for OpenVINO inference...')
check_requirements('openvino>=2023.0') # requires openvino-dev: https://pypi.org/project/openvino-dev/
from openvino.runtime import Core, Layout, get_batch
core = Core()
if not Path(w).is_file(): # if not *.xml
w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir
ov_model = core.read_model(model=w, weights=Path(w).with_suffix('.bin'))
if ov_model.get_parameters()[0].get_layout().empty:
ov_model.get_parameters()[0].set_layout(Layout('NCHW'))
batch_dim = get_batch(ov_model)
if batch_dim.is_static:
batch_size = batch_dim.get_length()
ov_compiled_model = core.compile_model(ov_model, device_name='AUTO') # AUTO selects best available device
stride, names = self._load_metadata(Path(w).with_suffix('.yaml')) # load metadata
elif engine: # TensorRT
LOGGER.info(f'Loading {w} for TensorRT inference...')
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
if device.type == 'cpu':
device = torch.device('cuda:0')
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
logger = trt.Logger(trt.Logger.INFO)
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
model = runtime.deserialize_cuda_engine(f.read())
context = model.create_execution_context()
bindings = OrderedDict()
output_names = []
fp16 = False # default updated below
dynamic = False
for i in range(model.num_bindings):
name = model.get_binding_name(i)
dtype = trt.nptype(model.get_binding_dtype(i))
if model.binding_is_input(i):
if -1 in tuple(model.get_binding_shape(i)): # dynamic
dynamic = True
context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
if dtype == np.float16:
fp16 = True
else: # output
output_names.append(name)
shape = tuple(context.get_binding_shape(i))
im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size
elif coreml: # CoreML
LOGGER.info(f'Loading {w} for CoreML inference...')
import coremltools as ct
model = ct.models.MLModel(w)
elif saved_model: # TF SavedModel
LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
import tensorflow as tf
keras = False # assume TF1 saved_model
model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
import tensorflow as tf
def wrap_frozen_graph(gd, inputs, outputs):
x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), []) # wrapped
ge = x.graph.as_graph_element
return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
def gd_outputs(gd):
name_list, input_list = [], []
for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
name_list.append(node.name)
input_list.extend(node.input)
return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))
gd = tf.Graph().as_graph_def() # TF GraphDef
with open(w, 'rb') as f:
gd.ParseFromString(f.read())
frozen_func = wrap_frozen_graph(gd, inputs='x:0', outputs=gd_outputs(gd))
elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
from tflite_runtime.interpreter import Interpreter, load_delegate
except ImportError:
import tensorflow as tf
                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
delegate = {
'Linux': 'libedgetpu.so.1',
'Darwin': 'libedgetpu.1.dylib',
'Windows': 'edgetpu.dll'}[platform.system()]
interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
else: # TFLite
LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
interpreter = Interpreter(model_path=w) # load TFLite model
interpreter.allocate_tensors() # allocate
input_details = interpreter.get_input_details() # inputs
output_details = interpreter.get_output_details() # outputs
# load metadata
with contextlib.suppress(zipfile.BadZipFile):
with zipfile.ZipFile(w, 'r') as model:
meta_file = model.namelist()[0]
meta = ast.literal_eval(model.read(meta_file).decode('utf-8'))
stride, names = int(meta['stride']), meta['names']
elif tfjs: # TF.js
raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported')
elif paddle: # PaddlePaddle
LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
import paddle.inference as pdi
if not Path(w).is_file(): # if not *.pdmodel
w = next(Path(w).rglob('*.pdmodel')) # get *.pdmodel file from *_paddle_model dir
weights = Path(w).with_suffix('.pdiparams')
config = pdi.Config(str(w), str(weights))
if cuda:
config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
predictor = pdi.create_predictor(config)
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
output_names = predictor.get_output_names()
elif triton: # NVIDIA Triton Inference Server
LOGGER.info(f'Using {w} as Triton Inference Server...')
check_requirements('tritonclient[all]')
            from ytracking.utils.triton import TritonRemoteModel
model = TritonRemoteModel(url=w)
nhwc = model.runtime.startswith('tensorflow')
else:
raise NotImplementedError(f'ERROR: {w} is not a supported format')
# class names
if 'names' not in locals():
names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
if names[0] == 'n01440764' and len(names) == 1000: # ImageNet
names = yaml_load(ROOT / 'data/ImageNet.yaml')['names'] # human-readable names
self.__dict__.update(locals()) # assign all variables to self
def forward(self, im, augment=False, visualize=False):
# YOLOv5 MultiBackend inference
b, ch, h, w = im.shape # batch, channel, height, width
if self.fp16 and im.dtype != torch.float16:
im = im.half() # to FP16
if self.nhwc:
im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3)
if self.pt: # PyTorch
y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
elif self.jit: # TorchScript
y = self.model(im)
elif self.dnn: # ONNX OpenCV DNN
im = im.cpu().numpy() # torch to numpy
self.net.setInput(im)
y = self.net.forward()
elif self.onnx: # ONNX Runtime
im = im.cpu().numpy() # torch to numpy
y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
elif self.xml: # OpenVINO
im = im.cpu().numpy() # FP32
y = list(self.ov_compiled_model(im).values())
elif self.engine: # TensorRT
if self.dynamic and im.shape != self.bindings['images'].shape:
i = self.model.get_binding_index('images')
self.context.set_binding_shape(i, im.shape) # reshape if dynamic
self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
for name in self.output_names:
i = self.model.get_binding_index(name)
self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
s = self.bindings['images'].shape
assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
self.binding_addrs['images'] = int(im.data_ptr())
self.context.execute_v2(list(self.binding_addrs.values()))
y = [self.bindings[x].data for x in sorted(self.output_names)]
elif self.coreml: # CoreML
im = im.cpu().numpy()
im = Image.fromarray((im[0] * 255).astype('uint8'))
# im = im.resize((192, 320), Image.BILINEAR)
y = self.model.predict({'image': im}) # coordinates are xywh normalized
if 'confidence' in y:
box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)
y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
else:
y = list(reversed(y.values())) # reversed for segmentation models (pred, proto)
elif self.paddle: # PaddlePaddle
im = im.cpu().numpy().astype(np.float32)
self.input_handle.copy_from_cpu(im)
self.predictor.run()
y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
elif self.triton: # NVIDIA Triton Inference Server
y = self.model(im)
else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
im = im.cpu().numpy()
if self.saved_model: # SavedModel
y = self.model(im, training=False) if self.keras else self.model(im)
elif self.pb: # GraphDef
y = self.frozen_func(x=self.tf.constant(im))
else: # Lite or Edge TPU
input = self.input_details[0]
int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model
if int8:
scale, zero_point = input['quantization']
im = (im / scale + zero_point).astype(np.uint8) # de-scale
self.interpreter.set_tensor(input['index'], im)
self.interpreter.invoke()
y = []
for output in self.output_details:
x = self.interpreter.get_tensor(output['index'])
if int8:
scale, zero_point = output['quantization']
x = (x.astype(np.float32) - zero_point) * scale # re-scale
y.append(x)
y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels
if isinstance(y, (list, tuple)):
return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
else:
return self.from_numpy(y)
def from_numpy(self, x):
return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
def warmup(self, imgsz=(1, 3, 640, 640)):
# Warmup model by running inference once
warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
            for _ in range(2 if self.jit else 1):
self.forward(im) # warmup
@staticmethod
def _model_type(p='path/to/model.pt'):
# Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
# types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
        from ytracking.utils.export import export_formats
        from ytracking.utils.downloads import is_url
sf = list(export_formats().Suffix) # export suffixes
if not is_url(p, check=False):
check_suffix(p, sf) # checks
url = urlparse(p) # if url may be Triton inference server
types = [s in Path(p).name for s in sf]
types[8] &= not types[9] # tflite &= not edgetpu
triton = not any(types) and all([any(s in url.scheme for s in ['http', 'grpc']), url.netloc])
return types + [triton]
@staticmethod
def _load_metadata(f=Path('path/to/meta.yaml')):
# Load metadata from meta.yaml if it exists
if f.exists():
d = yaml_load(f)
return d['stride'], d['names'] # assign stride, names
return None, None
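# Minimal usage sketch (weights path assumed; raw output is pre-NMS):
#   model = DetectMultiBackend('yolov5s.pt', device=torch.device('cpu'))
#   model.warmup(imgsz=(1, 3, 640, 640))    # dry run to allocate/compile
#   y = model(torch.zeros(1, 3, 640, 640))  # forward pass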
class AutoShape(nn.Module):
# YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
agnostic = False # NMS class-agnostic
multi_label = False # NMS multiple labels per box
classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
max_det = 1000 # maximum number of detections per image
amp = False # Automatic Mixed Precision (AMP) inference
def __init__(self, model, verbose=True):
super().__init__()
if verbose:
LOGGER.info('Adding AutoShape... ')
copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes
self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance
self.pt = not self.dmb or model.pt # PyTorch model
self.model = model.eval()
if self.pt:
m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
m.inplace = False # Detect.inplace=False for safe multithread inference
m.export = True # do not output loss values
def _apply(self, fn):
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
self = super()._apply(fn)
if self.pt:
m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
@smart_inference_mode()
def forward(self, ims, size=640, augment=False, profile=False):
# Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
# file: ims = 'data/images/zidane.jpg' # str or PosixPath
# URI: = 'https://ultralytics.com/images/zidane.jpg'
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
# PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
# numpy: = np.zeros((640,1280,3)) # HWC
# torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
# multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
dt = (Profile(), Profile(), Profile())
with dt[0]:
if isinstance(size, int): # expand
size = (size, size)
p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param
autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference
if isinstance(ims, torch.Tensor): # torch
with amp.autocast(autocast):
return self.model(ims.to(p.device).type_as(p), augment=augment) # inference
# Pre-process
n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images
shape0, shape1, files = [], [], [] # image and inference shapes, filenames
for i, im in enumerate(ims):
f = f'image{i}' # filename
if isinstance(im, (str, Path)): # filename or uri
im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
im = np.asarray(exif_transpose(im))
elif isinstance(im, Image.Image): # PIL Image
im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
files.append(Path(f).with_suffix('.jpg').name)
if im.shape[0] < 5: # image in CHW
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input
s = im.shape[:2] # HWC
shape0.append(s) # image shape
g = max(size) / max(s) # gain
shape1.append([int(y * g) for y in s])
ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] # inf shape
x = [letterbox(im, shape1, auto=False)[0] for im in ims] # pad
x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
with amp.autocast(autocast):
# Inference
with dt[1]:
y = self.model(x, augment=augment) # forward
# Post-process
with dt[2]:
y = non_max_suppression(y if self.dmb else y[0],
self.conf,
self.iou,
self.classes,
self.agnostic,
self.multi_label,
max_det=self.max_det) # NMS
for i in range(n):
scale_boxes(shape1, y[i][:, :4], shape0[i])
return Detections(ims, y, files, dt, self.names, x.shape)
class Detections:
# YOLOv5 detections class for inference results
def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
super().__init__()
d = pred[0].device # device
gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations
self.ims = ims # list of images as numpy arrays
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
self.names = names # class names
self.files = files # image filenames
self.times = times # profiling times
self.xyxy = pred # xyxy pixels
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred) # number of images (batch size)
self.t = tuple(x.t / self.n * 1E3 for x in times) # timestamps (ms)
self.s = tuple(shape) # inference BCHW shape
def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
s, crops = '', []
for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
if pred.shape[0]:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
s = s.rstrip(', ')
if show or save or render or crop:
annotator = Annotator(im, example=str(self.names))
for *box, conf, cls in reversed(pred): # xyxy, confidence, class
label = f'{self.names[int(cls)]} {conf:.2f}'
if crop:
file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
crops.append({
'box': box,
'conf': conf,
'cls': cls,
'label': label,
'im': save_one_box(box, im, file=file, save=save)})
else: # all others
annotator.box_label(box, label if labels else '', color=colors(cls))
im = annotator.im
else:
s += '(no detections)'
im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
if show:
if is_jupyter():
from IPython.display import display
display(im)
else:
im.show(self.files[i])
if save:
f = self.files[i]
im.save(save_dir / f) # save
if i == self.n - 1:
LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
if render:
self.ims[i] = np.asarray(im)
if pprint:
s = s.lstrip('\n')
return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
if crop:
if save:
LOGGER.info(f'Saved results to {save_dir}\n')
return crops
@TryExcept('Showing images is not supported in this environment')
def show(self, labels=True):
self._run(show=True, labels=labels) # show results
def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir
self._run(save=True, labels=labels, save_dir=save_dir) # save results
def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
return self._run(crop=True, save=save, save_dir=save_dir) # crop results
def render(self, labels=True):
self._run(render=True, labels=labels) # render results
return self.ims
def pandas(self):
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
new = copy(self) # return copy
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
return new
def tolist(self):
# return a list of Detections objects, i.e. 'for result in results.tolist():'
r = range(self.n) # iterable
x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
# for d in x:
# for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
# setattr(d, k, getattr(d, k)[0]) # pop out of list
return x
def print(self):
LOGGER.info(self.__str__())
def __len__(self): # override len(results)
return self.n
def __str__(self): # override print(results)
return self._run(pprint=True) # print results
def __repr__(self):
return f'YOLOv5 {self.__class__} instance\n' + self.__str__()
class Proto(nn.Module):
# YOLOv5 mask Proto module for segmentation models
def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks
super().__init__()
self.cv1 = Conv(c1, c_, k=3)
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
self.cv2 = Conv(c_, c_, k=3)
self.cv3 = Conv(c_, c2)
def forward(self, x):
return self.cv3(self.cv2(self.upsample(self.cv1(x))))
class Classify(nn.Module):
# YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self,
c1,
c2,
k=1,
s=1,
p=None,
g=1,
dropout_p=0.0): # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
super().__init__()
c_ = 1280 # efficientnet_b0 size
self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1)
self.drop = nn.Dropout(p=dropout_p, inplace=True)
self.linear = nn.Linear(c_, c2) # to x(b,c2)
def forward(self, x):
if isinstance(x, list):
x = torch.cat(x, 1)
return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
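
A minimal end-to-end sketch of the classes above, assuming a local 'yolov5s.pt' checkpoint and an example image path (both placeholders, not part of this commit):

import torch
from ytracking.models.common import AutoShape, DetectMultiBackend

backend = DetectMultiBackend('yolov5s.pt', device=torch.device('cpu'))  # load weights
model = AutoShape(backend)                 # accepts cv2/np/PIL/torch inputs, applies NMS
results = model('data/images/zidane.jpg', size=640)  # Detections instance
results.print()                            # per-class counts and timing summary
df = results.pandas().xyxy[0]              # DataFrame: xmin..ymax, confidence, class, name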


@@ -0,0 +1,111 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Experimental modules
"""
import math
import numpy as np
import torch
import torch.nn as nn
from ytracking.utils.downloads import attempt_download
class Sum(nn.Module):
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
def __init__(self, n, weight=False): # n: number of inputs
super().__init__()
self.weight = weight # apply weights boolean
self.iter = range(n - 1) # iter object
if weight:
self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights
def forward(self, x):
y = x[0] # no weight
if self.weight:
w = torch.sigmoid(self.w) * 2
for i in self.iter:
y = y + x[i + 1] * w[i]
else:
for i in self.iter:
y = y + x[i + 1]
return y
class MixConv2d(nn.Module):
# Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy
super().__init__()
n = len(k) # number of convolutions
if equal_ch: # equal c_ per group
i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(n)] # intermediate channels
else: # equal weight.numel() per group
b = [c2] + [0] * n
a = np.eye(n + 1, n, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
self.m = nn.ModuleList([
nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)])
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU()
def forward(self, x):
return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
# Ensemble of models
def __init__(self):
super().__init__()
def forward(self, x, augment=False, profile=False, visualize=False):
y = [module(x, augment, profile, visualize)[0] for module in self]
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.stack(y).mean(0) # mean ensemble
y = torch.cat(y, 1) # nms ensemble
return y, None # inference, train output
def attempt_load(weights, device=None, inplace=True, fuse=True):
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    from ytracking.models.yolo import Detect, Model
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
ckpt = torch.load(attempt_download(w), map_location='cpu') # load
ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model
# Model compatibility updates
if not hasattr(ckpt, 'stride'):
ckpt.stride = torch.tensor([32.])
if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)):
ckpt.names = dict(enumerate(ckpt.names)) # convert to dict
model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode
# Module updates
for m in model.modules():
t = type(m)
if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
m.inplace = inplace
if t is Detect and not isinstance(m.anchor_grid, list):
delattr(m, 'anchor_grid')
setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
m.recompute_scale_factor = None # torch 1.11.0 compatibility
# Return model
if len(model) == 1:
return model[-1]
# Return detection ensemble
print(f'Ensemble created with {weights}\n')
for k in 'names', 'nc', 'yaml':
setattr(model, k, getattr(model[0], k))
model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
return model
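
A short sketch of attempt_load's two return shapes, with placeholder weight paths:

# Single checkpoint -> the FP32 model itself, in eval mode; a list -> an Ensemble whose
# forward concatenates per-model predictions for NMS-style ensembling.
model = attempt_load('yolov5s.pt', device='cpu')
ensemble = attempt_load(['yolov5s.pt', 'yolov5m.pt'], device='cpu')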


@@ -0,0 +1,59 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Default anchors for COCO data
# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
- [9,11, 21,19, 17,41] # P3/8
- [43,32, 39,70, 86,64] # P4/16
- [65,131, 134,130, 120,265] # P5/32
- [282,180, 247,354, 512,387] # P6/64
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
- [28,41, 67,59, 57,141] # P3/8
- [144,103, 129,227, 270,205] # P4/16
- [209,452, 455,396, 358,812] # P5/32
- [653,922, 1109,570, 1387,1187] # P6/64
# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
- [11,11, 13,30, 29,20] # P3/8
- [30,46, 61,38, 39,92] # P4/16
- [78,80, 146,66, 79,163] # P5/32
- [149,150, 321,143, 157,303] # P6/64
- [257,402, 359,290, 524,372] # P7/128
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
- [19,22, 54,36, 32,77] # P3/8
- [70,83, 138,71, 75,173] # P4/16
- [165,159, 148,334, 375,151] # P5/32
- [334,317, 251,626, 499,474] # P6/64
- [750,326, 534,814, 1079,818] # P7/128
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
- [29,34, 81,55, 47,115] # P3/8
- [105,124, 207,107, 113,259] # P4/16
- [247,238, 222,500, 563,227] # P5/32
- [501,476, 376,939, 749,711] # P6/64
- [1126,489, 801,1222, 1618,1227] # P7/128
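
Each row above is a flat list of anchor widths and heights in pixels, three (w, h) pairs per output layer. A small sketch (values copied from anchors_p5_640) of how a row groups into pairs:

row = [10, 13, 16, 30, 33, 23]                       # anchors_p5_640, P3/8 layer
pairs = [(row[i], row[i + 1]) for i in range(0, len(row), 2)]
print(pairs)                                         # [(10, 13), (16, 30), (33, 23)]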


@@ -0,0 +1,51 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3-SPP head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, SPP, [512, [5, 9, 13]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]


@@ -0,0 +1,41 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,14, 23,27, 37,58] # P4/16
- [81,82, 135,169, 344,319] # P5/32
# YOLOv3-tiny backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [16, 3, 1]], # 0
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
[-1, 1, Conv, [32, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
[-1, 1, Conv, [512, 3, 1]],
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
]
# YOLOv3-tiny head
head:
[[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
]


@@ -0,0 +1,51 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3 head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]


@@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 BiFPN head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]


@@ -0,0 +1,42 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 FPN head
head:
[[-1, 3, C3, [1024, False]], # 10 (P5/32-large)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, C3, [512, False]], # 14 (P4/16-medium)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, C3, [256, False]], # 18 (P3/8-small)
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]


@@ -0,0 +1,54 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 2], 1, Concat, [1]], # cat backbone P2
[-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
[-1, 1, Conv, [128, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P3
[-1, 3, C3, [256, False]], # 24 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 27 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 30 (P5/32-large)
[[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
]


@@ -0,0 +1,41 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 6, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 1024 ] ],
[ -1, 1, SPPF, [ 1024, 5 ] ], # 9
]
# YOLOv5 v6.0 head with (P3, P4) outputs
head:
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 13
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 20 (P4/16-medium)
[ [ 17, 20 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4)
]


@@ -0,0 +1,56 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]


@@ -0,0 +1,67 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
[-1, 3, C3, [1280]],
[-1, 1, SPPF, [1280, 5]], # 13
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
head:
[[-1, 1, Conv, [1024, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 10], 1, Concat, [1]], # cat backbone P6
[-1, 3, C3, [1024, False]], # 17
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 21
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 25
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 29 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 26], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 32 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 22], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 35 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
[-1, 1, Conv, [1024, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P7
[-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
[[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
]


@@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 PANet head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]


@@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
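
This P6 variant extends the backbone to a stride-64 level (layers 9-11) and adds a fourth detection output. DetectionModel (see ytracking/models/yolo.py below) derives the strides from a dummy forward pass; a sketch of that arithmetic, assuming the same 256-pixel probe image as the source:

probe = 256
map_sizes = [32, 16, 8, 4]              # P3, P4, P5, P6 feature-map sizes for a 256px input
print([probe // s for s in map_sizes])  # -> [8, 16, 32, 64]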

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,49 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
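
The activation: key above is what makes this config special; parse_model() in ytracking/models/yolo.py (below) reads it and replaces the class-level default on Conv, so every Conv built afterwards inherits it. A minimal sketch of that mechanism (import path as used by tf.py in this commit):

import torch.nn as nn
from models.common import Conv

act = "nn.LeakyReLU(0.1)"     # value taken from the YAML above
Conv.default_act = eval(act)  # class attribute, so all subsequently built Conv() layers use it
conv = Conv(3, 16, k=3, s=1)
print(conv.act)               # -> LeakyReLU(negative_slope=0.1)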

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3Ghost, [128]],
[-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3Ghost, [256]],
[-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3Ghost, [512]],
[-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3Ghost, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, GhostConv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3Ghost, [512, False]], # 13
[-1, 1, GhostConv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
[-1, 1, GhostConv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
[-1, 1, GhostConv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
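
This variant swaps Conv/C3 for GhostConv/C3Ghost to cut parameters and FLOPs. A rough sketch of the GhostNet idea behind it (not the exact implementation in this commit): half the output channels come from a dense convolution, the other half from a cheap depth-wise convolution over that result:

import torch
import torch.nn as nn

class GhostConvSketch(nn.Module):
    def __init__(self, c1, c2, k=1, s=1):
        super().__init__()
        c_ = c2 // 2
        self.cv1 = nn.Conv2d(c1, c_, k, s, k // 2, bias=False)        # dense half
        self.cv2 = nn.Conv2d(c_, c_, 5, 1, 2, groups=c_, bias=False)  # cheap "ghost" half
    def forward(self, x):
        y = self.cv1(x)
        return torch.cat((y, self.cv2(y)), 1)

print(GhostConvSketch(64, 128)(torch.zeros(1, 64, 8, 8)).shape)  # -> torch.Size([1, 128, 8, 8])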

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]
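
The Segment row above passes two extra args over Detect: [nc, anchors, 32, 256] adds nm=32 mask coefficients per anchor and an npr=256-channel Proto network (see the Segment class in ytracking/models/yolo.py below). The per-anchor output width grows accordingly:

nc, nm = 80, 32
no_detect = nc + 5            # box(4) + objectness(1) + class scores
no_segment = 5 + nc + nm      # Detect outputs plus 32 mask coefficients
print(no_detect, no_segment)  # -> 85 117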

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.5 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

608
ytracking/models/tf.py Normal file
View File

@ -0,0 +1,608 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
TensorFlow, Keras and TFLite versions of YOLOv5
Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
Usage:
$ python models/tf.py --weights yolov5s.pt
Export:
$ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
"""
import argparse
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd()) # relative
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
from tensorflow import keras
from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv,
DWConvTranspose2d, Focus, autopad)
from models.experimental import MixConv2d, attempt_load
from models.yolo import Detect, Segment
from utils.activations import SiLU
from utils.general import LOGGER, make_divisible, print_args
class TFBN(keras.layers.Layer):
# TensorFlow BatchNormalization wrapper
def __init__(self, w=None):
super().__init__()
self.bn = keras.layers.BatchNormalization(
beta_initializer=keras.initializers.Constant(w.bias.numpy()),
gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
epsilon=w.eps)
def call(self, inputs):
return self.bn(inputs)
class TFPad(keras.layers.Layer):
# Pad inputs in spatial dimensions 1 and 2
def __init__(self, pad):
super().__init__()
if isinstance(pad, int):
self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
else: # tuple/list
self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
def call(self, inputs):
return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
class TFConv(keras.layers.Layer):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
# ch_in, ch_out, kernel, stride, padding, groups, activation, weights
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
# TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
# see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
conv = keras.layers.Conv2D(
filters=c2,
kernel_size=k,
strides=s,
padding='SAME' if s == 1 else 'VALID',
use_bias=not hasattr(w, 'bn'),
kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
self.act = activations(w.act) if act else tf.identity
def call(self, inputs):
return self.act(self.bn(self.conv(inputs)))
class TFDWConv(keras.layers.Layer):
# Depthwise convolution
def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
# ch_in, ch_out, kernel, stride, padding, activation, weights
super().__init__()
assert c2 % c1 == 0, f'TFDWConv() output={c2} must be a multiple of input={c1} channels'
conv = keras.layers.DepthwiseConv2D(
kernel_size=k,
depth_multiplier=c2 // c1,
strides=s,
padding='SAME' if s == 1 else 'VALID',
use_bias=not hasattr(w, 'bn'),
depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
self.act = activations(w.act) if act else tf.identity
def call(self, inputs):
return self.act(self.bn(self.conv(inputs)))
class TFDWConvTranspose2d(keras.layers.Layer):
# Depthwise ConvTranspose2d
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
# ch_in, ch_out, kernel, stride, padding, padding_out, weights
super().__init__()
assert c1 == c2, f'TFDWConvTranspose2d() output={c2} must be equal to input={c1} channels'
assert k == 4 and p1 == 1, 'TFDWConvTranspose2d() only valid for k=4 and p1=1'
weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
self.c1 = c1
self.conv = [
keras.layers.Conv2DTranspose(filters=1,
kernel_size=k,
strides=s,
padding='VALID',
output_padding=p2,
use_bias=True,
kernel_initializer=keras.initializers.Constant(weight[..., i:i + 1]),
bias_initializer=keras.initializers.Constant(bias[i])) for i in range(c1)]
def call(self, inputs):
return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
class TFFocus(keras.layers.Layer):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
# ch_in, ch_out, kernel, stride, padding, groups, activation, weights
super().__init__()
self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
# inputs = inputs / 255 # normalize 0-255 to 0-1
inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
return self.conv(tf.concat(inputs, 3))
class TFBottleneck(keras.layers.Layer):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
self.add = shortcut and c1 == c2
def call(self, inputs):
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFCrossConv(keras.layers.Layer):
# Cross Convolution
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
self.add = shortcut and c1 == c2
def call(self, inputs):
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFConv2d(keras.layers.Layer):
# Substitution for PyTorch nn.Conv2d
def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
self.conv = keras.layers.Conv2D(filters=c2,
kernel_size=k,
strides=s,
padding='VALID',
use_bias=bias,
kernel_initializer=keras.initializers.Constant(
w.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None)
def call(self, inputs):
return self.conv(inputs)
class TFBottleneckCSP(keras.layers.Layer):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
self.bn = TFBN(w.bn)
self.act = lambda x: keras.activations.swish(x)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
y1 = self.cv3(self.m(self.cv1(inputs)))
y2 = self.cv2(inputs)
return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
class TFC3(keras.layers.Layer):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
class TFC3x(keras.layers.Layer):
# C3 module with cross-convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
self.m = keras.Sequential([
TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)])
def call(self, inputs):
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
class TFSPP(keras.layers.Layer):
# Spatial pyramid pooling layer used in YOLOv3-SPP
def __init__(self, c1, c2, k=(5, 9, 13), w=None):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k]
def call(self, inputs):
x = self.cv1(inputs)
return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
class TFSPPF(keras.layers.Layer):
# Spatial pyramid pooling-Fast layer
def __init__(self, c1, c2, k=5, w=None):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')
def call(self, inputs):
x = self.cv1(inputs)
y1 = self.m(x)
y2 = self.m(y1)
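# three chained k=5 max-pools cover the same receptive fields as SPP's parallel 5/9/13 pools, at lower cost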
return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
class TFDetect(keras.layers.Layer):
# TF YOLOv5 Detect layer
def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer
super().__init__()
self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [tf.zeros(1)] * self.nl # init grid
self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
self.training = False # set to False after building model
self.imgsz = imgsz
for i in range(self.nl):
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
self.grid[i] = self._make_grid(nx, ny)
def call(self, inputs):
z = [] # inference output
x = []
for i in range(self.nl):
x.append(self.m[i](inputs[i]))
# x(bs,20,20,255) to x(bs,3,20,20,85)
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
if not self.training: # inference
y = x[i]
grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
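# decode as in Detect: xy = (2*sigmoid(t) - 0.5 + cell) * stride, wh = (2*sigmoid(t))**2 * anchor
# (the -0.5 is folded into `grid` above and the 2**2 into the `* 4` on anchor_grid)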
xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy
wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
# Normalize xywh to 0-1 to reduce calibration error
xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1)
z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), )
@staticmethod
def _make_grid(nx=20, ny=20):
# yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
# return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
class TFSegment(TFDetect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
super().__init__(nc, anchors, ch, imgsz, w)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv
self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos
self.detect = TFDetect.call
def call(self, x):
p = self.proto(x[0])
# p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0])) # (optional) full-size protos
p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160)
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p)
class TFProto(keras.layers.Layer):
def __init__(self, c1, c_=256, c2=32, w=None):
super().__init__()
self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
self.upsample = TFUpsample(None, scale_factor=2, mode='nearest')
self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
self.cv3 = TFConv(c_, c2, w=w.cv3)
def call(self, inputs):
return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
class TFUpsample(keras.layers.Layer):
# TF version of torch.nn.Upsample()
def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
super().__init__()
assert scale_factor % 2 == 0, 'scale_factor must be a multiple of 2'
self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode)
# self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
# with default arguments: align_corners=False, half_pixel_centers=False
# self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
# size=(x.shape[1] * 2, x.shape[2] * 2))
def call(self, inputs):
return self.upsample(inputs)
class TFConcat(keras.layers.Layer):
# TF version of torch.concat()
def __init__(self, dimension=1, w=None):
super().__init__()
assert dimension == 1, 'only NCHW to NHWC concat (dim 1 -> 3) is supported'
self.d = 3
def call(self, inputs):
return tf.concat(inputs, self.d)
def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m_str = m
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
except NameError:
pass
n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [
nn.Conv2d, Conv, DWConv, DWConvTranspose2d, Bottleneck, SPP, SPPF, MixConv2d, Focus, CrossConv,
BottleneckCSP, C3, C3x]:
c1, c2 = ch[f], args[0]
c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3, C3x]:
args.insert(2, n)
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
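# unlike yolo.parse_model, ch here keeps the input-channels entry at index 0, hence the x + 1 offset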
elif m in [Detect, Segment]:
args.append([ch[x + 1] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, 8)
args.append(imgsz)
else:
c2 = ch[f]
tf_m = eval('TF' + m_str.replace('nn.', ''))
m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
else tf_m(*args, w=model.model[i]) # module
torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum(x.numel() for x in torch_m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
ch.append(c2)
return keras.Sequential(layers), sorted(save)
class TFModel:
# TF YOLOv5 model
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg) as f:
self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
# Define model
if nc and nc != self.yaml['nc']:
LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
self.yaml['nc'] = nc # override yaml value
self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
def predict(self,
inputs,
tf_nms=False,
agnostic_nms=False,
topk_per_class=100,
topk_all=100,
iou_thres=0.45,
conf_thres=0.25):
y = [] # outputs
x = inputs
for m in self.model.layers:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
x = m(x) # run
y.append(x if m.i in self.savelist else None) # save output
# Add TensorFlow NMS
if tf_nms:
boxes = self._xywh2xyxy(x[0][..., :4])
probs = x[0][:, :, 4:5]
classes = x[0][:, :, 5:]
scores = probs * classes
if agnostic_nms:
nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
else:
boxes = tf.expand_dims(boxes, 2)
nms = tf.image.combined_non_max_suppression(boxes,
scores,
topk_per_class,
topk_all,
iou_thres,
conf_thres,
clip_boxes=False)
return (nms, )
return x # output [1,6300,85] = [xywh, conf, class0, class1, ...]
# x = x[0] # [x(1,6300,85), ...] to x(6300,85)
# xywh = x[..., :4] # x(6300,4) boxes
# conf = x[..., 4:5] # x(6300,1) confidences
# cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
# return tf.concat([conf, cls, xywh], 1)
@staticmethod
def _xywh2xyxy(xywh):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
class AgnosticNMS(keras.layers.Layer):
# TF Agnostic NMS
def call(self, input, topk_all, iou_thres, conf_thres):
# wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
input,
fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
name='agnostic_nms')
@staticmethod
def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS
boxes, classes, scores = x
class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
scores_inp = tf.reduce_max(scores, -1)
selected_inds = tf.image.non_max_suppression(boxes,
scores_inp,
max_output_size=topk_all,
iou_threshold=iou_thres,
score_threshold=conf_thres)
selected_boxes = tf.gather(boxes, selected_inds)
padded_boxes = tf.pad(selected_boxes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
mode='CONSTANT',
constant_values=0.0)
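# pad every image's detections to a fixed topk_all length so tf.map_fn sees uniform output shapes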
selected_scores = tf.gather(scores_inp, selected_inds)
padded_scores = tf.pad(selected_scores,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode='CONSTANT',
constant_values=-1.0)
selected_classes = tf.gather(class_inds, selected_inds)
padded_classes = tf.pad(selected_classes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode='CONSTANT',
constant_values=-1.0)
valid_detections = tf.shape(selected_inds)[0]
return padded_boxes, padded_scores, padded_classes, valid_detections
def activations(act=nn.SiLU):
# Returns TF activation from input PyTorch activation
if isinstance(act, nn.LeakyReLU):
return lambda x: keras.activations.relu(x, alpha=0.1)
elif isinstance(act, nn.Hardswish):
return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
elif isinstance(act, (nn.SiLU, SiLU)):
return lambda x: keras.activations.swish(x)
else:
raise Exception(f'no matching TensorFlow activation found for PyTorch activation {act}')
def representative_dataset_gen(dataset, ncalib=100):
# Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
im = np.transpose(img, [1, 2, 0])
im = np.expand_dims(im, axis=0).astype(np.float32)
im /= 255
yield [im]
if n >= ncalib:
break
def run(
weights=ROOT / 'yolov5s.pt', # weights path
imgsz=(640, 640), # inference size h,w
batch_size=1, # batch size
dynamic=False, # dynamic batch size
):
# PyTorch model
im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
model = attempt_load(weights, device=torch.device('cpu'), inplace=True, fuse=False)
_ = model(im) # inference
model.info()
# TensorFlow model
im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
_ = tf_model.predict(im) # inference
# Keras model
im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
keras_model.summary()
LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')
def parse_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--dynamic', action='store_true', help='dynamic batch size')
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
run(**vars(opt))
if __name__ == '__main__':
opt = parse_opt()
main(opt)

391
ytracking/models/yolo.py Normal file
View File

@ -0,0 +1,391 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
YOLO-specific modules
Usage:
$ python models/yolo.py --cfg yolov5s.yaml
"""
import argparse
import contextlib
import os
import platform
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
if platform.system() != 'Windows':
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import * # noqa
from models.experimental import * # noqa
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
from utils.plots import feature_visualization
from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device,
time_sync)
try:
import thop # for FLOPs computation
except ImportError:
thop = None
class Detect(nn.Module):
# YOLOv5 Detect head for detection models
stride = None # strides computed during build
dynamic = False # force grid reconstruction
export = False # export mode
def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
super().__init__()
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid
self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid
self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.inplace = inplace # use inplace ops (e.g. slice assignment)
def forward(self, x):
z = [] # inference output
for i in range(self.nl):
x[i] = self.m[i](x[i]) # conv
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
if not self.training: # inference
if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
if isinstance(self, Segment): # (boxes + masks)
xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
else: # Detect (boxes only)
xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf), 4)
z.append(y.view(bs, self.na * nx * ny, self.no))
return x if self.training else (torch.cat(z, 1), ) if self.export else (torch.cat(z, 1), x)
def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, '1.10.0')):
d = self.anchors[i].device
t = self.anchors[i].dtype
shape = 1, self.na, ny, nx, 2 # grid shape
y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
yv, xv = torch.meshgrid(y, x, indexing='ij') if torch_1_10 else torch.meshgrid(y, x) # indexing='ij' needs torch>=1.10; fallback keeps older torch compatible
grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
return grid, anchor_grid
class Segment(Detect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
super().__init__(nc, anchors, ch, inplace)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.proto = Proto(ch[0], self.npr, self.nm) # protos
self.detect = Detect.forward
def forward(self, x):
p = self.proto(x[0])
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
class BaseModel(nn.Module):
# YOLOv5 base model
def forward(self, x, profile=False, visualize=False):
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_once(self, x, profile=False, visualize=False):
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
self._profile_one_layer(m, x, dt)
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
return x
def _profile_one_layer(self, m, x, dt):
c = m == self.model[-1] # is final layer, copy input as inplace fix
o = thop.profile(m, inputs=(x.copy() if c else x, ), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs
t = time_sync()
for _ in range(10):
m(x.copy() if c else x)
dt.append((time_sync() - t) * 100)
if m == self.model[0]:
LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
if c:
LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
LOGGER.info('Fusing layers... ')
for m in self.model.modules():
if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, 'bn') # remove batchnorm
m.forward = m.forward_fuse # update forward
self.info()
return self
def info(self, verbose=False, img_size=640): # print model information
model_info(self, verbose, img_size)
def _apply(self, fn):
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
self = super()._apply(fn)
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
class DetectionModel(BaseModel):
# YOLOv5 detection model
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg, encoding='ascii', errors='ignore') as f:
self.yaml = yaml.safe_load(f) # model dict
# Define model
ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
if nc and nc != self.yaml['nc']:
LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
self.yaml['nc'] = nc # override yaml value
if anchors:
LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
self.yaml['anchors'] = round(anchors) # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
self.names = [str(i) for i in range(self.yaml['nc'])] # default names
self.inplace = self.yaml.get('inplace', True)
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
s = 256 # 2x min stride
m.inplace = self.inplace
forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward
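# 256px probe -> feature maps of 32/16/8 (and 4 for P6 models), i.e. strides [8, 16, 32(, 64)]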
check_anchor_order(m)
m.anchors /= m.stride.view(-1, 1, 1)
self.stride = m.stride
self._initialize_biases() # only run once
# Init weights, biases
initialize_weights(self)
self.info()
LOGGER.info('')
def forward(self, x, augment=False, profile=False, visualize=False):
if augment:
return self._forward_augment(x) # augmented inference, None
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_augment(self, x):
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self._forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi = self._descale_pred(yi, fi, si, img_size)
y.append(yi)
y = self._clip_augmented(y) # clip augmented tails
return torch.cat(y, 1), None # augmented inference, train
def _descale_pred(self, p, flips, scale, img_size):
# de-scale predictions following augmented inference (inverse operation)
if self.inplace:
p[..., :4] /= scale # de-scale
if flips == 2:
p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
elif flips == 3:
p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
else:
x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
if flips == 2:
y = img_size[0] - y # de-flip ud
elif flips == 3:
x = img_size[1] - x # de-flip lr
p = torch.cat((x, y, wh, p[..., 4:]), -1)
return p
def _clip_augmented(self, y):
# Clip YOLOv5 augmented inference tails
nl = self.model[-1].nl # number of detection layers (P3-P5)
g = sum(4 ** x for x in range(nl)) # grid points
e = 1 # exclude layer count
i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices
y[0] = y[0][:, :-i] # large
i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
y[-1] = y[-1][:, i:] # small
return y
def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
# https://arxiv.org/abs/1708.02002 section 3.3
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum()) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
class SegmentationModel(DetectionModel):
# YOLOv5 segmentation model
def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
super().__init__(cfg, ch, nc, anchors)
class ClassificationModel(BaseModel):
# YOLOv5 classification model
def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index
super().__init__()
self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
def _from_detection_model(self, model, nc=1000, cutoff=10):
# Create a YOLOv5 classification model from a YOLOv5 detection model
if isinstance(model, DetectMultiBackend):
model = model.model # unwrap DetectMultiBackend
model.model = model.model[:cutoff] # backbone
m = model.model[-1] # last layer
ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels # ch into module
c = Classify(ch, nc) # Classify()
c.i, c.f, c.type = m.i, m.f, 'models.common.Classify' # index, from, type
model.model[-1] = c # replace
self.model = model.model
self.stride = model.stride
self.save = []
self.nc = nc
def _from_yaml(self, cfg):
# Create a YOLOv5 classification model from a *.yaml file
self.model = None
def parse_model(d, ch): # model_dict, input_channels(3)
# Parse a YOLOv5 model.yaml dictionary
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
if act:
Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
LOGGER.info(f"{colorstr('activation:')} {act}") # print
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
with contextlib.suppress(NameError):
args[j] = eval(a) if isinstance(a, str) else a # eval strings
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in {
Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
c1, c2 = ch[f], args[0]
if c2 != no: # if not output
c2 = make_divisible(c2 * gw, 8)
args = [c1, c2, *args[1:]]
if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
args.insert(2, n) # number of repeats
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[x] for x in f)
# TODO: channel, gw, gd
elif m in {Detect, Segment}:
args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, 8)
elif m is Contract:
c2 = ch[f] * args[0] ** 2
elif m is Expand:
c2 = ch[f] // args[0] ** 2
else:
c2 = ch[f]
m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum(x.numel() for x in m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--profile', action='store_true', help='profile model speed')
parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
opt = parser.parse_args()
opt.cfg = check_yaml(opt.cfg) # check YAML
print_args(vars(opt))
device = select_device(opt.device)
# Create model
im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
model = Model(opt.cfg).to(device)
# Options
if opt.line_profile: # profile layer by layer
model(im, profile=True)
elif opt.profile: # profile forward-backward
results = profile(input=im, ops=[model], n=3)
elif opt.test: # test all models
for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
try:
_ = Model(cfg)
except Exception as e:
print(f'Error in {cfg}: {e}')
else: # report fused model summary
model.fuse()
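
A minimal usage sketch for the classes above (the cfg path is hypothetical; any of the detection YAML configs in this commit would do):

import torch
from models.yolo import DetectionModel

model = DetectionModel(cfg='models/yolov5s.yaml', ch=3, nc=80)  # hypothetical config path
model.eval()
with torch.no_grad():
    pred, feats = model(torch.zeros(1, 3, 640, 640))  # eval mode returns (concatenated preds, per-level maps)
print(pred.shape)  # -> torch.Size([1, 25200, 85]) for a 640px P5 model with nc=80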

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,49 @@
# YOLOv5 requirements
# Usage: pip install -r requirements.txt
# Base ------------------------------------------------------------------------
gitpython>=3.1.30
matplotlib>=3.3
numpy>=1.22.2
opencv-python>=4.1.1
Pillow>=7.1.2
psutil # system resources
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
thop>=0.1.1 # FLOPs computation
torch>=1.8.0 # see https://pytorch.org/get-started/locally (recommended)
torchvision>=0.9.0
tqdm>=4.64.0
ultralytics>=8.0.147
# protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012
# Logging ---------------------------------------------------------------------
# tensorboard>=2.4.1
# clearml>=1.2.0
# comet
# Plotting --------------------------------------------------------------------
pandas>=1.1.4
seaborn>=0.11.0
# Export ----------------------------------------------------------------------
# coremltools>=6.0 # CoreML export
# onnx>=1.10.0 # ONNX export
# onnx-simplifier>=0.4.1 # ONNX simplifier
# nvidia-pyindex # TensorRT export
# nvidia-tensorrt # TensorRT export
# scikit-learn<=1.1.2 # CoreML quantization
# tensorflow>=2.4.0 # TF exports (-cpu, -aarch64, -macos)
# tensorflowjs>=3.9.0 # TF.js export
# openvino-dev>=2023.0 # OpenVINO export
# Deploy ----------------------------------------------------------------------
setuptools>=65.5.1 # Snyk vulnerability fix
# tritonclient[all]~=2.24.0
# Extras ----------------------------------------------------------------------
# ipython # interactive notebook
# mss # screenshots
# albumentations>=1.0.3
# pycocotools>=2.0.6 # COCO mAP

159
ytracking/track_.py Normal file
View File

@ -0,0 +1,159 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
import argparse
import csv
import os
import platform
import sys
from pathlib import Path
import glob
import numpy as np
import pickle
import torch
from tools.config import cfg
sys.path.append('./ytracking')
from models.common import DetectMultiBackend
from utils.dataloaders import LoadImages
from utils.general import (LOGGER, Profile, check_img_size, check_requirements, colorstr, cv2,
increment_path, non_max_suppression, scale_boxes, strip_optimizer)
from utils.torch_utils import select_device, smart_inference_mode
'''Integrated tracking module; outputs tracking-result files (.npy)'''
# from ultralytics.engine.results import Boxes # Results
# from ultralytics.utils import IterableSimpleNamespace, yaml_load
from tracking.utils.plotting import Annotator, colors
from tracking.utils import Boxes, IterableSimpleNamespace, yaml_load
from tracking.trackers import BOTSORT, BYTETracker
from tracking.utils.showtrack import drawtracks
import time
def init_trackers(tracker_yaml=None, bs=1):
"""
Initialize trackers for object tracking during prediction.
"""
# tracker_yaml = r"./tracking/trackers/cfg/botsort.yaml"
TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
cfg = IterableSimpleNamespace(**yaml_load(tracker_yaml))
trackers = []
for _ in range(bs):
tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
trackers.append(tracker)
return trackers
@smart_inference_mode()
def run(
# weights=cfg.tracking_model, # model path or triton URL
Model, # model path or triton URL
source=None, # file/dir/URL/glob/screen/0(webcam)
project=r'./runs/detect', # save results to project/name
tracker_yaml=cfg.botsort,
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
bs=1, # batch_size
save_img=True, # save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
line_thickness=3, # bounding box thickness (pixels)
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
):
if source is None:
raise ValueError("Have to provide --source argument")
# Load model
# device = select_device(device)
# model = DetectMultiBackend(weights, device=device, dnn=dnn, fp16=half)
if Model is None:
raise ValueError("Have to provide --model argument")
model = Model.yoloModel
print(model.stride, model.names, model.pt)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Run inference
model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup
##============================= create save_dir, where detection/tracking images are stored
source = str(source)
save_dir = Path(project) / Path(source).stem
# Dataloader
seen, dt = 0, (Profile(), Profile(), Profile())
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
## ================================================= create the tracker object
tracker = init_trackers(tracker_yaml, bs)[0]
track_boxes = np.empty((0, 9), dtype=np.float32)
features_dict = {}
frameid_img = {}
for path, im, im0s, vid_cap, s in dataset:
# img preprocess
with dt[0]:
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
with dt[1]:
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred = model(im, augment=augment, visualize=visualize)
# NMS
with dt[2]:
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
# Process predictions
for i, det in enumerate(pred): # per image
seen += 1
frameid_img[seen] = im0s.copy()
p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
s += '%gx%g ' % im.shape[2:] # print string
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
# boxes_and_imgs.append((det.cpu().numpy(), im0, frame))
## ================================================================ written by WQG
det_tracking = Boxes(det, im0.shape).cpu().numpy()
tracks = tracker.update(det_tracking, im0)
if len(tracks):
tracks[:, 7] = seen
track_boxes = np.concatenate([track_boxes, tracks], axis=0)
feat_dict = {int(x.idx): x.curr_feat for x in tracker.tracked_stracks if x.is_activated}
frame_id = tracks[0, 7]
features_dict.update({int(frame_id): feat_dict})
return track_boxes, features_dict, frameid_img
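# A minimal usage sketch of run()'s outputs (model_wrapper is a hypothetical
# object exposing .yoloModel). track_boxes columns are
# [x1, y1, x2, y2, track_id, score, cls, frame_index, box_id], and
# features_dict maps frame_index -> {box_id: reid feature}:
#
#   track_boxes, features_dict, frameid_img = run(Model=model_wrapper,
#                                                 source='video.mp4')
#   for tid in np.unique(track_boxes[:, 4]).astype(int):
#       tboxes = track_boxes[track_boxes[:, 4] == tid]  # one track's boxes
#       feats = [features_dict[int(f)][int(b)]          # its reid features
#                for f, b in tboxes[:, 7:9]
#                if int(f) in features_dict and int(b) in features_dict[int(f)]]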
def main():
ROOT = Path(Path.cwd())
check_requirements(ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
optdict = {'weights': r"D:/Project/ieemoo-ai/tools/ckpts/best_158734_cls11_noaug10.pt",
'source': r"D:/Project/ieemoo-ai/testdata/88.mp4",
}
run(**optdict)
if __name__ == '__main__':
main()

226
ytracking/track_.py.bak Normal file
View File

@ -0,0 +1,226 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
import argparse
import csv
import os
import platform
import sys
from pathlib import Path
import glob
import numpy as np
import pickle
import torch
# =============================================================================
# FILE = Path(__file__).resolve()
# ROOT = FILE.parents[0] # YOLOv5 root directory
# if str(ROOT) not in sys.path:
# sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
# =============================================================================
sys.path.append('./ytracking')
from models.common import DetectMultiBackend
from utils.dataloaders import LoadImages
from utils.general import (LOGGER, Profile, check_img_size, check_requirements, colorstr, cv2,
increment_path, non_max_suppression, scale_boxes, strip_optimizer)
from utils.torch_utils import select_device, smart_inference_mode
'''Integrated tracking module; outputs tracking-result files (.npy)'''
# from ultralytics.engine.results import Boxes # Results
# from ultralytics.utils import IterableSimpleNamespace, yaml_load
from tracking.utils.plotting import Annotator, colors
from tracking.utils import Boxes, IterableSimpleNamespace, yaml_load
from tracking.trackers import BOTSORT, BYTETracker
from tracking.utils.showtrack import drawtracks
# tracker_yaml = r"./tracking/trackers/cfg/botsort.yaml"
def init_trackers(tracker_yaml=None, bs=1):
"""
Initialize trackers for object tracking during prediction.
"""
# tracker_yaml = r"./tracking/trackers/cfg/botsort.yaml"
TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
cfg = IterableSimpleNamespace(**yaml_load(tracker_yaml))
trackers = []
for _ in range(bs):
tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
trackers.append(tracker)
return trackers
@smart_inference_mode()
def run(
weights=r"D:/Project/ieemoo-ai/tools/ckpts/best_158734_cls11_noaug10.pt", # model path or triton URL
source=r"D:/Project/ieemoo-ai/testdata/88.mp4", # file/dir/URL/glob/screen/0(webcam)
project=r'./runs/detect', # save results to project/name
name='exp', # save results to project/name
tracker_yaml="D:/Project/ieemoo-ai/ytracking/tracking/trackers/cfg/botsort.yaml",
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
bs=1, # batch_size
save_txt=False, # save results to *.txt
save_img=True, # save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
exist_ok=False, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
):
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Run inference
model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup
##============================= create save_dir, where detection/tracking images are stored
source = str(source)
save_dir = Path(project) / Path(source).stem
if save_dir.exists():
print(Path(source).stem)
# return
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
else:
save_dir.mkdir(parents=True, exist_ok=True)
# Dataloader
seen, dt = 0, (Profile(), Profile(), Profile())
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
## ================================================= create the tracker object
tracker = init_trackers(tracker_yaml, bs)[0]
track_boxes = np.empty((0, 9), dtype=np.float32)
features_dict = {}
for path, im, im0s, vid_cap, s in dataset:
# img preprocess
with dt[0]:
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
with dt[1]:
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred = model(im, augment=augment, visualize=visualize)
# NMS
with dt[2]:
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
# Process predictions
for i, det in enumerate(pred): # per image
seen += 1
p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
im0_ant = im0.copy()
p = Path(p) # to Path
save_path = str(save_dir / p.name) # im.jpg
s += '%gx%g ' % im.shape[2:] # print string
annotator = Annotator(im0_ant, line_width=line_thickness, example=str(names)) if save_img else None
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
# boxes_and_imgs.append((det.cpu().numpy(), im0, frame))
## ================================================================ written by WQG
det_tracking = Boxes(det, im0.shape).cpu().numpy()
tracks = tracker.update(det_tracking, im0)
if len(tracks):
track_boxes = np.concatenate([track_boxes, tracks], axis=0)
feat_dict = {int(x.idx): x.curr_feat for x in tracker.tracked_stracks if x.is_activated}
frame_id = tracks[0, 7]
features_dict.update({int(frame_id): feat_dict})
if annotator is not None:
for *xyxy, id, conf, cls, fid, bid in reversed(tracks):
name = ('' if id == -1 else f'id:{int(id)} ') + names[int(cls)]
label = None if hide_labels else (name if hide_conf else f'{name} {conf:.2f}')
if id >= 0 and cls == 0:
color = colors(int(cls), True)
elif id >= 0 and cls != 0:
color = colors(int(id), True)
else:
color = colors(19, True) # 19 is the last element of the color palette
annotator.box_label(xyxy, label, color=color)
# Save tracking image
if annotator is not None:
save_path_img, ext = os.path.splitext(save_path)
imgpath = save_path_img + f"_{dataset.frame}.png"
cv2.imwrite(str(imgpath), annotator.result())
# Print time (inference-only)
LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
## ======================================================================== written by WQG
''' track_boxes: Array, [x1, y1, x2, y2, track_id, score, cls, frame_index, box_id] '''
if save_img:
filename = os.path.split(save_path_img)[-1]
'''====== save in './run/detect/' ======'''
imgshow = drawtracks(track_boxes)
showpath_1 = save_path_img + "_show.png"
cv2.imwrite(showpath_1, imgshow)
'''====== save tracks data ======'''
tracks_dir = Path('D:/Project/ieemoo-ai/ytracking/tracking/tracking/data/tracks/')
if not tracks_dir.exists():
tracks_dir.mkdir(parents=True, exist_ok=True)
tracks_path = tracks_dir.joinpath(filename + ".npy")
np.save(tracks_path, track_boxes)
'''====== save reid features data ======'''
feats_dir = Path('D:/Project/ieemoo-ai/ytracking/tracking/data/trackfeats/')
if not feats_dir.exists():
feats_dir.mkdir(parents=True, exist_ok=True)
feats_path = feats_dir.joinpath(f'{filename}.pkl')
with open(feats_path, 'wb') as file:
pickle.dump(features_dict, file)
# Print results
t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image
LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(weights) # update model (to fix SourceChangeWarning)
def main():
ROOT = Path(Path.cwd())
check_requirements(ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
optdict = {'weights': r"D:/Project/ieemoo-ai/tools/ckpts/best_158734_cls11_noaug10.pt",
'source': r"D:/Project/ieemoo-ai/testdata/88.mp4",
}
run(**optdict)
if __name__ == '__main__':
main()

View File

Binary file not shown.

View File

Binary file not shown.


Binary file not shown.


Binary file not shown.


View File

@ -0,0 +1,357 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:16:01 2024
@author: ym
"""
import numpy as np
import cv2
from pathlib import Path
from scipy.spatial.distance import cdist
from ytracking.tracking.utils.mergetrack import track_equal_track
# curpath = Path(__file__).resolve().parents[0]
from tools.config import cfg
class MoveState:
"""商品运动状态标志"""
Static = 0
DownWard = 1
UpWard = 2
FreeMove = 3
HandHborder = 4
Unknown = -1
class ShoppingCart:
def __init__(self, bboxes):
self.bboxes = bboxes
self.loadrate = self.load_rate()
def load_rate(self):
bboxes = self.bboxes
fid = min(bboxes[:, 7])
idx = bboxes[:, 7] == fid
boxes = bboxes[idx]
temp = np.zeros(self.incart.shape, np.uint8)
for i in range(boxes.shape[0]):
x1, y1, x2, y2, tid = boxes[i, 0:5]
cv2.rectangle(temp, (int(x1), int(y1)), (int(x2), int(y2)), 255, cv2.FILLED)
'''1. AND: filter out interference outside the cart border'''
loadstate = cv2.bitwise_and(self.incart, temp)
'''2. XOR: obtain the filled region inside the cart'''
# loadstate = cv2.bitwise_xor(self.incart, temp1)
num_loadstate = cv2.countNonZero(loadstate)
num_incart = cv2.countNonZero(self.incart)
loadrate = num_loadstate / (num_incart+0.01)
# edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png", cv2.IMREAD_GRAYSCALE)
# cv2.imwrite(f"./test/temp.png", cv2.add(temp, edgeline))
# cv2.imwrite(f"./test/incart.png", cv2.add(self.incart, edgeline))
# cv2.imwrite(f"./test/loadstate.png", cv2.add(loadstate, edgeline))
return loadrate
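# A minimal usage sketch with synthetic masks (assumes the incart template is
# a binary image whose cart interior is 255):
#
#   incart = np.zeros((1280, 1024), np.uint8)
#   cv2.rectangle(incart, (200, 400), (824, 1200), 255, cv2.FILLED)
#   box = np.zeros(incart.shape, np.uint8)
#   cv2.rectangle(box, (300, 500), (500, 700), 255, cv2.FILLED)
#   rate = cv2.countNonZero(cv2.bitwise_and(incart, box)) / cv2.countNonZero(incart)
#   # rate ~= 0.08: the box fills about 8% of the cart interior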
@property
def incart(self):
# img = cv2.imread(curpath/'cart_tempt'/'back_incart.png', cv2.IMREAD_GRAYSCALE)
img = cv2.imread(cfg.incart, cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
@property
def outcart(self):
# img = cv2.imread(curpath/'cart_tempt'/'back_outcart.png', cv2.IMREAD_GRAYSCALE)
img = cv2.imread(cfg.outcart, cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
@property
def cartedge(self):
# img = cv2.imread(curpath/'cart_tempt'/'back_cartedge.png', cv2.IMREAD_GRAYSCALE)
img = cv2.imread(cfg.cartedge, cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
class Track:
'''Abstract base class; not meant to be instantiated directly'''
def __init__(self, boxes, imgshape=(1024, 1280)):
'''
boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
'''
self.boxes = boxes
self.tid = int(boxes[0, 4])
self.cls = int(boxes[0, 6])
self.frnum = boxes.shape[0]
self.imgBorder = False
self.imgshape = imgshape
self.state = MoveState.Unknown
'''Start and end frame IDs of the track'''
self.start_fid = int(np.min(boxes[:, 7]))
self.end_fid = int(np.max(boxes[:, 7]))
# Can be implemented in subclasses as needed, to reduce computation in sequential processing
self.compute_cornpoints()
self.compute_cornpts_feats()
''' Some computations based on (x, y, w, h) (deprecated).
Displacement between the last frame and the first frame:
vshift: positive = downward, negative = upward
hshift: positive = item moving toward the image center, negative = toward the cart borders
'''
x0, y0 = (boxes[:,0] + boxes[:,2])/2, (boxes[:, 1] + boxes[:, 3])/2
mw, mh = np.mean(boxes[:, 2]-boxes[:, 0]), np.mean((boxes[:, 3]-boxes[:, 1]))
self.mwh = np.mean((mw, mh))
self.Area = mw * mh
self.vshift = y0[-1] - y0[0]
self.hshift = abs(x0[0]-self.imgshape[0]/2) - abs(x0[-1]-self.imgshape[0]/2)
# self.boxmean = [np.mean(self.boxes[:, k]) for k in range(4)]
# self.mwh = np.mean(self.boxmean[2:])
# self.Area = boxes[:,2] * boxes[:,3]
# self.vshift = boxes[-1, 1] - boxes[0, 1]
# self.hshift = abs(boxes[0, 0]-self.imgshape[0]/2) - abs(boxes[-1, 0]-self.imgshape[0]/2)
def compute_cornpoints(self):
'''
cornpoints has 10 entries: the (x, y) coordinates of 5 points
(center, top_left, top_right, bottom_left, bottom_right)
'''
boxes = self.boxes
cornpoints = np.zeros((self.frnum, 10))
cornpoints[:,0] = (boxes[:, 0] + boxes[:, 2]) / 2
cornpoints[:,1] = (boxes[:, 1] + boxes[:, 3]) / 2
cornpoints[:,2], cornpoints[:,3] = boxes[:, 0], boxes[:, 1]
cornpoints[:,4], cornpoints[:,5] = boxes[:, 2], boxes[:, 1]
cornpoints[:,6], cornpoints[:,7] = boxes[:, 0], boxes[:, 3]
cornpoints[:,8], cornpoints[:,9] = boxes[:, 2], boxes[:, 3]
self.cornpoints = cornpoints
def compute_cornpts_feats(self):
'''
Per-keypoint trajectory features: per-step lengths, total length,
maximum pairwise point distance, and minimum-area bounding rectangle.
'''
trajectory = []
trajlens = []
trajdist = []
trajrects = []
for k in range(5):
# diff_xy2 = np.power(np.diff(self.cornpoints[:, 2*k:2*(k+1)], axis = 0), 2)
# trajlen = np.sum(np.sqrt(np.sum(diff_xy2, axis = 1)))
X = self.cornpoints[:, 2*k:2*(k+1)]
traj = np.linalg.norm(np.diff(X, axis=0), axis=1)
trajectory.append(traj)
trajlen = np.sum(traj)
trajlens.append(trajlen)
ptdist = np.max(cdist(X, X))
trajdist.append(ptdist)
rect = cv2.minAreaRect(X.astype(np.int64))
trajrects.append(rect)
self.trajectory = trajectory
self.trajlens = trajlens
self.trajdist = trajdist
self.trajrects = trajrects
def trajfeature(self):
'''
Compute trajectory features for two cases (box edges inside vs. on the image border):
- trajmin: the minimum-length trajectory
- trajlen_min: the minimum trajectory length
- trajdist_min: the minimum trajectory point-to-point (Euclidean) extent
'''
idx1 = self.trajlens.index(max(self.trajlens))
trajmax = self.trajectory[idx1]
trajlen_max = self.trajlens[idx1]
trajdist_max = self.trajdist[idx1]
if not self.isCornpoint:
idx2 = self.trajlens.index(min(self.trajlens))
trajmin = self.trajectory[idx2]
trajlen_min = self.trajlens[idx2]
trajdist_min = self.trajdist[idx2]
else:
trajmin = self.trajectory[0]
trajlen_min = self.trajlens[0]
trajdist_min = self.trajdist[0]
'''min / max trajectory length; the smaller the ratio, the smaller the motion'''
trajlen_rate = trajlen_min/(trajlen_max+0.0001)
'''min trajectory Euclidean extent / mean box scale'''
trajdist_rate = trajdist_min/(self.mwh+0.0001)
self.trajmin = trajmin
self.trajmax = trajmax
self.feature = [trajlen_min, trajlen_max,
trajdist_min, trajdist_max,
trajlen_rate, trajdist_rate]
class doTracks:
def __init__(self, bboxes, features_dict):
self.bboxes = bboxes
self.features_dict = features_dict
self.frameid = set(bboxes[:, 7])
self.trackid = set(bboxes[:, 4])
self.lboxes = self.array2list()
'''Classify the elements of self.tracks into the corresponding lists'''
self.Static = []
self.DownWard = []
self.UpWard = []
self.FreeMove = []
self.Hands = []
self.Kids = []
self.HandHborder = []
self.Disruptors = []
self.Residual = []
self.Merged = []
def array2list(self):
'''
Convert bboxes into a list of per-track box arrays
bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
Return
lboxes: a list whose elements share the same track_id, in (x1, y1, x2, y2) format
[x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
'''
track_ids = set(self.bboxes[:, 4])
lboxes = []
for t_id in track_ids:
# print(f"The ID is: {t_id}")
idx = np.where(self.bboxes[:, 4] == t_id)[0]
box = self.bboxes[idx, :]
lboxes.append(box)
return lboxes
def similarity(self):
nt = len(self.tracks)
similar_dict = {}
if nt >= 2:
for i in range(nt):
for j in range(i, nt):
tracka = self.tracks[i]
trackb = self.tracks[j]
similar = self.feat_similarity(tracka, trackb)
similar_dict.update({(tracka.tid, trackb.tid): similar})
return similar_dict
def feat_similarity(self, tracka, trackb, metric='cosine'):
boxes_a, boxes_b = tracka.boxes, trackb.boxes
na, nb = tracka.boxes.shape[0], trackb.boxes.shape[0]
feata, featb = [], []
for i in range(na):
fid, bid = tracka.boxes[i, 7:9].astype(int)  # dict keys are ints
feata.append(self.features_dict[fid][bid])
for i in range(nb):
fid, bid = trackb.boxes[i, 7:9].astype(int)
featb.append(self.features_dict[fid][bid])
feata = np.asarray(feata, dtype=np.float32)
featb = np.asarray(featb, dtype=np.float32)
similarity_matrix = 1-np.maximum(0.0, cdist(feata, featb, metric))
feata_m = np.mean(feata, axis=0)[None, :]
featb_m = np.mean(featb, axis=0)[None, :]
simi_ab = 1 - cdist(feata_m, featb_m, metric)
print(f'tid {int(boxes_a[0, 4])} vs {int(boxes_b[0, 4])}: {simi_ab[0][0]}')
# return np.max(similarity_matrix)
return simi_ab
def merge_tracks_loop(self, alist):
na, nb = len(alist), 0
while na!=nb:
na = len(alist)
alist = self.merge_tracks(alist)
nb = len(alist)
return alist
def base_merge_tracks(self, Residual):
"""
Merge targets that have different IDs but may be the same product
"""
mergedTracks = []
alist = [t for t in Residual]
while alist:
atrack = alist[0]
cur_list = []
cur_list.append(atrack)
alist.pop(0)
blist = [b for b in alist]
alist = []
for btrack in blist:
if track_equal_track(atrack, btrack, self.features_dict):
cur_list.append(btrack)
else:
alist.append(btrack)
mergedTracks.append(cur_list)
return mergedTracks
@staticmethod
def join_tracks(tlista, tlistb):
"""Combine two lists of stracks into a single one."""
exists = {}
res = []
for t in tlista:
exists[t.tid] = 1
res.append(t)
for t in tlistb:
tid = t.tid
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
@staticmethod
def sub_tracks(tlista, tlistb):
track_ids_b = {t.tid for t in tlistb}
return [t for t in tlista if t.tid not in track_ids_b]

View File

@ -0,0 +1,204 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:36:31 2024
@author: ym
"""
import numpy as np
from ytracking.tracking.dotrack.dotracks import doTracks, ShoppingCart
from ytracking.tracking.dotrack.track_back import backTrack
class doBackTracks(doTracks):
def __init__(self, bboxes, features_dict):
super().__init__(bboxes, features_dict)
self.tracks = [backTrack(b) for b in self.lboxes]
# self.similar_dict = self.similarity()
self.shopcart = ShoppingCart(bboxes)
# =============================================================================
# def array2list(self):
# ''' 0, 1, 2, 3, 4, 5, 6, 7, 8
# bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# lboxes[x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# '''
#
# track_ids = set(self.bboxes[:, 4])
# lboxes = []
# for t_id in track_ids:
# idx = np.where(self.bboxes[:, 4] == t_id)[0]
# box = self.bboxes[idx, :]
#
# x = (box[:, 0] + box[:, 2]) / 2
# y = (box[:, 1] + box[:, 3]) / 2
#
# # box: [x, y, w, h, track_id, score, cls, frame_index]
# box[:, 2] = box[:, 2] - box[:, 0]
# box[:, 3] = box[:, 3] - box[:, 1]
# box[:, 0] = x
# box[:, 1] = y
#
# lboxes.append(box)
#
#
# return lboxes
# =============================================================================
def classify(self):
'''
Classify the elements in tracks
'''
tracks = self.tracks
shopcart = self.shopcart
# Extract hand frame_ids and associate them with the frame_ids of moving targets
hand_tracks = [t for t in tracks if t.cls==0]
self.Hands.extend(hand_tracks)
tracks = self.sub_tracks(tracks, hand_tracks)
# Extract kid tracks and compute their state: left, right, incart
kid_tracks = [t for t in tracks if t.cls==9]
kid_states = [self.kid_state(t) for t in kid_tracks]
self.Kids = [x for x in zip(kid_tracks, kid_states)]
tracks = self.sub_tracks(tracks, kid_tracks)
'''Static case 1: minimum relative keypoint motion trajectory < 0.2 (this metric tends to run large)
feature = [trajlen_min, trajlen_max,
trajdist_min, trajdist_max,
trajlen_rate, trajdist_rate]
'''
track1 = [t for t in tracks if t.feature[5] < 0.2
or t.feature[3] < 120
]
'''Static case 2: the target starts out static; relax the minimum relative keypoint trajectory to < 0.5'''
track2 = [t for t in tracks if t.static_index.size > 0
and t.static_index[0, 0] <= 2
and t.feature[5] < 0.5]
'''Static case 3: the target is static in both its initial and final states'''
track3 = [t for t in tracks if t.static_index.shape[0] >= 2
and t.static_index[0, 0] <= 2
and t.static_index[-1, 1] >= t.frnum-3]
track12 = self.join_tracks(track1, track2)
'''Extract the static tracks'''
static_tracks = self.join_tracks(track12, track3)
self.Static.extend(static_tracks)
'''Tracks left after removing static targets'''
tracks = self.sub_tracks(tracks, static_tracks)
'''Moving distractor targets outside the cart boundary'''
track4 = [t for t in tracks if self.isouttrack(t)]
tracks = self.sub_tracks(tracks, track4)
'''Iterative track merging'''
# merged_tracks = self.merge_tracks(tracks)
merged_tracks = self.merge_tracks_loop(tracks)
self.Residual = merged_tracks
def merge_tracks(self, Residual):
"""
Merge targets that have different IDs but may be the same product
"""
mergedTracks = self.base_merge_tracks(Residual)
oldtracks, newtracks = [], []
for tracklist in mergedTracks:
if len(tracklist) > 1:
boxes = np.empty((0, 9), dtype=np.float32)
for i, track in enumerate(tracklist):
if i==0: ntid, ncls=track.boxes[0, 4], track.boxes[0, 6]
iboxes = track.boxes.copy()
iboxes[:, 4], iboxes[:, 6] = ntid, ncls
boxes = np.concatenate((boxes, iboxes), axis=0)
oldtracks.append(track)
fid_indices = np.argsort(boxes[:, 7])
boxes_fid = boxes[fid_indices]
newtracks.append(backTrack(boxes_fid))
elif len(tracklist) == 1:
oldtracks.append(tracklist[0])
newtracks.append(tracklist[0])
redu = self.sub_tracks(Residual, oldtracks)
merged = self.join_tracks(redu, newtracks)
return merged
def kid_state(self, track):
left_dist = track.cornpoints[:, 2]
right_dist = 1024 - track.cornpoints[:, 4]
if np.sum(left_dist<30)/track.frnum>0.8 and np.sum(right_dist>512)/track.frnum>0.7:
kidstate = "left"
elif np.sum(left_dist>512)/track.frnum>0.7 and np.sum(right_dist<30)/track.frnum>0.8:
kidstate = "right"
else:
kidstate = "incart"
return kidstate
def hand_association(self):
"""
Analyze the association between products and hands
"""
pass
def isouttrack(self, track):
return track.posState <= 1
def isuptrack(self, track):
Flag = False
return Flag
def isdowntrack(self, track):
Flag = False
return Flag
def isfreetrack(self, track):
Flag = False
return Flag

View File

@ -0,0 +1,190 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:38:20 2024
@author: ym
"""
import numpy as np
from ytracking.tracking.utils.mergetrack import track_equal_track
from .dotracks import doTracks
from .track_front import frontTrack
class doFrontTracks(doTracks):
def __init__(self, bboxes, features_dict):
super().__init__(bboxes, features_dict)
self.tracks = [frontTrack(b) for b in self.lboxes]
def classify(self):
'''Classify the elements in tracks'''
tracks = self.tracks
'''Extract hand tracks'''
hand_tracks = [t for t in tracks if t.cls==0]
for htrack in hand_tracks:
htrack.extract_hand_features()
self.Hands.extend(hand_tracks)
tracks = self.sub_tracks(tracks, hand_tracks)
'''Extract kid tracks'''
kid_tracks = [t for t in tracks if t.cls==9]
tracks = self.sub_tracks(tracks, kid_tracks)
'''Static tracks'''
static_tracks = [t for t in tracks if t.frnum>1 and t.is_Static()]
'''Tracks left after removing static targets'''
tracks = self.sub_tracks(tracks, static_tracks)
'''Iterative track merging'''
merged_tracks = self.merge_tracks_loop(tracks)
tracks = [t for t in merged_tracks if t.frnum > 1]
for gtrack in tracks:
# print(f"Goods ID:{gtrack.tid}")
for htrack in hand_tracks:
if self.is_associate_with_hand(htrack, gtrack):
gtrack.hands.append(htrack)
freemoved_tracks = [t for t in tracks if t.is_FreeMove()]
tracks = self.sub_tracks(tracks, freemoved_tracks)
self.Residual = tracks
def is_associate_with_hand(self, htrack, gtrack):
'''Criteria for associating a hand track with a product track:
a. their motion-frame indices intersect
b. IoU is greater than 0 on every intersecting frame
'''
assert htrack.cls==0 and gtrack.cls!=0 and gtrack.cls!=9, 'Track cls is invalid!'
hboxes = np.empty(shape=(0, 9), dtype = float)
gboxes = np.empty(shape=(0, 9), dtype = float)
# start, end are inclusive indices, hence the slice start:(end+1)
for start, end in htrack.dynamic_y2:
hboxes = np.concatenate((hboxes, htrack.boxes[start:end+1, :]), axis=0)
for start, end in gtrack.dynamic_y1:
gboxes = np.concatenate((gboxes, gtrack.boxes[start:end+1, :]), axis=0)
hfids, gfids = hboxes[:, 7], gboxes[:, 7]
fids = set(hfids).intersection(set(gfids))
if len(fids)==0:
return False
# print(f"Goods ID: {gtrack.tid}, Hand ID: {htrack.tid}")
ious = []
for f in fids:
h = np.where(hfids==f)[0][0]
g = np.where(gfids==f)[0][0]
x11, y11, x12, y12 = hboxes[h, 0:4]
x21, y21, x22, y22 = gboxes[g, 0:4]
x1, y1 = max((x11, x21)), max((y11, y21))
x2, y2 = min((x12, x22)), min((y12, y22))
inter = (x2 - x1).clip(0) * (y2 - y1).clip(0)
area1 = (x12 - x11) * (y12 - y11)
area2 = (x22 - x21) * (y22 - y21)
iou = inter / (area1 + area2 - inter + 1e-6)
if iou>0:
ious.append(iou)
return len(ious) > 0
def merge_tracks(self, Residual):
"""
Merge targets that have different IDs but may be the same product
"""
# =============================================================================
# mergedTracks = []
# alist = [t for t in Residual]
# while alist:
# atrack = alist[0]
# cur_list = []
# cur_list.append(atrack)
# alist.pop(0)
#
# blist = [b for b in alist]
# alist = []
# for btrack in blist:
# if track_equal_track(atrack, btrack, self.features_dict):
# cur_list.append(btrack)
# else:
# alist.append(btrack)
#
# mergedTracks.append(cur_list)
# =============================================================================
mergedTracks = self.base_merge_tracks(Residual)
oldtracks, newtracks = [], []
for tracklist in mergedTracks:
if len(tracklist) > 1:
boxes = np.empty((0, 9), dtype=np.float32)
for i, track in enumerate(tracklist):
if i==0: ntid, ncls=track.boxes[0, 4], track.boxes[0, 6]
iboxes = track.boxes.copy()
iboxes[:, 4], iboxes[:, 6] = ntid, ncls
boxes = np.concatenate((boxes, iboxes), axis=0)
oldtracks.append(track)
fid_indices = np.argsort(boxes[:, 7])
boxes_fid = boxes[fid_indices]
newtracks.append(frontTrack(boxes_fid))
elif len(tracklist) == 1:
oldtracks.append(tracklist[0])
newtracks.append(tracklist[0])
redu = self.sub_tracks(Residual, oldtracks)
merged = self.join_tracks(redu, newtracks)
return merged
# =============================================================================
# def array2list(self):
# '''
# 将 bboxes 变换为 track 列表
# bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# Return
# lboxes列表列表中元素具有同一 track_idx1y1x2y2 格式
# [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# '''
# track_ids = set(self.bboxes[:, 4])
# lboxes = []
# for t_id in track_ids:
# # print(f"The ID is: {t_id}")
# idx = np.where(self.bboxes[:, 4] == t_id)[0]
# box = self.bboxes[idx, :]
#
# lboxes.append(box)
#
# return lboxes
# =============================================================================

View File

@ -0,0 +1,304 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:28:47 2024
@author: ym
"""
import cv2
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
from .dotracks import MoveState, Track
from tools.config import cfg
class backTrack(Track):
# boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# 0, 1, 2, 3, 4, 5, 6, 7, 8
def __init__(self, boxes, imgshape=(1024, 1280)):
super().__init__(boxes, imgshape)
'''(cent_x, cent_y, mean_w, mean_h)'''
'''Trajectory features of 5 keypoints (center, top-left, top-right, bottom-left, bottom-right)'''
self.compute_cornpts_feats()
'''Depends on: self.cornpoints'''
self.isCornpoint = self.isimgborder()
'''Depends on self.cornpoints and self.isCornpoint; defines the trajectory features:
self.trajectory, self.trajmin, self.trajlens, self.trajdist
self.feature = [trajlen_min, trajlen_max,
trajdist_min, trajdist_max,
trajlen_rate, trajdist_rate] '''
self.trajfeature()
'''Frame indices of static points'''
self.static_index = self.compute_static_fids()
'''Frame indices of moving points (bounded by the static frames on either side)'''
self.moving_index = self.compute_moving_fids()
'''Depends on self.cornpoints; defines 4 product-position variables:
self.Cent_isIncart, self.LB_isIncart, self.RB_isIncart
self.posState = self.Cent_isIncart+self.LB_isIncart+self.RB_isIncart'''
self.PositionState()
'''self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates'''
self.compute_ious_feat()
# self.PCA()
# =============================================================================
# def compute_cornpoints(self):
# '''
# cornpoints 共10项分别是个点的坐标值x, y
# (center, top_left, top_right, bottom_left, bottom_right)
# '''
# boxes = self.boxes
# cornpoints = np.zeros((self.frnum, 10))
# cornpoints[:,0], cornpoints[:,1] = boxes[:, 0], boxes[:, 1]
# cornpoints[:,2], cornpoints[:,3] = boxes[:, 0] - boxes[:, 2]/2, boxes[:, 1] - boxes[:, 3]/2
# cornpoints[:,4], cornpoints[:,5] = boxes[:, 0] + boxes[:, 2]/2, boxes[:, 1] - boxes[:, 3]/2
# cornpoints[:,6], cornpoints[:,7] = boxes[:, 0] - boxes[:, 2]/2, boxes[:, 1] + boxes[:, 3]/2
# cornpoints[:,8], cornpoints[:,9] = boxes[:, 0] + boxes[:, 2]/2, boxes[:, 1] + boxes[:, 3]/2
#
# self.cornpoints = cornpoints
# =============================================================================
def isimgborder(self, BoundPixel=10, BoundThresh=0.3):
x1, y1 = self.cornpoints[:,2], self.cornpoints[:,3],
x2, y2 = self.cornpoints[:,8], self.cornpoints[:,9]
cont1 = sum(abs(x1)<BoundPixel) / self.frnum > BoundThresh
cont2 = sum(abs(y1)<BoundPixel) / self.frnum > BoundThresh
cont3 = sum(abs(x2-self.imgshape[0])<BoundPixel) / self.frnum > BoundThresh
cont4 = sum(abs(y2-self.imgshape[1])<BoundPixel) / self.frnum > BoundThresh
cont = cont1 or cont2 or cont3 or cont4
isCornpoint = False
if cont:
isCornpoint = True
return isCornpoint
def PositionState(self, camerType="back"):
'''
camerType: back, 后置摄像头
front, 前置摄像头
'''
if camerType=="front":
incart = cv2.imread(cfg.incart, cv2.IMREAD_GRAYSCALE)
else:
incart = cv2.imread(cfg.incart_ftmp, cv2.IMREAD_GRAYSCALE)
xc, yc = self.cornpoints[:,0].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,1].clip(0,self.imgshape[1]-1).astype(np.int64)
x1, y1 = self.cornpoints[:,6].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,7].clip(0,self.imgshape[1]-1).astype(np.int64)
x2, y2 = self.cornpoints[:,8].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,9].clip(0,self.imgshape[1]-1).astype(np.int64)
# print(self.tid)
Cent_inCartnum = np.count_nonzero(incart[(yc, xc)])
LB_inCartnum = np.count_nonzero(incart[(y1, x1)])
RB_inCartnum = np.count_nonzero(incart[(y2, x2)])
self.Cent_isIncart = False
self.LB_isIncart = False
self.RB_isIncart = False
if Cent_inCartnum: self.Cent_isIncart = True
if LB_inCartnum: self.LB_isIncart = True
if RB_inCartnum: self.RB_isIncart = True
self.posState = self.Cent_isIncart+self.LB_isIncart+self.RB_isIncart
def PCA(self):
self.pca = PCA()
X = self.cornpoints[:, 0:2]
self.pca.fit(X)
def compute_ious_feat(self):
'''Outputs:
self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates
Where:
box stream: the trajectory map formed by all boxes of the track, split into three parts: incart, outcart, cartboarder
incart_iou, outcart_iou, cartboarder_iou: IoU of each part with the box stream
incart_iou = 0: the track is outside the cart
outcart_iou = 0: the track is inside the cart (it may also have been placed in via the bottom-left or bottom-right corner)
maxbox_iou, minbox_iou: IoU of the largest / smallest box with the box stream; the smaller their difference (the closer to 1), the smaller the track's motion
incartrates: per-box IoU with incart over time; rising values indicate placing in, falling values indicate taking out
'''
incart = cv2.imread(cfg.incart, cv2.IMREAD_GRAYSCALE)
outcart = cv2.imread(cfg.outcart, cv2.IMREAD_GRAYSCALE)
cartboarder = cv2.imread(cfg.cartboarder, cv2.IMREAD_GRAYSCALE)
incartrates = []
temp = np.zeros(incart.shape, np.uint8)
maxarea, minarea = 0, self.imgshape[0]*self.imgshape[1]
for i in range(self.frnum):
# x, y, w, h = self.boxes[i, 0:4]
x = (self.boxes[i, 2] + self.boxes[i, 0]) / 2  # box center
w = self.boxes[i, 2] - self.boxes[i, 0]        # full box width
y = (self.boxes[i, 3] + self.boxes[i, 1]) / 2
h = self.boxes[i, 3] - self.boxes[i, 1]        # full box height
if w*h > maxarea: maxarea = w*h
if w*h < minarea: minarea = w*h
cv2.rectangle(temp, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), 255, cv2.FILLED)
temp1 = np.zeros(incart.shape, np.uint8)
cv2.rectangle(temp1, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), 255, cv2.FILLED)
temp2 = cv2.bitwise_and(incart, temp1)
inrate = cv2.countNonZero(temp2)/(w*h)  # in-cart fraction of this box
incartrates.append(inrate)
isincart = cv2.bitwise_and(incart, temp)
isoutcart = cv2.bitwise_and(outcart, temp)
iscartboarder = cv2.bitwise_and(cartboarder, temp)
num_temp = cv2.countNonZero(temp)
num_incart = cv2.countNonZero(isincart)
num_outcart = cv2.countNonZero(isoutcart)
num_cartboarder = cv2.countNonZero(iscartboarder)
incart_iou = num_incart/num_temp
outcart_iou = num_outcart/num_temp
cartboarder_iou = num_cartboarder/num_temp
maxbox_iou = maxarea/num_temp
minbox_iou = minarea/num_temp
self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates
def compute_static_fids(self, thresh1 = 12, thresh2 = 3):
'''
Compute the (start_frame_id, end_frame_id) pairs of the relatively static points of the track's trajectory
thresh1: pixel threshold for whether the target center is static between adjacent frames
thresh2: number of consecutive frames the target must stay static
'''
BoundPixel = 8
x1, y1 = self.cornpoints[:,2], self.cornpoints[:,3],
x2, y2 = self.cornpoints[:,8], self.cornpoints[:,9]
cont1 = sum(abs(x1)<BoundPixel) > 3
# cont2 = sum(abs(y1)<BoundPixel) > 3
cont3 = sum(abs(x2-self.imgshape[0])<BoundPixel) > 3
# cont4 = sum(abs(y2-self.imgshape[1])<BoundPixel) > 3
cont = not(cont1 or cont3)
## ============== next step: enable the center point, and pick the corner point with the smallest motion as the reference point
static_index = []
if self.frnum>=2 and cont:
x1 = self.boxes[1:,7]
x2 = [i for i in range(int(min(x1)), int(max(x1)+1))]
dist_adjc = np.interp(x2, x1, self.trajmin)
# dist_adjc = self.trajmin
static_thresh = (dist_adjc < thresh1)[:, None].astype(np.uint8)
static_cnts, _ = cv2.findContours(static_thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
for cnt in static_cnts:
_, start, _, num = cv2.boundingRect(cnt)
end = start + num
if num <= thresh2:
continue
static_index.append((start, end))
# =============================================================================
# '''========= outputting frame ids here is not ideal ========='''
# static_fids = []
# for i in range(len(static_index)):
# i1, i2 = static_index[i]
# fid1, fid2 = boxes[i1, 7], boxes[i2, 7]
# static_fids.append([fid1, fid2])
# static_fids = np.array(static_fids)
# =============================================================================
static_index = np.array(static_index)
if static_index.size:
indx = np.argsort(static_index[:, 0])
static_index = static_index[indx]
return static_index
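# The findContours call above treats the thresholded 1-D displacement as a
# one-column binary image and extracts runs of static frames. An equivalent
# pure-NumPy sketch of the same run detection (illustrative):
#
#   static = (dist_adjc < thresh1).astype(int)
#   edges = np.flatnonzero(np.diff(np.r_[0, static, 0]))
#   runs = edges.reshape(-1, 2)                       # (start, end) per run
#   runs = runs[(runs[:, 1] - runs[:, 0]) > thresh2]  # drop short runs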
def compute_moving_fids(self, thresh1 = 12, thresh2 = 3):
'''
Compute the (start_frame_id, end_frame_id) pairs of the moving points of the track's trajectory
thresh1: pixel threshold for whether the target center is moving between adjacent frames
thresh2: number of consecutive frames the target must keep moving
Goals:
1. compute the trajectory direction
2. compute the association with hand motion
'''
moving_index = []
if self.frnum>=2:
x1 = self.boxes[1:,7]
x2 = [i for i in range(int(min(x1)), int(max(x1)+1))]
dist_adjc = np.interp(x2, x1, self.trajmin)
moving_thresh = (dist_adjc >= thresh1)[:, None].astype(np.uint8)
moving_cnts, _ = cv2.findContours(moving_thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
for cnt in moving_cnts:
_, start, _, num = cv2.boundingRect(cnt)
if num < thresh2:
continue
end = start + num
moving_index.append((start, end))
# =============================================================================
# '''========= outputting frame ids here is not ideal ========='''
# moving_fids = []
# for i in range(len(moving_index)):
# i1, i2 = moving_index[i]
# fid1, fid2 = boxes[i1, 7], boxes[i2, 7]
# moving_fids.append([fid1, fid2])
# moving_fids = np.array(moving_fids)
# =============================================================================
moving_index = np.array(moving_index)
if moving_index.size:
indx = np.argsort(moving_index[:, 0])
moving_index = moving_index[indx]
return moving_index
def compute_distance(self):
pass
def move_start_fid(self):
pass
def move_end_fid(self):
pass

View File

@ -0,0 +1,275 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:33:01 2024
@author: ym
"""
import numpy as np
from sklearn.cluster import KMeans
from .dotracks import MoveState, Track
class frontTrack(Track):
# boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# 0, 1, 2, 3, 4, 5, 6, 7, 8
def __init__(self, boxes, imgshape=(1024, 1280)):
super().__init__(boxes, imgshape)
self.hands = []
self.mwh = np.mean((self.boxes[:, 2] - self.boxes[:, 0]) *
(self.boxes[:, 3] - self.boxes[:, 1]))
'''Trajectory features of 5 keypoints (center, top-left, top-right, bottom-left, bottom-right)'''
self.compute_cornpts_feats()
self.CART_HIGH_THRESH1 = imgshape[1]/2.98
# if self.tid==10:
# print(f"ID: {self.tid}")
'''Static-state intervals of y1 and y2; the values are axis-0 indices into boxes, not frame indices'''
self.static_y1, self.dynamic_y1 = self.compute_static_fids(boxes[:, 1])
self.static_y2, self.dynamic_y2 = self.compute_static_fids(boxes[:, 3])
self.isCornpoint = self.is_left_right_cornpoint()
self.isBotmpoint = self.is_bottom_cornpoint()
self.trajfeature()
'''Hand-state analysis'''
self.HAND_STATIC_THRESH = 100
self.CART_POSIT_0 = 430
self.CART_POSIT_1 = 620
def is_left_right_cornpoint(self):
''' Based on all(boxes):
the bottom-left corner of the boxes coincides with the image's bottom-left corner, or
the bottom-right corner of the boxes coincides with the image's bottom-right corner
'''
x1, y1 = self.boxes[:, 0], self.boxes[:, 1]
x2, y2 = self.boxes[:, 2], self.boxes[:, 3]
# Left-Bottom cornpoint
condt1 = all(x1 < 5) and all(y2 > self.imgshape[1]-5)
# Right-Bottom cornpoint
condt2 = all(x2 > self.imgshape[0]-5) and all(y2 > self.imgshape[1]-5)
condt = condt1 or condt2
return condt
def is_edge_cornpoint(self):
'''Based on all(boxes): whether the boxes overlap the image's left/right edges'''
x1, x2 = self.boxes[:, 0], self.boxes[:, 2]
condt = all(x1 < 3) or all(x2 > self.imgshape[0]-3)
return condt
def is_bottom_cornpoint(self):
'''Based on all(boxes): whether the boxes overlap the image's bottom edge'''
condt = all(self.boxes[:, 3] > self.imgshape[1]-20)
return condt
def compute_static_fids(self, y, STATIC_THRESH = 8):
'''
For the front camera, y is usually the box's y1 coordinate, and the product should be restricted to inside the cart.
inputs
y: 1D array
parameters
STATIC_THRESH: threshold below which the trajectory counts as static
outputs
(start, end) index pairs into y where the successive differences stay below STATIC_THRESH
ranges = [(start, end),
(start, end),
...]
'''
# print(f"The ID is: {self.tid}")
det_y = np.diff(y, axis=0)
ranges, rangex = [], []
static_indices = np.where(np.abs(det_y) < STATIC_THRESH)[0]
if len(static_indices) == 0:
rangex.append((0, len(y)-1))
return ranges, rangex
start_index = static_indices[0]
for i in range(1, len(static_indices)):
if static_indices[i] != static_indices[i-1] + 1:
ranges.append((start_index, static_indices[i-1] + 1))
start_index = static_indices[i]
ranges.append((start_index, static_indices[-1] + 1))
if len(ranges) == 0:
rangex.append((0, len(y)-1))
return ranges, rangex
idx1, idx2 = ranges[0][0], ranges[-1][1]
if idx1 != 0:
rangex.append((0, idx1))
# The final stage of the trajectory is in motion
for k in range(1, len(ranges)):
index1 = ranges[k-1][1]
index2 = ranges[k][0]
rangex.append((index1, index2))
if idx2 != len(y)-1:
rangex.append((idx2, len(y)-1))
return ranges, rangex
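# A worked toy example (illustrative) with STATIC_THRESH = 8:
#
#   y = np.array([100, 102, 103, 300, 301, 302, 500])
#   ranges, rangex = self.compute_static_fids(y)
#   # ranges -> [(0, 2), (3, 5)]  static index intervals of y
#   # rangex -> [(2, 3), (5, 6)]  the moving gaps between / after them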
def is_Static(self):
assert self.frnum > 1, "the number of boxes must be greater than 1"
# print(f"The ID is: {self.tid}")
# Hand and kid targets are not considered
if self.cls == 0 or self.cls == 9:
return False
# All boxes have y2 = 1280 (on the bottom edge)
if self.isBotmpoint:
return True
boxes = self.boxes
y0 = (boxes[:, 1]+boxes[:, 3])/2
## Net vertical displacement
sum_y0 = y0[-1] - y0[0]
sum_y1 = boxes[-1, 1]-boxes[0, 1]
sum_y2 = boxes[-1, 3]-boxes[0, 3]
# Some special cases that need consideration
isbottom = max(boxes[:, 3]) > 1280-3
istop = min(boxes[:, 1]) < 3
isincart = min(y0) > self.CART_HIGH_THRESH1
uncert = abs(sum_y1)<100 and abs(sum_y2)<100
'''Initial condition: the product center point lies inside the cart'''
condt0 = max((boxes[:, 1]+boxes[:, 3])/2) > self.CART_HIGH_THRESH1
'''Condition 1: net vertical motion (via y1 or y2) describing the product trajectory length. Possible situations:
(1). the detection box may coincide with the image's top/bottom edges,
(2). the top or bottom edge may jitter
'''
if isbottom and istop:
condt1 = abs(sum_y0) < 300
elif isbottom: # y2 on the bottom edge: use y1 to characterize motion
condt1 = sum_y1 > -120 and abs(sum_y0)<80 # with a bottom point moving upward, the threshold is below 100
elif istop: # y1 on the top edge: use y2 to characterize motion
condt1 = abs(sum_y2) < 100
else:
condt1 = (abs(sum_y1) < 30 or abs(sum_y2)<30)
'''Condition 2: the trajectory is static at both its start and its end; judged from the static intervals of y1
a. the product is inside the cart,
b. the starting and ending phases of the detection box are both static
c. static segment length > 3'''
condt2 = False
if len(self.static_y1)>=2:
condt_s0 = self.static_y1[0][0]==0 and self.static_y1[0][1] - self.static_y1[0][0] >= 3
condt_s1 = self.static_y1[-1][1]==self.frnum-1 and self.static_y1[-1][1] - self.static_y1[-1][0] >= 3
condt2 = condt_s0 and condt_s1 and isincart
condt = condt0 and (condt1 or condt2)
return condt
def extract_hand_features(self):
self.isHandStatic = False
x0 = (self.boxes[:, 0] + self.boxes[:, 2]) / 2
y0 = (self.boxes[:, 1] + self.boxes[:, 3]) / 2
handXy = np.stack((x0, y0), axis=-1)
handMaxY0 = np.max(y0)
handCenter = np.array([(max(x0)+min(x0))/2, (max(y0)+min(y0))/2])
handMaxDist = np.max(np.linalg.norm(handXy - handCenter))
if handMaxDist < self.HAND_STATIC_THRESH:
self.isHandStatic = True
return
def is_Upward(self):
'''Determine whether the product is taken out'''
print(f"The ID is: {self.tid}")
def is_FreeMove(self):
if self.frnum == 1:
return True
# print(f"The ID is: {self.tid}")
y0 = (self.boxes[:, 1] + self.boxes[:, 3]) / 2
det_y0 = np.diff(y0, axis=0)
sum_y0 = y0[-1] - y0[0]
'''Case 1: the center point moves downward'''
## Initial condition: the product is first detected inside the cart
condt0 = y0[0] > self.CART_HIGH_THRESH1
condt_a = False
## Condition 1: the product starts out static (the static criterion should be somewhat strict)
condt11, condt12 = False, False
if len(self.static_y1)>0:
condt11 = self.static_y1[0][0]==0 and self.static_y1[0][1] - self.static_y1[0][0] >= 5
if len(self.static_y2)>0:
condt12 = self.static_y2[0][0]==0 and self.static_y2[0][1] - self.static_y2[0][0] >= 5
# Condition 2: the product center moves downward
condt2 = y0[-1] > y0[0]
# Combined judgment a
condt_a = condt0 and (condt11 or condt12) and condt2
'''Case 2: the center point moves upward'''
## The product center moves upward, but there is no associated hand track and it is not a left/right edge point
condt_b = condt0 and len(self.hands)==0 and y0[-1] < y0[0] and (not self.is_edge_cornpoint())
'''Case 3: the product stays inside the cart but its motion has no dominant direction'''
## Center inside the cart; the net vertical displacement is smaller than the sum of the two largest absolute frame-to-frame differences, i.e. no dominant motion direction
condt_c = False
if self.frnum > 3:
condt_c = all(y0>self.CART_HIGH_THRESH1) and \
(abs(sum_y0) < sum(np.sort(np.abs(det_y0))[::-1][:2])-1)
condt = (condt_a or condt_b or condt_c) and self.cls!=0
return condt

View File

@ -0,0 +1,254 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 20 17:33:00 2023
@author: ym
"""
import sys
import cv2
import os
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from pathlib import Path
from datetime import datetime
# ================= used for importing ultralytics
# sys.path.append(r"D:\DeepLearning\yolov5_track")
# from utils.proBoxes import boxes_add_fid
from ytracking.tracking.utils.plotting import boxing_img # , Annotator, colors,
from ytracking.tracking.utils.gen import Profile
from ytracking.tracking.utils.drawtracks import draw5points, drawTrack, drawtracefeat, plot_frameID_y2, drawFeatures, \
draw_all_trajectories
from ytracking.tracking.utils import Boxes, IterableSimpleNamespace, yaml_load
from ytracking.tracking.trackers import BOTSORT, BYTETracker
sys.path.append("ytracking/tracking/")
from dotrack.dotracks_back import doBackTracks
from dotrack.dotracks_front import doFrontTracks
# from utils.mergetrack import track_equal_track
# from utils.basetrack import MoveState, ShoppingCart, doTracks
def init_tracker(tracker_yaml=None, bs=1):
"""
Initialize tracker for object tracking during prediction.
"""
TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
cfg = IterableSimpleNamespace(**yaml_load(tracker_yaml))
tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
return tracker
def have_tracked_front():
'''Front-camera trajectory processing. Tracking has already been run; only the tracking results are analyzed.'''
featdir = r"./data/trackfeats"
npydir = r"./data/tracks"
k = 0
gt = Profile()
for filename in os.listdir(npydir):
# filename = "084501222314_20240108-143651_front.npy"
if not filename.find("front") >= 0: continue
file, ext = os.path.splitext(filename)
fpath = os.path.join(npydir, filename)
featpath = os.path.join(featdir, file + '.pkl')
bboxes = np.load(fpath)
features_dict = np.load(featpath, allow_pickle=True)
with gt:
vts = doFrontTracks(bboxes, features_dict)
vts.classify()
plt = plot_frameID_y2(vts)
plt.savefig(f'./result/{file}_y2.png')
plt.close()
print(file + f" need time: {gt.dt:.2f}s")
# edgeline = cv2.imread("./shopcart/cart_tempt/board_ftmp_line.png")
# draw_all_trajectories(vts, edgeline, save_dir, filename)
# k += 1
# if k == 1:
# break
def have_tracked_back():
'''Rear-camera trajectory processing. Tracking has already been run; only the tracking results are analyzed.'''
featdir = r"./data/trackfeats"
npydir = r"./data/tracks"
k = 0
alltracks = []
gt = Profile()
for filename in os.listdir(npydir):
# filename = "084501222314_20240108-143656_back.npy" # "加购_55.npy"
if not filename.find("back") >= 0: continue
t1 = time.time()
file, ext = os.path.splitext(filename)
fpath = os.path.join(npydir, filename)
featpath = os.path.join(featdir, file + '.pkl')
# try:
bboxes = np.load(fpath)
features_dict = np.load(featpath, allow_pickle=True)
with gt:
vts = doBackTracks(bboxes, features_dict)
vts.classify()
# vts.merge_tracks()
print(file + f" need time: {gt.dt:.2f}s")
edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
draw_all_trajectories(vts, edgeline, save_dir, filename)
alltracks.append(vts)
# except Exception as e:
# # print(str(e))
# pass
# print(file+" need time: {:.2f}s".format(time.time()-t1))
k += 1
if k == 1:
break
if len(alltracks):
drawFeatures(alltracks, save_dir)
def tracking(vboxes):
tracker_yaml = r"./trackers/cfg/botsort.yaml"
tracker = init_tracker(tracker_yaml)
tboxes = []
images = []
track_boxes = np.empty((0, 9), dtype=np.float32)
features_dict = {}
'''==================== run tracking ======================='''
for det, img, frame in vboxes:
# needs to be re-sorted by frame_id
det_tracking = Boxes(det).cpu().numpy()
H, W = img.shape[:2]
imgs = []
for d in range(np.size(det, 0)):
tlbr = det[d, :4].astype(np.int_)
tlbr[0] = max(0, tlbr[0])
tlbr[1] = max(0, tlbr[1])
tlbr[2] = min(W - 1, tlbr[2])
tlbr[3] = min(H - 1, tlbr[3])
patch = img[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2], :]
patch = patch[:, :, ::-1] # the original code reads with PIL.Image (RGB); OpenCV is BGR
imgs.append(patch)
tracks = tracker.update(det_tracking, imgs)
if len(tracks):
track_boxes = np.concatenate([track_boxes, tracks], axis=0)
feat_dict = {int(x.idx): x.curr_feat for x in tracker.tracked_stracks if x.is_activated}
frame_id = tracks[0, 7]
features_dict.update({int(frame_id): feat_dict})
# det = tracks[:, :-1]
# tboxes.append((det, frame))
imgx = boxing_img(tracks, img)
images.append((imgx, frame))
# bboxes = boxes_add_fid(tboxes)
vts = doBackTracks(track_boxes, features_dict)
vts.classify()
return vts, images
def do_tracking():
pkldir = r"./data/boxes_imgs"
k = 0
save_result = True
alltracks = []
gt = Profile()
for filename in os.listdir(pkldir):
filename = "加购_18.pkl"
file, _ = os.path.splitext(filename)
vboxes = []
##================================ load the detection data
with open(pkldir + f'/{filename}', 'rb') as f:
vboxes = pickle.load(f)
assert len(vboxes) > 0
with gt:
vts, images = tracking(vboxes)
alltracks.append(vts)
print(file + f" need time: {gt.dt * 1E3:.1f}ms")
##================================ save images, video, track-trajectory
if save_result:
curdir = imgdir.joinpath(file)
if not curdir.exists():
curdir.mkdir(parents=True, exist_ok=True)
vidpath = str(curdir.joinpath(file).with_suffix('.mp4'))
fps, w, h = 30, images[0][0].shape[1], images[0][0].shape[0]
vidwriter = cv2.VideoWriter(vidpath, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
for img, frame in images:
imgpath = curdir.joinpath(file + f"_{frame}.png")
cv2.imwrite(str(imgpath), img)
vidwriter.write(img)
vidwriter.release()
edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
draw_all_trajectories(vts, edgeline, save_dir, filename)
k += 1
if k == 1:
break
drawFeatures(alltracks, save_dir)
def have_tracked(bboxes, features_dict, camera_id):
if camera_id == '0':
vts = doBackTracks(bboxes, features_dict)
vts.classify()
elif camera_id == '1':
vts = doFrontTracks(bboxes, features_dict)
vts.classify()
else:
raise ValueError("have no camera_id")
return vts
if __name__ == "__main__":
now = datetime.now()
time_string = now.strftime("%Y%m%d%H%M%S")[:8]
# save_dir = Path(f'./result/{time_string}_traj/')
# if not save_dir.exists():
# save_dir.mkdir(parents=True, exist_ok=True)
save_dir = Path(f'./result/')
mode = "merge" ## "merge": 已完成跟踪处理, "other": 未执行跟踪处理
if mode == "merge":
# have_tracked_back()
have_tracked_front()
else:
'''save location for videos and images produced by do_tracking()'''
imgdir = Path(f'./result/{time_string}_imgs/')
if not imgdir.exists():
imgdir.mkdir(parents=True, exist_ok=True)
do_tracking()

View File

@ -0,0 +1,173 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 23 11:04:48 2024
@author: ym
"""
import numpy as np
import cv2
from scipy.spatial.distance import cdist
# from trackers.utils import matching
def readDict(boxes, feat_dicts):
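# Gather per-box ReID features: boxes[:, 7] is the frame index (fid) and
# boxes[:, 8] is the box index (bid) within that frame, as stored by the tracker.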
feat = []
for i in range(boxes.shape[0]):
tid, fid, bid = int(boxes[i, 4]), int(boxes[i, 7]), int(boxes[i, 8])
feat.append(feat_dicts[fid][bid])
# img = feat_dicts[fid][f'{bid}_img']
# cv2.imwrite(f'./result/imgs/{tid}_{fid}_{bid}.png', img)
return np.asarray(feat, dtype=np.float32)
def track_equal_track(atrack, btrack, feat_dicts):
# boxes: [x, y, w, h, track_id, score, cls, frame_index, box_index]
aboxes = atrack.boxes
bboxes = btrack.boxes
''' 1. Check whether the two tracks overlap in time '''
afids = aboxes[:, 7].astype(np.int_)
bfids = bboxes[:, 7].astype(np.int_)
# intersection of the two tracks' frame indices
interfid = set(afids).intersection(set(bfids))
# alternatively, test disjointness directly (True means no common frames):
# interfid = set(afids).isdisjoint(set(bfids))
''' 2. Spatial IoU between the two tracks '''
alabel = np.array([0] * afids.size, dtype=np.int_)
blabel = np.array([1] * bfids.size, dtype=np.int_)
label = np.concatenate((alabel, blabel), axis=0)
fids = np.concatenate((afids, bfids), axis=0)
indices = np.argsort(fids)
idx_pair = []
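# Walk the pooled boxes in frame order and collect (a_idx, b_idx) pairs in which two
# consecutive entries come from different tracks in adjacent frames; the IoU of each
# such pair is computed below.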
for i in range(len(indices)-1):
idx1, idx2 = indices[i], indices[i+1]
if label[idx1] != label[idx2] and fids[idx2] - fids[idx1] == 1:
if label[idx1] == 0:
a_idx = idx1
b_idx = idx2-alabel.size
else:
a_idx = idx2
b_idx = idx1-alabel.size
idx_pair.append((a_idx, b_idx))
ious = []
for a, b in idx_pair:
abox, bbox = aboxes[a, :], bboxes[b, :]
xa1, ya1 = abox[0] - abox[2]/2, abox[1] - abox[3]/2
xa2, ya2 = abox[0] + abox[2]/2, abox[1] + abox[3]/2
xb1, yb1 = bbox[0] - bbox[2]/2, bbox[1] - bbox[3]/2
xb2, yb2 = bbox[0] + bbox[2]/2, bbox[1] + bbox[3]/2
inter = (np.minimum(xb2, xa2) - np.maximum(xb1, xa1)).clip(0) * \
(np.minimum(yb2, ya2) - np.maximum(yb1, ya1)).clip(0)
# Union Area
box1_area = abox[2] * abox[3]
box2_area = bbox[2] * bbox[3]
union = box1_area + box2_area - inter + 1e-6
ious.append(inter/union)
''' 3. Appearance-feature similarity between the two tracks '''
afeat = readDict(aboxes, feat_dicts)
bfeat = readDict(bboxes, feat_dicts)
feat = np.concatenate((afeat, bfeat), axis=0)
emb_simil = 1-np.maximum(0.0, cdist(feat, feat, 'cosine'))
emb_ = 1-cdist(np.mean(afeat, axis=0)[None, :], np.mean(bfeat, axis=0)[None, :], 'cosine')
cont1 = len(interfid) == 0 # no temporal overlap between the two tracks
cont2 = all(iou > 0.5 for iou in ious) # adjacent-frame boxes overlap spatially
cont3 = emb_[0, 0] > 0.75 # mean appearance features are similar
cont = cont1 and cont2 and cont3
return cont
def track_equal_str(atrack, btrack):
if atrack == btrack:
return True
else:
return False
def merge_track(Residual):
out_list = []
alist = [t for t in Residual]
while alist:
atrack = alist[0]
cur_list = []
cur_list.append(atrack)
alist.pop(0)
blist = [b for b in alist]
alist = []
for btrack in blist:
if track_equal_str(atrack, btrack):
cur_list.append(btrack)
else:
alist.append(btrack)
out_list.append(cur_list)
return out_list
def main():
Residual = ['a', 'b', 'c', 'd', 'a', 'b', 'c', 'b', 'c', 'd']
out_list = merge_track(Residual)
print(Residual)
print(out_list)
if __name__ == "__main__":
main()
# =============================================================================
# for i, atrack in enumerate(input_list):
# cur_list = []
# cur_list.append(atrack)
# del input_list[i]
#
# for j, btrack in enumerate(input_list):
# if track_equal(atrack, btrack):
# cur_list.append(btrack)
# del input_list[j]
#
# out_list.append(cur_list)
# =============================================================================

View File

@ -0,0 +1,94 @@
# Tracker
## Supported Trackers
- [x] ByteTracker
- [x] BoT-SORT
## Usage
### python interface:
You can use the Python interface to track objects using the YOLO model.
```python
from ultralytics import YOLO
model = YOLO("yolov8n.pt") # or a segmentation model .i.e yolov8n-seg.pt
model.track(
source="video/streams",
stream=True,
tracker="botsort.yaml", # or 'bytetrack.yaml'
show=True,
)
```
You can get the IDs of the tracked objects using the following code:
```python
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
for result in model.track(source="video.mp4"):
print(
result.boxes.id.cpu().numpy().astype(int)
) # this will print the IDs of the tracked objects in the frame
```
If you run the tracker on a folder of images, or loop over video frames yourself, pass the `persist` parameter to tell the model the frames are related, so the same objects keep the same IDs. Without it, a new tracker object is created on every call and the IDs change from frame to frame.
```python
import cv2
from ultralytics import YOLO
cap = cv2.VideoCapture("video.mp4")
model = YOLO("yolov8n.pt")
while True:
ret, frame = cap.read()
if not ret:
break
results = model.track(frame, persist=True)
boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
ids = results[0].boxes.id.cpu().numpy().astype(int)
for box, id in zip(boxes, ids):
cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
cv2.putText(
frame,
f"Id {id}",
(box[0], box[1]),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 0, 255),
2,
)
cv2.imshow("frame", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
```
## Change tracker parameters
You can change the tracker parameters by editing the tracker config file (e.g. `botsort.yaml`), which is located in the `ultralytics/cfg/trackers` folder.
## Command Line Interface (CLI)
You can also use the command line interface to track objects using the YOLO model.
```bash
yolo detect track source=... tracker=...
yolo segment track source=... tracker=...
yolo pose track source=... tracker=...
```
By default, trackers will use the configuration in `ultralytics/cfg/trackers`. We also support using a modified tracker config file. Please refer to the tracker config files in `ultralytics/cfg/trackers`.
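For example, you can point the tracker at a modified copy of the default config. A minimal sketch, assuming `custom_botsort.yaml` is a copy of `botsort.yaml` with edited thresholds:
```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
model.track(source="video.mp4", tracker="custom_botsort.yaml", show=True)
```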
## Contribute to Our Trackers Section
Are you proficient in multi-object tracking and have successfully implemented or adapted a tracking algorithm with Ultralytics YOLO? We invite you to contribute to our Trackers section! Your real-world applications and solutions could be invaluable for users working on tracking tasks.
By contributing to this section, you help expand the scope of tracking solutions available within the Ultralytics YOLO framework, adding another layer of functionality and utility for the community.
To initiate your contribution, please refer to our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for comprehensive instructions on submitting a Pull Request (PR) 🛠️. We are excited to see what you bring to the table!
Together, let's enhance the tracking capabilities of the Ultralytics YOLO ecosystem 🙏!

View File

@ -0,0 +1,10 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from .bot_sort import BOTSORT
from .byte_tracker import BYTETracker
from .track import register_tracker
__all__ = 'register_tracker', 'BOTSORT', 'BYTETracker' # allow simpler import

View File

@ -0,0 +1,71 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from collections import OrderedDict
import numpy as np
class TrackState:
"""Enumeration of possible object tracking states."""
New = 0
Tracked = 1
Lost = 2
Removed = 3
class BaseTrack:
"""Base class for object tracking, handling basic track attributes and operations."""
_count = 0
track_id = 0
is_activated = False
state = TrackState.New
history = OrderedDict()
features = []
curr_feature = None
score = 0
start_frame = 0
frame_id = 0
time_since_update = 0
# Multi-camera
location = (np.inf, np.inf)
@property
def end_frame(self):
"""Return the last frame ID of the track."""
return self.frame_id
@staticmethod
def next_id():
"""Increment and return the global track ID counter."""
BaseTrack._count += 1
return BaseTrack._count
def activate(self, *args):
"""Activate the track with the provided arguments."""
raise NotImplementedError
def predict(self):
"""Predict the next state of the track."""
raise NotImplementedError
def update(self, *args, **kwargs):
"""Update the track with new observations."""
raise NotImplementedError
def mark_lost(self):
"""Mark the track as lost."""
self.state = TrackState.Lost
def mark_removed(self):
"""Mark the track as removed."""
self.state = TrackState.Removed
@staticmethod
def reset_id():
"""Reset the global track ID counter."""
BaseTrack._count = 0

View File

@ -0,0 +1,198 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from collections import deque
import numpy as np
from .basetrack import TrackState
from .byte_tracker import BYTETracker, STrack
from .utils import matching
# from .utils.gmc import GMC
from .utils.kalman_filter import KalmanFilterXYWH
from .reid.reid_interface import ReIDInterface
# from .reid.config import config
from tools.config import config
class BOTrack(STrack):
shared_kalman = KalmanFilterXYWH()
def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
"""Initialize YOLOv8 object with temporal parameters, such as feature history, alpha and current features."""
super().__init__(tlwh, score, cls)
self.smooth_feat = None
self.curr_feat = None
if feat is not None:
self.update_features(feat)
self.features = deque([], maxlen=feat_history)
self.alpha = 0.9
def update_features(self, feat):
"""Update features vector and smooth it using exponential moving average."""
feat /= np.linalg.norm(feat)
self.curr_feat = feat
if self.smooth_feat is None:
self.smooth_feat = feat
else:
self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def predict(self):
"""Predicts the mean and covariance using Kalman filter."""
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[6] = 0
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
def re_activate(self, new_track, frame_id, new_id=False):
"""Reactivates a track with updated features and optionally assigns a new ID."""
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
super().re_activate(new_track, frame_id, new_id)
def update(self, new_track, frame_id):
"""Update the YOLOv8 instance with new track and frame ID."""
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
super().update(new_track, frame_id)
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[:2] -= ret[2:] / 2
return ret
@staticmethod
def multi_predict(stracks):
"""Predicts the mean and covariance of multiple object tracks using shared Kalman filter."""
if len(stracks) <= 0:
return
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
for i, st in enumerate(stracks):
if st.state != TrackState.Tracked:
multi_mean[i][6] = 0
multi_mean[i][7] = 0
multi_mean, multi_covariance = BOTrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
stracks[i].mean = mean
stracks[i].covariance = cov
def convert_coords(self, tlwh):
"""Converts Top-Left-Width-Height bounding box coordinates to X-Y-Width-Height format."""
return self.tlwh_to_xywh(tlwh)
@staticmethod
def tlwh_to_xywh(tlwh):
"""Convert bounding box to format `(center x, center y, width,
height)`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
return ret
class BOTSORT(BYTETracker):
def __init__(self, args, frame_rate=30):
"""Initialize YOLOv8 object with ReID module and GMC algorithm."""
super().__init__(args, frame_rate)
# ReID module
self.proximity_thresh = args.proximity_thresh
self.appearance_thresh = args.appearance_thresh
if args.with_reid:
# custom ReID feature encoder
self.encoder = ReIDInterface(config)
# self.gmc = GMC(method=args.gmc_method) # commented by WQG
def get_kalmanfilter(self):
"""Returns an instance of KalmanFilterXYWH for object tracking."""
return KalmanFilterXYWH()
def init_track(self, dets, scores, cls, imgs):
"""Initialize track with detections, scores, and classes."""
if len(dets) == 0:
return []
if self.args.with_reid and self.encoder is not None:
features_keep = self.encoder.inference(imgs, dets)
return [BOTrack(xyxy, s, c, f) for (xyxy, s, c, f) in zip(dets, scores, cls, features_keep)] # detections
else:
return [BOTrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] # detections
def get_dists(self, tracks, detections):
"""Get distances between tracks and detections using IoU and (optionally) ReID embeddings."""
dists = matching.iou_distance(tracks, detections)
# proximity_thresh should be set large: ReID features are ignored only when two boxes are far apart
dists_mask = (dists > self.proximity_thresh)
# TODO: mot20
# if not self.args.mot20:
dists = matching.fuse_score(dists, detections)
if self.args.with_reid and self.encoder is not None:
emb_dists = matching.embedding_distance(tracks, detections) / 2.0
emb_dists[emb_dists > self.appearance_thresh] = 1.0
emb_dists[dists_mask] = 1.0
dists = np.minimum(dists, emb_dists)
return dists
def get_dists_1(self, tracks, detections):
"""Get distances between tracks and detections using IoU and (optionally) ReID embeddings."""
iou_dists = matching.iou_distance(tracks, detections)
iou_dists_mask = (iou_dists>0.9)
iou_dists = matching.fuse_score(iou_dists, detections)
weight = 0.4 # weight for the commented-out linear-fusion alternative below
if self.args.with_reid and self.encoder is not None:
emb_dists = matching.embedding_distance(tracks, detections)
'''============ two strategies for fusing iou_dists and emb_dists ==========='''
'''1. A ReID similarity threshold: below it, two box crops cannot belong to the same
object, so a reasonable, trustworthy threshold must be chosen.
2. The IoU term is only a weak constraint, so the iou_dists veto threshold should be set large.
'''
emb_dists_mask = (emb_dists > 0.85)
iou_dists[emb_dists_mask] = 1
emb_dists[iou_dists_mask] = 1
dists = np.minimum(iou_dists, emb_dists)
'''Alternative: weighted linear fusion of the two costs'''
# dists = (1-weight)*iou_dists + weight*emb_dists
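# Numeric sketch of the veto logic above (hypothetical values): a pair with
# iou_dist = 0.95 (> 0.9) gets its appearance cost raised to 1, and a pair with
# emb_dist = 0.90 (> 0.85) gets its IoU cost raised to 1; np.minimum then keeps a
# small cost only when both cues agree the match is plausible.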
else:
dists = iou_dists.copy()
return dists
def multi_predict(self, tracks):
"""Predict and track multiple objects with YOLOv8 model."""
BOTrack.multi_predict(tracks)
def get_result(self):
'''written by WQG'''
activate_tracks = np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.idx]
for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
track_features = []
if self.args.with_reid and self.encoder is not None:
track_features = np.asarray([x.curr_feat for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
return (activate_tracks, track_features)

View File

@ -0,0 +1,424 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import numpy as np
from .basetrack import BaseTrack, TrackState
from .utils import matching
from .utils.kalman_filter import KalmanFilterXYAH
def dists_update(dists, strack_pool, detections):
'''written by WQG'''
if len(strack_pool) and len(detections):
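# Raise the cost of cross-class (track, detection) pairs to at least 1 so that
# linear assignment never matches a detection to a track of a different class.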
# alabel = np.array([int(stack.cls) if int(stack.cls)==0 or int(stack.cls)==9 else -1 for stack in strack_pool])
# blabel = np.array([int(stack.cls) if int(stack.cls)==0 or int(stack.cls)==9 else -1 for stack in detections])
alabel = np.array([int(stack.cls) for stack in strack_pool])
blabel = np.array([int(stack.cls) for stack in detections])
amlabel = np.expand_dims(alabel, axis=1).repeat(len(detections),axis=1)
bmlabel = np.expand_dims(blabel, axis=0).repeat(len(strack_pool),axis=0)
dist_label = 1 - (bmlabel == amlabel)
dists = np.where(dists > dist_label, dists, dist_label)
return dists
class STrack(BaseTrack):
shared_kalman = KalmanFilterXYAH()
def __init__(self, tlwh, score, cls):
"""wait activate."""
self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
self.kalman_filter = None
self.mean, self.covariance = None, None
self.is_activated = False
self.score = score
self.tracklet_len = 0
self.cls = cls
self.idx = tlwh[-1]
def predict(self):
"""Predicts mean and covariance using Kalman filter."""
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
@staticmethod
def multi_predict(stracks):
"""Perform multi-object predictive tracking using Kalman filter for given stracks."""
if len(stracks) <= 0:
return
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
for i, st in enumerate(stracks):
if st.state != TrackState.Tracked:
multi_mean[i][7] = 0
multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
stracks[i].mean = mean
stracks[i].covariance = cov
@staticmethod
def multi_gmc(stracks, H=np.eye(2, 3)):
"""Update state tracks positions and covariances using a homography matrix."""
if len(stracks) > 0:
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
R = H[:2, :2]
R8x8 = np.kron(np.eye(4, dtype=float), R)
t = H[:2, 2]
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
mean = R8x8.dot(mean)
mean[:2] += t
cov = R8x8.dot(cov).dot(R8x8.transpose())
stracks[i].mean = mean
stracks[i].covariance = cov
def activate(self, kalman_filter, frame_id):
"""Start a new tracklet."""
self.kalman_filter = kalman_filter
self.track_id = self.next_id()
self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh))
self.tracklet_len = 0
self.state = TrackState.Tracked
if frame_id == 1:
self.is_activated = True
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
"""Reactivates a previously lost track with a new detection."""
self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
self.convert_coords(new_track.tlwh))
self.tracklet_len = 0
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
self.cls = new_track.cls
self.idx = new_track.idx
def update(self, new_track, frame_id):
"""
Update a matched track
:type new_track: STrack
:type frame_id: int
:return:
"""
self.frame_id = frame_id
self.tracklet_len += 1
new_tlwh = new_track.tlwh
self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
self.convert_coords(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.score = new_track.score
self.cls = new_track.cls
self.idx = new_track.idx
def convert_coords(self, tlwh):
"""Convert a bounding box's top-left-width-height format to its x-y-angle-height equivalent."""
return self.tlwh_to_xyah(tlwh)
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
@property
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
@staticmethod
def tlbr_to_tlwh(tlbr):
"""Converts top-left bottom-right format to top-left width height format."""
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
def tlwh_to_tlbr(tlwh):
"""Converts tlwh bounding box format to tlbr format."""
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
"""Return a string representation of the BYTETracker object with start and end frames and track ID."""
return f'OT_{self.track_id}_({self.start_frame}-{self.end_frame})'
class BYTETracker:
def __init__(self, args, frame_rate=30):
"""Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
self.tracked_stracks = [] # type: list[STrack]
self.lost_stracks = [] # type: list[STrack]
self.removed_stracks = [] # type: list[STrack]
self.frame_id = 0
self.args = args
self.max_time_lost = int(frame_rate / 30.0 * args.track_buffer)
self.kalman_filter = self.get_kalmanfilter()
self.reset_id()
def update(self, results, img=None):
"""Updates object tracker with new detections and returns tracked object bounding boxes."""
self.frame_id += 1
activated_stracks = []
refind_stracks = []
lost_stracks = []
removed_stracks = []
scores = results.conf
bboxes = results.xyxy
# Add index
bboxes = np.concatenate([bboxes, np.arange(len(bboxes)).reshape(-1, 1)], axis=-1)
cls = results.cls
remain_inds = scores > self.args.track_high_thresh
inds_low = scores > self.args.track_low_thresh
inds_high = scores < self.args.track_high_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
cls_keep = cls[remain_inds]
cls_second = cls[inds_second]
detections = self.init_track(dets, scores_keep, cls_keep, img)
# Add newly detected tracklets to tracked_stracks
unconfirmed = []
tracked_stracks = [] # type: list[STrack]
for track in self.tracked_stracks:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_stracks.append(track)
# Step 2: First association, with high score detection boxes
strack_pool = self.joint_stracks(tracked_stracks, self.lost_stracks)
# Predict the current location with KF
self.multi_predict(strack_pool)
# ============================================================= GMC is unnecessary here (WQG)
# if hasattr(self, 'gmc') and img is not None:
# warp = self.gmc.apply(img, dets)
# STrack.multi_gmc(strack_pool, warp)
# STrack.multi_gmc(unconfirmed, warp)
# =============================================================================
dists = self.get_dists_1(strack_pool, detections)
'''written by WQG for different class'''
dists = dists_update(dists, strack_pool, detections)
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh)
for itracked, idet in matches:
track = strack_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_stracks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
# Step 3: Second association, with low score detection boxes
# association the untrack to the low score detections
detections_second = self.init_track(dets_second, scores_second, cls_second, img)
r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
# TODO
dists = matching.iou_distance(r_tracked_stracks, detections_second)
'''written by WQG for different class'''
dists = dists_update(dists, r_tracked_stracks, detections_second)
matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_stracks[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_stracks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
for it in u_track:
track = r_tracked_stracks[it]
if track.state != TrackState.Lost:
track.mark_lost()
lost_stracks.append(track)
# Deal with unconfirmed tracks, usually tracks with only one beginning frame
detections = [detections[i] for i in u_detection]
dists = self.get_dists_1(unconfirmed, detections)
'''written by WQG for different class'''
dists = dists_update(dists, unconfirmed, detections)
matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_stracks.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_stracks.append(track)
# Step 4: Init new stracks
for inew in u_detection:
track = detections[inew]
if track.score < self.args.new_track_thresh:
continue
track.activate(self.kalman_filter, self.frame_id)
activated_stracks.append(track)
# Step 5: Update state
for track in self.lost_stracks:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_stracks.append(track)
self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
self.tracked_stracks = self.joint_stracks(self.tracked_stracks, activated_stracks)
self.tracked_stracks = self.joint_stracks(self.tracked_stracks, refind_stracks)
self.lost_stracks = self.sub_stracks(self.lost_stracks, self.tracked_stracks)
self.lost_stracks.extend(lost_stracks)
self.lost_stracks = self.sub_stracks(self.lost_stracks, self.removed_stracks)
self.tracked_stracks, self.lost_stracks = self.remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
self.removed_stracks.extend(removed_stracks)
if len(self.removed_stracks) > 1000:
self.removed_stracks = self.removed_stracks[-999:] # clip remove stracks to 1000 maximum
return np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.frame_id, x.idx]
for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
def get_result(self):
'''written by WQG'''
# =============================================================================
# activate_tracks = np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.idx]
# for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
#
# track_features = []
# =============================================================================
tracks = []
feats = []
for t in self.tracked_stracks:
if t.is_activated:
track = t.tlbr.tolist() + [t.track_id, t.score, t.cls, t.idx]
feat = getattr(t, 'curr_feat', t.curr_feature) # read the feature from the track object t, not from the list
tracks.append(track)
feats.append(feat)
tracks = np.asarray(tracks, dtype=np.float32)
return (tracks, feats)
def get_kalmanfilter(self):
"""Returns a Kalman filter object for tracking bounding boxes."""
return KalmanFilterXYAH()
def init_track(self, dets, scores, cls, img=None):
"""Initialize object tracking with detections and scores using STrack algorithm."""
return [STrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] if len(dets) else [] # detections
def get_dists(self, tracks, detections):
"""Calculates the distance between tracks and detections using IOU and fuses scores."""
dists = matching.iou_distance(tracks, detections)
# TODO: mot20
# if not self.args.mot20:
dists = matching.fuse_score(dists, detections)
return dists
def multi_predict(self, tracks):
"""Returns the predicted tracks using the YOLOv8 network."""
STrack.multi_predict(tracks)
def reset_id(self):
"""Resets the ID counter of STrack."""
STrack.reset_id()
@staticmethod
def joint_stracks(tlista, tlistb):
"""Combine two lists of stracks into a single one."""
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
@staticmethod
def sub_stracks(tlista, tlistb):
"""DEPRECATED CODE in https://github.com/ultralytics/ultralytics/pull/1890/
stracks = {t.track_id: t for t in tlista}
for t in tlistb:
tid = t.track_id
if stracks.get(tid, 0):
del stracks[tid]
return list(stracks.values())
"""
track_ids_b = {t.track_id for t in tlistb}
return [t for t in tlista if t.track_id not in track_ids_b]
@staticmethod
def remove_duplicate_stracks(stracksa, stracksb):
"""Remove duplicate stracks with non-maximum IOU distance."""
pdist = matching.iou_distance(stracksa, stracksb)
pairs = np.where(pdist < 0.15)
dupa, dupb = [], []
for p, q in zip(*pairs):
timep = stracksa[p].frame_id - stracksa[p].start_frame
timeq = stracksb[q].frame_id - stracksb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(stracksa) if i not in dupa]
resb = [t for i, t in enumerate(stracksb) if i not in dupb]
return resa, resb
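# Usage sketch (assumed arg names follow the tracker yaml files in trackers/cfg):
# from types import SimpleNamespace
# args = SimpleNamespace(track_high_thresh=0.5, track_low_thresh=0.1, new_track_thresh=0.6,
# track_buffer=30, match_thresh=0.8)
# tracker = BYTETracker(args, frame_rate=30)
# online = tracker.update(results) # results: Boxes-like object with .conf, .xyxy, .cls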

View File

@ -0,0 +1,18 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT
tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
new_track_thresh: 0.6 # threshold to initialize a new track when a detection matches no existing tracks
track_buffer: 30 # buffer to calculate the time when to remove tracks
match_thresh: 0.8 # threshold for matching tracks
# min_box_area: 10 # threshold for min box areas (for tracker evaluation, not used for now)
# mot20: False # for tracker evaluation(not used for now)
# BoT-SORT settings
gmc_method: sparseOptFlow # method of global motion compensation
# ReID model related thresh (not supported yet)
proximity_thresh: 0.5
appearance_thresh: 0.25
with_reid: True

View File

@ -0,0 +1,11 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack
tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
new_track_thresh: 0.6 # threshold to initialize a new track when a detection matches no existing tracks
track_buffer: 30 # buffer to calculate the time when to remove tracks
match_thresh: 0.8 # threshold for matching tracks
# min_box_area: 10 # threshold for min box areas (for tracker evaluation, not used for now)
# mot20: False # for tracker evaluation(not used for now)

View File

@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 19 16:15:35 2024
@author: ym
"""

View File

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 19 14:01:46 2024
@author: ym
"""
import torch
import os
# import torchvision.transforms as T
class Config:
# network settings
backbone = 'resnet18' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
batch_size = 8
embedding_size = 256
img_size = 224
current_path = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(current_path, "ckpts", "resnet18_1220", "best.pth") # os.path.join keeps the path portable
# model_path = "./trackers/reid/ckpts/resnet18_1220/best.pth"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# =============================================================================
# metric = 'arcface' # [cosface, arcface]
# drop_ratio = 0.5
#
# # training settings
# checkpoints = "checkpoints/Mobilev3Large_1225" # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
# restore = False
#
# test_model = "./checkpoints/resnet18_1220/best.pth"
#
#
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# pin_memory = True # if memory is large, set it True to speed up a bit
# num_workers = 4 # dataloader
# =============================================================================
config = Config()

View File

@ -0,0 +1,83 @@
import torch
import torch.nn as nn
from torch.nn import init
class Flatten(nn.Module):
def forward(self, x):
return x.view(x.shape[0], -1)
class ChannelAttention(nn.Module):
def __init__(self, channel, reduction=16, num_layers=3):
super(ChannelAttention, self).__init__()
self.avgpool = nn.AdaptiveAvgPool2d(1)
gate_channels = [channel]
gate_channels += [channel // reduction] * num_layers
gate_channels += [channel]
self.ca = nn.Sequential()
self.ca.add_module('flatten', Flatten())
for i in range(len(gate_channels) - 2):
# unique names are required: repeated add_module('') calls would overwrite each other
self.ca.add_module(f'fc{i}', nn.Linear(gate_channels[i], gate_channels[i + 1]))
self.ca.add_module(f'bn{i}', nn.BatchNorm1d(gate_channels[i + 1]))
self.ca.add_module(f'relu{i}', nn.ReLU())
self.ca.add_module('fc_out', nn.Linear(gate_channels[-2], gate_channels[-1]))
def forward(self, x):
res = self.avgpool(x)
res = self.ca(res)
res = res.unsqueeze(-1).unsqueeze(-1).expand_as(x)
return res
class SpatialAttention(nn.Module):
def __init__(self, channel, reduction=16, num_lay=3, dilation=2):
super(SpatialAttention, self).__init__()
self.sa = nn.Sequential()
self.sa.add_module('conv_reduce', nn.Conv2d(kernel_size=1, in_channels=channel, out_channels=channel // reduction))
self.sa.add_module('bn_reduce', nn.BatchNorm2d(num_features=channel // reduction))
self.sa.add_module('relu_reduce', nn.ReLU())
for i in range(num_lay):
# dilated 3x3 convs; padding == dilation keeps the spatial size unchanged
self.sa.add_module(f'conv{i}', nn.Conv2d(kernel_size=3,
in_channels=channel // reduction,
out_channels=channel // reduction,
padding=dilation,
dilation=dilation))
self.sa.add_module(f'bn{i}', nn.BatchNorm2d(channel // reduction))
self.sa.add_module(f'relu{i}', nn.ReLU())
self.sa.add_module('conv_out', nn.Conv2d(channel // reduction, 1, kernel_size=1))
def forward(self, x):
res = self.sa(x)
res = res.expand_as(x)
return res
class BAMblock(nn.Module):
def __init__(self, channel=512, reduction=16, dia_val=2):
super(BAMblock, self).__init__()
self.ca = ChannelAttention(channel, reduction)
self.sa = SpatialAttention(channel, reduction, dilation=dia_val)
self.sigmoid = nn.Sigmoid()
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
b, c, _, _ = x.size()
sa_out = self.sa(x)
ca_out = self.ca(x)
weight = self.sigmoid(sa_out + ca_out)
out = (1 + weight) * x
return out
if __name__ == "__main__":
# smoke test: BAM should preserve the input shape
x = torch.randn(2, 512, 14, 14)
print(BAMblock(512)(x).shape)

Some files were not shown because too many files have changed in this diff.