update
This commit is contained in:

0    ytracking/tracking/__init__.py                                     Normal file
BIN  ytracking/tracking/__pycache__/__init__.cpython-38.pyc             Normal file (binary not shown)
BIN  ytracking/tracking/__pycache__/have_tracking.cpython-38.pyc        Normal file (binary not shown)
0    ytracking/tracking/dotrack/__init__.py                             Normal file
BIN  ytracking/tracking/dotrack/__pycache__/__init__.cpython-38.pyc     Normal file (binary not shown)
BIN  ytracking/tracking/dotrack/__pycache__/dotracks.cpython-38.pyc     Normal file (binary not shown)
BIN  ytracking/tracking/dotrack/__pycache__/dotracks.cpython-39.pyc     Normal file (binary not shown)
BIN  ytracking/tracking/dotrack/__pycache__/track_back.cpython-38.pyc   Normal file (binary not shown)
BIN  ytracking/tracking/dotrack/__pycache__/track_back.cpython-39.pyc   Normal file (binary not shown)
BIN  ytracking/tracking/dotrack/cart_tempt/back_cartedge.png            Normal file (image, 11 KiB)
BIN  ytracking/tracking/dotrack/cart_tempt/back_incart.png              Normal file (image, 9.6 KiB)
BIN  ytracking/tracking/dotrack/cart_tempt/back_outcart.png             Normal file (image, 9.6 KiB)

357  ytracking/tracking/dotrack/dotracks.py                             Normal file
@@ -0,0 +1,357 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar  4 18:16:01 2024

@author: ym
"""
import numpy as np
import cv2
from pathlib import Path
from scipy.spatial.distance import cdist

from ytracking.tracking.utils.mergetrack import track_equal_track
# curpath = Path(__file__).resolve().parents[0]
from tools.config import cfg


class MoveState:
    """Movement-state flags for a product."""
    Static = 0
    DownWard = 1
    UpWard = 2
    FreeMove = 3
    HandHborder = 4
    Unknown = -1


class ShoppingCart:

    def __init__(self, bboxes):
        self.bboxes = bboxes
        self.loadrate = self.load_rate()

    def load_rate(self):
        bboxes = self.bboxes

        fid = min(bboxes[:, 7])
        idx = bboxes[:, 7] == fid
        boxes = bboxes[idx]

        temp = np.zeros(self.incart.shape, np.uint8)
        for i in range(boxes.shape[0]):
            x1, y1, x2, y2, tid = boxes[i, 0:5]
            cv2.rectangle(temp, (int(x1), int(y1)), (int(x2), int(y2)), 255, cv2.FILLED)

        '''1. AND: filter out interference outside the cart border'''
        loadstate = cv2.bitwise_and(self.incart, temp)

        '''2. XOR: would yield the filled region inside the cart'''
        # loadstate = cv2.bitwise_xor(self.incart, temp1)

        num_loadstate = cv2.countNonZero(loadstate)
        num_incart = cv2.countNonZero(self.incart)
        loadrate = num_loadstate / (num_incart + 0.01)

        # edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png", cv2.IMREAD_GRAYSCALE)
        # cv2.imwrite(f"./test/temp.png", cv2.add(temp, edgeline))
        # cv2.imwrite(f"./test/incart.png", cv2.add(self.incart, edgeline))
        # cv2.imwrite(f"./test/loadstate.png", cv2.add(loadstate, edgeline))

        return loadrate

    @property
    def incart(self):
        # img = cv2.imread(curpath/'cart_tempt'/'back_incart.png', cv2.IMREAD_GRAYSCALE)
        img = cv2.imread(cfg.incart, cv2.IMREAD_GRAYSCALE)
        ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
        return binary

    @property
    def outcart(self):
        # img = cv2.imread(curpath/'cart_tempt'/'back_outcart.png', cv2.IMREAD_GRAYSCALE)
        img = cv2.imread(cfg.outcart, cv2.IMREAD_GRAYSCALE)
        ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
        return binary

    @property
    def cartedge(self):
        # img = cv2.imread(curpath/'cart_tempt'/'back_cartedge.png', cv2.IMREAD_GRAYSCALE)
        img = cv2.imread(cfg.cartedge, cv2.IMREAD_GRAYSCALE)
        ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
        return binary


class Track:
    '''Abstract base class; not meant to be instantiated directly.'''
    def __init__(self, boxes, imgshape=(1024, 1280)):
        '''
        boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
                 0   1   2   3      4       5     6        7           8
        '''
        self.boxes = boxes
        self.tid = int(boxes[0, 4])
        self.cls = int(boxes[0, 6])
        self.frnum = boxes.shape[0]
        self.imgBorder = False
        self.imgshape = imgshape
        self.state = MoveState.Unknown

        '''First and last frame IDs of the trajectory'''
        self.start_fid = int(np.min(boxes[:, 7]))
        self.end_fid = int(np.max(boxes[:, 7]))

        # Subclasses may instead call these on demand to cut sequential-processing cost.
        self.compute_cornpoints()
        self.compute_cornpts_feats()

        ''' Quantities based on (x, y, w, h); partly deprecated.
        Displacement between the last and the first frame:
            vshift: positive = moving down, negative = moving up
            hshift: positive = moving toward the image centre,
                    negative = moving toward the cart borders
        '''
        x0, y0 = (boxes[:, 0] + boxes[:, 2]) / 2, (boxes[:, 1] + boxes[:, 3]) / 2
        mw, mh = np.mean(boxes[:, 2] - boxes[:, 0]), np.mean(boxes[:, 3] - boxes[:, 1])
        self.mwh = np.mean((mw, mh))
        self.Area = mw * mh
        self.vshift = y0[-1] - y0[0]
        self.hshift = abs(x0[0] - self.imgshape[0]/2) - abs(x0[-1] - self.imgshape[0]/2)

        # self.boxmean = [np.mean(self.boxes[:, k]) for k in range(4)]
        # self.mwh = np.mean(self.boxmean[2:])
        # self.Area = boxes[:, 2] * boxes[:, 3]
        # self.vshift = boxes[-1, 1] - boxes[0, 1]
        # self.hshift = abs(boxes[0, 0]-self.imgshape[0]/2) - abs(boxes[-1, 0]-self.imgshape[0]/2)

    def compute_cornpoints(self):
        '''
        cornpoints holds 10 values per frame: the (x, y) coordinates of
        (center, top_left, top_right, bottom_left, bottom_right).
        '''
        boxes = self.boxes
        cornpoints = np.zeros((self.frnum, 10))
        cornpoints[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2
        cornpoints[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2
        cornpoints[:, 2], cornpoints[:, 3] = boxes[:, 0], boxes[:, 1]
        cornpoints[:, 4], cornpoints[:, 5] = boxes[:, 2], boxes[:, 1]
        cornpoints[:, 6], cornpoints[:, 7] = boxes[:, 0], boxes[:, 3]
        cornpoints[:, 8], cornpoints[:, 9] = boxes[:, 2], boxes[:, 3]

        self.cornpoints = cornpoints

    def compute_cornpts_feats(self):
        '''
        For each of the five key points: per-frame displacements, total
        trajectory length, maximum pairwise distance, and min-area rect.
        '''
        trajectory = []
        trajlens = []
        trajdist = []
        trajrects = []
        for k in range(5):
            # diff_xy2 = np.power(np.diff(self.cornpoints[:, 2*k:2*(k+1)], axis=0), 2)
            # trajlen = np.sum(np.sqrt(np.sum(diff_xy2, axis=1)))

            X = self.cornpoints[:, 2*k:2*(k+1)]

            traj = np.linalg.norm(np.diff(X, axis=0), axis=1)
            trajectory.append(traj)

            trajlen = np.sum(traj)
            trajlens.append(trajlen)

            ptdist = np.max(cdist(X, X))
            trajdist.append(ptdist)

            rect = cv2.minAreaRect(X.astype(np.int64))
            trajrects.append(rect)

        self.trajectory = trajectory
        self.trajlens = trajlens
        self.trajdist = trajdist
        self.trajrects = trajrects

    def trajfeature(self):
        '''
        Compute trajectory features for two cases (box inside the image /
        box touching the image border):
            - trajmin: the minimum-length key-point trajectory
            - trajlen_min: minimum trajectory length
            - trajdist_min: minimum trajectory extent (max pairwise Euclidean distance)
        '''
        idx1 = self.trajlens.index(max(self.trajlens))
        trajmax = self.trajectory[idx1]
        trajlen_max = self.trajlens[idx1]
        trajdist_max = self.trajdist[idx1]
        if not self.isCornpoint:
            idx2 = self.trajlens.index(min(self.trajlens))
            trajmin = self.trajectory[idx2]
            trajlen_min = self.trajlens[idx2]
            trajdist_min = self.trajdist[idx2]
        else:
            trajmin = self.trajectory[0]
            trajlen_min = self.trajlens[0]
            trajdist_min = self.trajdist[0]

        '''Min/max trajectory-length ratio: the smaller it is, the smaller the motion.'''
        trajlen_rate = trajlen_min / (trajlen_max + 0.0001)

        '''Minimum trajectory extent / mean box scale.'''
        trajdist_rate = trajdist_min / (self.mwh + 0.0001)

        self.trajmin = trajmin
        self.trajmax = trajmax
        self.feature = [trajlen_min, trajlen_max,
                        trajdist_min, trajdist_max,
                        trajlen_rate, trajdist_rate]


class doTracks:
    def __init__(self, bboxes, features_dict):
        self.bboxes = bboxes
        self.features_dict = features_dict
        self.frameid = set(bboxes[:, 7])
        self.trackid = set(bboxes[:, 4])
        self.lboxes = self.array2list()

        '''Buckets used when classifying the elements of self.tracks.'''
        self.Static = []
        self.DownWard = []
        self.UpWard = []
        self.FreeMove = []

        self.Hands = []
        self.Kids = []
        self.HandHborder = []
        self.Disruptors = []
        self.Residual = []
        self.Merged = []

    def array2list(self):
        '''
        Convert bboxes into a list of per-track box arrays.
        bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
        Return:
            lboxes: list whose elements each hold the boxes of one track_id,
            in x1y1x2y2 format:
            [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
        '''
        track_ids = set(self.bboxes[:, 4])
        lboxes = []
        for t_id in track_ids:
            # print(f"The ID is: {t_id}")
            idx = np.where(self.bboxes[:, 4] == t_id)[0]
            box = self.bboxes[idx, :]

            lboxes.append(box)

        return lboxes

    def similarity(self):
        nt = len(self.tracks)
        similar_dict = {}
        if nt >= 2:
            for i in range(nt):
                for j in range(i, nt):
                    tracka = self.tracks[i]
                    trackb = self.tracks[j]
                    similar = self.feat_similarity(tracka, trackb)
                    similar_dict.update({(tracka.tid, trackb.tid): similar})
        return similar_dict

    def feat_similarity(self, tracka, trackb, metric='cosine'):
        boxes_a, boxes_b = tracka.boxes, trackb.boxes
        na, nb = tracka.boxes.shape[0], trackb.boxes.shape[0]
        feata, featb = [], []
        for i in range(na):
            fid, bid = tracka.boxes[i, 7:9]
            feata.append(self.features_dict[fid][bid])
        for i in range(nb):
            fid, bid = trackb.boxes[i, 7:9]
            featb.append(self.features_dict[fid][bid])

        feata = np.asarray(feata, dtype=np.float32)
        featb = np.asarray(featb, dtype=np.float32)
        similarity_matrix = 1 - np.maximum(0.0, cdist(feata, featb, metric))

        feata_m = np.mean(feata, axis=0)[None, :]
        featb_m = np.mean(featb, axis=0)[None, :]
        simi_ab = 1 - cdist(feata_m, featb_m, metric)
        print(f'tid {int(boxes_a[0, 4])} vs {int(boxes_b[0, 4])}: {simi_ab[0][0]}')

        # return np.max(similarity_matrix)
        return simi_ab

    def merge_tracks_loop(self, alist):
        na, nb = len(alist), 0
        while na != nb:
            na = len(alist)
            alist = self.merge_tracks(alist)
            nb = len(alist)
        return alist

    def base_merge_tracks(self, Residual):
        """
        Merge targets that carry different track IDs but are likely the same product.
        """
        mergedTracks = []
        alist = [t for t in Residual]
        while alist:
            atrack = alist[0]
            cur_list = []
            cur_list.append(atrack)
            alist.pop(0)

            blist = [b for b in alist]
            alist = []
            for btrack in blist:
                if track_equal_track(atrack, btrack, self.features_dict):
                    cur_list.append(btrack)
                else:
                    alist.append(btrack)

            mergedTracks.append(cur_list)

        return mergedTracks

    @staticmethod
    def join_tracks(tlista, tlistb):
        """Combine two lists of stracks into a single one."""
        exists = {}
        res = []
        for t in tlista:
            exists[t.tid] = 1
            res.append(t)
        for t in tlistb:
            tid = t.tid
            if not exists.get(tid, 0):
                exists[tid] = 1
                res.append(t)
        return res

    @staticmethod
    def sub_tracks(tlista, tlistb):
        track_ids_b = {t.tid for t in tlistb}
        return [t for t in tlista if t.tid not in track_ids_b]
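Note on `merge_tracks_loop` above: it simply iterates `merge_tracks` until the number of tracks stops shrinking, i.e. until a fixed point is reached. A minimal sketch of that convergence pattern, using a stand-in merge step (the real one is supplied per camera by the subclasses and delegates to `track_equal_track`):

```python
def merge_pairs(items):
    """Stand-in merge step: fuse consecutive equal items (the real
    merge_tracks fuses tracks judged identical by track_equal_track)."""
    out = []
    for x in items:
        if out and out[-1] == x:
            continue  # fused into the previous element
        out.append(x)
    return out


def merge_loop(items):
    na, nb = len(items), 0
    while na != nb:  # stop once a pass no longer shrinks the list
        na = len(items)
        items = merge_pairs(items)
        nb = len(items)
    return items


print(merge_loop(['a', 'a', 'b', 'b', 'b', 'c']))  # -> ['a', 'b', 'c']
```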
204  ytracking/tracking/dotrack/dotracks_back.py  Normal file

@@ -0,0 +1,204 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar  4 18:36:31 2024

@author: ym
"""
import numpy as np
from ytracking.tracking.dotrack.dotracks import doTracks, ShoppingCart
from ytracking.tracking.dotrack.track_back import backTrack


class doBackTracks(doTracks):
    def __init__(self, bboxes, features_dict):
        super().__init__(bboxes, features_dict)

        self.tracks = [backTrack(b) for b in self.lboxes]

        # self.similar_dict = self.similarity()

        self.shopcart = ShoppingCart(bboxes)

# =============================================================================
#     def array2list(self):
#         '''          0,  1,  2,  3,     4,      5,    6,       7,          8
#         bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
#         lboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
#         '''
#         track_ids = set(self.bboxes[:, 4])
#         lboxes = []
#         for t_id in track_ids:
#             idx = np.where(self.bboxes[:, 4] == t_id)[0]
#             box = self.bboxes[idx, :]
#
#             x = (box[:, 0] + box[:, 2]) / 2
#             y = (box[:, 1] + box[:, 3]) / 2
#
#             # box: [x, y, w, h, track_id, score, cls, frame_index]
#             box[:, 2] = box[:, 2] - box[:, 0]
#             box[:, 3] = box[:, 3] - box[:, 1]
#             box[:, 0] = x
#             box[:, 1] = y
#
#             lboxes.append(box)
#
#         return lboxes
# =============================================================================

    def classify(self):
        '''Classify the elements of self.tracks into the track buckets.'''
        tracks = self.tracks
        shopcart = self.shopcart

        # Extract hand tracks; their frame IDs are later associated with moving targets.
        hand_tracks = [t for t in tracks if t.cls == 0]
        self.Hands.extend(hand_tracks)
        tracks = self.sub_tracks(tracks, hand_tracks)

        # Extract kid tracks and compute their state: left, right, incart.
        kid_tracks = [t for t in tracks if t.cls == 9]
        kid_states = [self.kid_state(t) for t in kid_tracks]
        self.Kids = [x for x in zip(kid_tracks, kid_states)]

        tracks = self.sub_tracks(tracks, kid_tracks)

        '''Static case 1: minimum relative key-point trajectory < 0.2 (this
        metric tends to run large).
           feature = [trajlen_min, trajlen_max,
                      trajdist_min, trajdist_max,
                      trajlen_rate, trajdist_rate]
        '''
        track1 = [t for t in tracks if t.feature[5] < 0.2
                                    or t.feature[3] < 120]

        '''Static case 2: the target starts out static; relax the minimum
        relative key-point trajectory threshold to < 0.5.'''
        track2 = [t for t in tracks if t.static_index.size > 0
                                    and t.static_index[0, 0] <= 2
                                    and t.feature[5] < 0.5]

        '''Static case 3: the target is static at both the start and the end.'''
        track3 = [t for t in tracks if t.static_index.shape[0] >= 2
                                    and t.static_index[0, 0] <= 2
                                    and t.static_index[-1, 1] >= t.frnum - 3]

        track12 = self.join_tracks(track1, track2)

        '''Collect the static tracks.'''
        static_tracks = self.join_tracks(track12, track3)
        self.Static.extend(static_tracks)

        '''Tracks left after removing static targets.'''
        tracks = self.sub_tracks(tracks, static_tracks)

        '''Moving distractor targets outside the cart border.'''
        track4 = [t for t in tracks if self.isouttrack(t)]

        tracks = self.sub_tracks(tracks, track4)

        '''Iterative trajectory merging.'''
        # merged_tracks = self.merge_tracks(tracks)
        merged_tracks = self.merge_tracks_loop(tracks)

        self.Residual = merged_tracks

    def merge_tracks(self, Residual):
        """
        Merge targets that carry different track IDs but are likely the same product.
        """
        mergedTracks = self.base_merge_tracks(Residual)

        oldtracks, newtracks = [], []
        for tracklist in mergedTracks:
            if len(tracklist) > 1:
                boxes = np.empty((0, 9), dtype=np.float32)
                for i, track in enumerate(tracklist):
                    if i == 0:
                        ntid, ncls = track.boxes[0, 4], track.boxes[0, 6]
                    iboxes = track.boxes.copy()

                    iboxes[:, 4], iboxes[:, 6] = ntid, ncls
                    boxes = np.concatenate((boxes, iboxes), axis=0)

                    oldtracks.append(track)

                fid_indices = np.argsort(boxes[:, 7])
                boxes_fid = boxes[fid_indices]

                newtracks.append(backTrack(boxes_fid))
            elif len(tracklist) == 1:
                oldtracks.append(tracklist[0])
                newtracks.append(tracklist[0])

        redu = self.sub_tracks(Residual, oldtracks)
        merged = self.join_tracks(redu, newtracks)

        return merged

    def kid_state(self, track):
        left_dist = track.cornpoints[:, 2]
        right_dist = 1024 - track.cornpoints[:, 4]

        if np.sum(left_dist < 30)/track.frnum > 0.8 and np.sum(right_dist > 512)/track.frnum > 0.7:
            kidstate = "left"
        elif np.sum(left_dist > 512)/track.frnum > 0.7 and np.sum(right_dist < 30)/track.frnum > 0.8:
            kidstate = "right"
        else:
            kidstate = "incart"

        return kidstate

    def hand_association(self):
        """
        Analyse the association between products and hands.
        """
        pass

    def isouttrack(self, track):
        if track.posState <= 1:
            isout = True
        else:
            isout = False
        return isout

    def isuptrack(self, track):
        Flag = False

        return Flag

    def isdowntrack(self, track):
        Flag = False

        return Flag

    def isfreetrack(self, track):
        Flag = False

        return Flag
190  ytracking/tracking/dotrack/dotracks_front.py  Normal file

@@ -0,0 +1,190 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar  4 18:38:20 2024

@author: ym
"""
import numpy as np
from ytracking.tracking.utils.mergetrack import track_equal_track
from .dotracks import doTracks
from .track_front import frontTrack


class doFrontTracks(doTracks):
    def __init__(self, bboxes, features_dict):
        super().__init__(bboxes, features_dict)

        self.tracks = [frontTrack(b) for b in self.lboxes]

    def classify(self):
        '''Classify the elements of self.tracks into the track buckets.'''
        tracks = self.tracks

        '''Extract hand tracks.'''
        hand_tracks = [t for t in tracks if t.cls == 0]

        for htrack in hand_tracks:
            htrack.extract_hand_features()

        self.Hands.extend(hand_tracks)
        tracks = self.sub_tracks(tracks, hand_tracks)

        '''Extract kid tracks.'''
        kid_tracks = [t for t in tracks if t.cls == 9]
        tracks = self.sub_tracks(tracks, kid_tracks)

        '''Static tracks.'''
        static_tracks = [t for t in tracks if t.frnum > 1 and t.is_Static()]

        '''Tracks left after removing static targets.'''
        tracks = self.sub_tracks(tracks, static_tracks)

        '''Iterative trajectory merging.'''
        merged_tracks = self.merge_tracks_loop(tracks)

        tracks = [t for t in merged_tracks if t.frnum > 1]

        for gtrack in tracks:
            # print(f"Goods ID:{gtrack.tid}")
            for htrack in hand_tracks:
                if self.is_associate_with_hand(htrack, gtrack):
                    gtrack.hands.append(htrack)

        freemoved_tracks = [t for t in tracks if t.is_FreeMove()]

        tracks = self.sub_tracks(tracks, freemoved_tracks)

        self.Residual = tracks

    def is_associate_with_hand(self, htrack, gtrack):
        '''A hand track and a goods track are associated when:
            a. their moving-frame indices intersect, and
            b. the IoUs over the intersecting frames are greater than 0.
        '''
        assert htrack.cls == 0 and gtrack.cls != 0 and gtrack.cls != 9, 'Track cls is Error!'

        hboxes = np.empty(shape=(0, 9), dtype=float)
        gboxes = np.empty(shape=(0, 9), dtype=float)

        # start and end are indices, hence the slice start:(end+1).
        for start, end in htrack.dynamic_y2:
            hboxes = np.concatenate((hboxes, htrack.boxes[start:end+1, :]), axis=0)
        for start, end in gtrack.dynamic_y1:
            gboxes = np.concatenate((gboxes, gtrack.boxes[start:end+1, :]), axis=0)

        hfids, gfids = hboxes[:, 7], gboxes[:, 7]
        fids = set(hfids).intersection(set(gfids))

        if len(fids) == 0:
            return False

        # print(f"Goods ID: {gtrack.tid}, Hand ID: {htrack.tid}")
        ious = []
        for f in fids:
            h = np.where(hfids == f)[0][0]
            g = np.where(gfids == f)[0][0]

            x11, y11, x12, y12 = hboxes[h, 0:4]
            x21, y21, x22, y22 = gboxes[g, 0:4]

            x1, y1 = max((x11, x21)), max((y11, y21))
            x2, y2 = min((x12, x22)), min((y12, y22))

            inter = (x2 - x1).clip(0) * (y2 - y1).clip(0)
            area1 = (x12 - x11) * (y12 - y11)
            area2 = (x22 - x21) * (y22 - y21)

            iou = inter / (area1 + area2 - inter + 1e-6)

            if iou > 0:
                ious.append(iou)

        return len(ious)

    def merge_tracks(self, Residual):
        """
        Merge targets that carry different track IDs but are likely the same product.
        """
# =============================================================================
#         mergedTracks = []
#         alist = [t for t in Residual]
#         while alist:
#             atrack = alist[0]
#             cur_list = []
#             cur_list.append(atrack)
#             alist.pop(0)
#
#             blist = [b for b in alist]
#             alist = []
#             for btrack in blist:
#                 if track_equal_track(atrack, btrack, self.features_dict):
#                     cur_list.append(btrack)
#                 else:
#                     alist.append(btrack)
#
#             mergedTracks.append(cur_list)
# =============================================================================
        mergedTracks = self.base_merge_tracks(Residual)

        oldtracks, newtracks = [], []
        for tracklist in mergedTracks:
            if len(tracklist) > 1:
                boxes = np.empty((0, 9), dtype=np.float32)
                for i, track in enumerate(tracklist):
                    if i == 0:
                        ntid, ncls = track.boxes[0, 4], track.boxes[0, 6]
                    iboxes = track.boxes.copy()
                    iboxes[:, 4], iboxes[:, 6] = ntid, ncls
                    boxes = np.concatenate((boxes, iboxes), axis=0)
                    oldtracks.append(track)

                fid_indices = np.argsort(boxes[:, 7])
                boxes_fid = boxes[fid_indices]

                newtracks.append(frontTrack(boxes_fid))
            elif len(tracklist) == 1:
                oldtracks.append(tracklist[0])
                newtracks.append(tracklist[0])

        redu = self.sub_tracks(Residual, oldtracks)
        merged = self.join_tracks(redu, newtracks)

        return merged

# =============================================================================
#     def array2list(self):
#         '''
#         Convert bboxes into a list of per-track box arrays.
#         bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
#         Return:
#             lboxes: list whose elements each hold the boxes of one track_id,
#             in x1y1x2y2 format:
#             [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
#         '''
#         track_ids = set(self.bboxes[:, 4])
#         lboxes = []
#         for t_id in track_ids:
#             # print(f"The ID is: {t_id}")
#             idx = np.where(self.bboxes[:, 4] == t_id)[0]
#             box = self.bboxes[idx, :]
#
#             lboxes.append(box)
#
#         return lboxes
# =============================================================================
304  ytracking/tracking/dotrack/track_back.py  Normal file

@@ -0,0 +1,304 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar  4 18:28:47 2024

@author: ym
"""
import cv2
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
from .dotracks import MoveState, Track
from tools.config import cfg


class backTrack(Track):
    # boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
    #          0,  1,  2,  3,     4,      5,    6,       7,          8
    def __init__(self, boxes, imgshape=(1024, 1280)):
        super().__init__(boxes, imgshape)

        '''(cent_x, cent_y, mean_w, mean_h)'''

        '''Trajectory features of the five key points (center, top-left,
        top-right, bottom-left, bottom-right).'''
        self.compute_cornpts_feats()

        '''Depends on: self.cornpoints'''
        self.isCornpoint = self.isimgborder()

        '''Depends on: self.cornpoints and self.isCornpoint. Defines the
        trajectory features:
            self.trajectory, self.trajmin, self.trajlens, self.trajdist
            self.feature = [trajlen_min, trajlen_max,
                            trajdist_min, trajdist_max,
                            trajlen_rate, trajdist_rate]'''
        self.trajfeature()

        '''Frame-index ranges of static points.'''
        self.static_index = self.compute_static_fids()

        '''Frame-index ranges of moving points (bounded by static frames on
        either side).'''
        self.moving_index = self.compute_moving_fids()

        '''Depends on: self.cornpoints. Defines four position variables:
            self.Cent_isIncart, self.LB_isIncart, self.RB_isIncart
            self.posState = self.Cent_isIncart + self.LB_isIncart + self.RB_isIncart'''
        self.PositionState()

        '''self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
           self.incartrates = incartrates'''
        self.compute_ious_feat()

        # self.PCA()

# =============================================================================
#     def compute_cornpoints(self):
#         '''
#         cornpoints holds 10 values per frame: the (x, y) coordinates of
#         (center, top_left, top_right, bottom_left, bottom_right).
#         '''
#         boxes = self.boxes
#         cornpoints = np.zeros((self.frnum, 10))
#         cornpoints[:,0], cornpoints[:,1] = boxes[:, 0], boxes[:, 1]
#         cornpoints[:,2], cornpoints[:,3] = boxes[:, 0] - boxes[:, 2]/2, boxes[:, 1] - boxes[:, 3]/2
#         cornpoints[:,4], cornpoints[:,5] = boxes[:, 0] + boxes[:, 2]/2, boxes[:, 1] - boxes[:, 3]/2
#         cornpoints[:,6], cornpoints[:,7] = boxes[:, 0] - boxes[:, 2]/2, boxes[:, 1] + boxes[:, 3]/2
#         cornpoints[:,8], cornpoints[:,9] = boxes[:, 0] + boxes[:, 2]/2, boxes[:, 1] + boxes[:, 3]/2
#
#         self.cornpoints = cornpoints
# =============================================================================

    def isimgborder(self, BoundPixel=10, BoundThresh=0.3):
        x1, y1 = self.cornpoints[:, 2], self.cornpoints[:, 3]
        x2, y2 = self.cornpoints[:, 8], self.cornpoints[:, 9]

        cont1 = sum(abs(x1) < BoundPixel) / self.frnum > BoundThresh
        cont2 = sum(abs(y1) < BoundPixel) / self.frnum > BoundThresh
        cont3 = sum(abs(x2 - self.imgshape[0]) < BoundPixel) / self.frnum > BoundThresh
        cont4 = sum(abs(y2 - self.imgshape[1]) < BoundPixel) / self.frnum > BoundThresh

        cont = cont1 or cont2 or cont3 or cont4
        isCornpoint = False
        if cont:
            isCornpoint = True

        return isCornpoint

    def PositionState(self, camerType="back"):
        '''
        camerType: back  — rear camera
                   front — front camera
        '''
        if camerType == "front":
            incart = cv2.imread(cfg.incart, cv2.IMREAD_GRAYSCALE)
        else:
            incart = cv2.imread(cfg.incart_ftmp, cv2.IMREAD_GRAYSCALE)

        xc, yc = self.cornpoints[:, 0].clip(0, self.imgshape[0]-1).astype(np.int64), self.cornpoints[:, 1].clip(0, self.imgshape[1]-1).astype(np.int64)
        x1, y1 = self.cornpoints[:, 6].clip(0, self.imgshape[0]-1).astype(np.int64), self.cornpoints[:, 7].clip(0, self.imgshape[1]-1).astype(np.int64)
        x2, y2 = self.cornpoints[:, 8].clip(0, self.imgshape[0]-1).astype(np.int64), self.cornpoints[:, 9].clip(0, self.imgshape[1]-1).astype(np.int64)

        # print(self.tid)
        Cent_inCartnum = np.count_nonzero(incart[(yc, xc)])
        LB_inCartnum = np.count_nonzero(incart[(y1, x1)])
        RB_inCartnum = np.count_nonzero(incart[(y2, x2)])

        self.Cent_isIncart = False
        self.LB_isIncart = False
        self.RB_isIncart = False
        if Cent_inCartnum: self.Cent_isIncart = True
        if LB_inCartnum: self.LB_isIncart = True
        if RB_inCartnum: self.RB_isIncart = True

        self.posState = self.Cent_isIncart + self.LB_isIncart + self.RB_isIncart

    def PCA(self):
        self.pca = PCA()

        X = self.cornpoints[:, 0:2]
        self.pca.fit(X)

    def compute_ious_feat(self):
        '''Outputs:
            self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
            self.incartrates = incartrates
        where:
            box stream: the trajectory image formed by all boxes of the
            track, split into three parts: incart, outcart, cartboarder.
            incart_iou, outcart_iou, cartboarder_iou: IoU of each part with
            the box stream;
                incart_iou  = 0 → the track is outside the cart;
                outcart_iou = 0 → the track is inside the cart (it may also
                have entered through the bottom-left/right corner).
            maxbox_iou, minbox_iou: IoU of the largest/smallest box in the
            track with the box stream; the smaller their difference (the
            closer to 1), the smaller the track's motion.
            incartrates: the per-box IoU-with-incart series; rising values
            indicate a put-in event, falling values a take-out event.
        '''
        incart = cv2.imread(cfg.incart, cv2.IMREAD_GRAYSCALE)
        outcart = cv2.imread(cfg.outcart, cv2.IMREAD_GRAYSCALE)
        cartboarder = cv2.imread(cfg.cartboarder, cv2.IMREAD_GRAYSCALE)

        incartrates = []
        temp = np.zeros(incart.shape, np.uint8)
        maxarea, minarea = 0, self.imgshape[0]*self.imgshape[1]
        for i in range(self.frnum):
            # x, y, w, h = self.boxes[i, 0:4]

            x = (self.boxes[i, 2] + self.boxes[i, 0]) / 2
            w = self.boxes[i, 2] - self.boxes[i, 0]
            y = (self.boxes[i, 3] + self.boxes[i, 1]) / 2
            h = self.boxes[i, 3] - self.boxes[i, 1]

            if w*h > maxarea: maxarea = w*h
            if w*h < minarea: minarea = w*h
            cv2.rectangle(temp, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), 255, cv2.FILLED)

            temp1 = np.zeros(incart.shape, np.uint8)
            cv2.rectangle(temp1, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), 255, cv2.FILLED)
            temp2 = cv2.bitwise_and(incart, temp1)
            inrate = cv2.countNonZero(temp2) / (w*h)
            incartrates.append(inrate)

        isincart = cv2.bitwise_and(incart, temp)
        isoutcart = cv2.bitwise_and(outcart, temp)
        iscartboarder = cv2.bitwise_and(cartboarder, temp)

        num_temp = cv2.countNonZero(temp)
        num_incart = cv2.countNonZero(isincart)
        num_outcart = cv2.countNonZero(isoutcart)
        num_cartboarder = cv2.countNonZero(iscartboarder)

        incart_iou = num_incart / num_temp
        outcart_iou = num_outcart / num_temp
        cartboarder_iou = num_cartboarder / num_temp
        maxbox_iou = maxarea / num_temp
        minbox_iou = minarea / num_temp

        self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
        self.incartrates = incartrates

    def compute_static_fids(self, thresh1=12, thresh2=3):
        '''
        Compute the (start_frame_id, end_frame_id) ranges over which the
        track's trajectory is relatively static.
        thresh1: per-frame displacement threshold (pixels) below which the
                 target centre counts as static between adjacent frames.
        thresh2: minimum number of consecutive frames captured in a static state.
        '''
        BoundPixel = 8
        x1, y1 = self.cornpoints[:, 2], self.cornpoints[:, 3]
        x2, y2 = self.cornpoints[:, 8], self.cornpoints[:, 9]
        cont1 = sum(abs(x1) < BoundPixel) > 3
        # cont2 = sum(abs(y1) < BoundPixel) > 3
        cont3 = sum(abs(x2 - self.imgshape[0]) < BoundPixel) > 3
        # cont4 = sum(abs(y2 - self.imgshape[1]) < BoundPixel) > 3
        cont = not (cont1 or cont3)

        ## ============== Next step: use the centre point and pick the corner
        ## with the smallest motion amplitude as the reference point.

        static_index = []
        if self.frnum >= 2 and cont:
            x1 = self.boxes[1:, 7]
            x2 = [i for i in range(int(min(x1)), int(max(x1) + 1))]
            dist_adjc = np.interp(x2, x1, self.trajmin)

            # dist_adjc = self.trajmin

            static_thresh = (dist_adjc < thresh1)[:, None].astype(np.uint8)
            static_cnts, _ = cv2.findContours(static_thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)

            for cnt in static_cnts:
                _, start, _, num = cv2.boundingRect(cnt)
                end = start + num
                if num <= thresh2:
                    continue
                static_index.append((start, end))

# =============================================================================
#             '''========= Emitting frame IDs here is not quite right ========='''
#             static_fids = []
#             for i in range(len(static_index)):
#                 i1, i2 = static_index[i]
#                 fid1, fid2 = boxes[i1, 7], boxes[i2, 7]
#                 static_fids.append([fid1, fid2])
#             static_fids = np.array(static_fids)
# =============================================================================

        static_index = np.array(static_index)
        if static_index.size:
            indx = np.argsort(static_index[:, 0])
            static_index = static_index[indx]

        return static_index

    def compute_moving_fids(self, thresh1=12, thresh2=3):
        '''
        Compute the (start_frame_id, end_frame_id) ranges of the moving
        trajectory points.
        thresh1: per-frame displacement threshold (pixels) above which the
                 target centre counts as moving between adjacent frames.
        thresh2: minimum number of consecutive frames captured in motion.
        Goals:
            1. compute the trajectory direction;
            2. compute the association with hand motion.
        '''
        moving_index = []
        if self.frnum >= 2:
            x1 = self.boxes[1:, 7]
            x2 = [i for i in range(int(min(x1)), int(max(x1) + 1))]
            dist_adjc = np.interp(x2, x1, self.trajmin)

            moving_thresh = (dist_adjc >= thresh1)[:, None].astype(np.uint8)
            moving_cnts, _ = cv2.findContours(moving_thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)

            for cnt in moving_cnts:
                _, start, _, num = cv2.boundingRect(cnt)
                if num < thresh2:
                    continue
                end = start + num
                moving_index.append((start, end))

# =============================================================================
#             '''========= Emitting frame IDs here is not quite right ========='''
#             moving_fids = []
#             for i in range(len(moving_index)):
#                 i1, i2 = moving_index[i]
#                 fid1, fid2 = boxes[i1, 7], boxes[i2, 7]
#                 moving_fids.append([fid1, fid2])
#             moving_fids = np.array(moving_fids)
# =============================================================================
        moving_index = np.array(moving_index)
        if moving_index.size:
            indx = np.argsort(moving_index[:, 0])
            moving_index = moving_index[indx]

        return moving_index

    def compute_distance(self):
        pass

    def move_start_fid(self):
        pass

    def move_end_fid(self):
        pass
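Note on `compute_static_fids`/`compute_moving_fids` above: they reuse `cv2.findContours` as a 1-D run detector — the boolean displacement mask is cast to a one-column `uint8` image, and each contour's bounding rect then yields the (start, length) of one consecutive run. A self-contained sketch of the trick, with made-up displacements:

```python
import cv2
import numpy as np

disp = np.array([2.0, 1.5, 20.0, 18.0, 1.0, 0.5, 0.8])  # made-up per-frame displacements
static = (disp < 12)[:, None].astype(np.uint8)           # one-column binary "image"

cnts, _ = cv2.findContours(static, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
runs = sorted(cv2.boundingRect(c)[1:4:2] for c in cnts)  # (x, y, w, h) -> (y, h) = (start, length)
print([(start, start + num) for start, num in runs])     # [(0, 2), (4, 7)]
```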
275  ytracking/tracking/dotrack/track_front.py  Normal file

@@ -0,0 +1,275 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar  4 18:33:01 2024

@author: ym
"""
import numpy as np
from sklearn.cluster import KMeans
from .dotracks import MoveState, Track


class frontTrack(Track):
    # boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
    #          0,  1,  2,  3,     4,      5,    6,       7,          8
    def __init__(self, boxes, imgshape=(1024, 1280)):
        super().__init__(boxes, imgshape)
        self.hands = []

        self.mwh = np.mean((self.boxes[:, 2] - self.boxes[:, 0]) *
                           (self.boxes[:, 3] - self.boxes[:, 1]))

        '''Trajectory features of the five key points (center, top-left,
        top-right, bottom-left, bottom-right).'''
        self.compute_cornpts_feats()

        self.CART_HIGH_THRESH1 = imgshape[1] / 2.98

        # if self.tid == 10:
        #     print(f"ID: {self.tid}")

        '''Static intervals of y1 and y2; the values index boxes along
        axis=0, they are not frame indices.'''
        self.static_y1, self.dynamic_y1 = self.compute_static_fids(boxes[:, 1])
        self.static_y2, self.dynamic_y2 = self.compute_static_fids(boxes[:, 3])

        self.isCornpoint = self.is_left_right_cornpoint()
        self.isBotmpoint = self.is_bottom_cornpoint()

        self.trajfeature()

        '''Hand-state analysis thresholds.'''
        self.HAND_STATIC_THRESH = 100
        self.CART_POSIT_0 = 430
        self.CART_POSIT_1 = 620

    def is_left_right_cornpoint(self):
        '''Over all boxes:
        the box's bottom-left corner coincides with the image's bottom-left
        corner, or the box's bottom-right corner coincides with the image's
        bottom-right corner.
        '''
        x1, y1 = self.boxes[:, 0], self.boxes[:, 1]
        x2, y2 = self.boxes[:, 2], self.boxes[:, 3]

        # Left-bottom corner point
        condt1 = all(x1 < 5) and all(y2 > self.imgshape[1] - 5)

        # Right-bottom corner point
        condt2 = all(x2 > self.imgshape[0] - 5) and all(y2 > self.imgshape[1] - 5)

        condt = condt1 or condt2

        return condt

    def is_edge_cornpoint(self):
        '''Over all boxes: whether the box sticks to the left/right image edge.'''
        x1, x2 = self.boxes[:, 0], self.boxes[:, 2]
        condt = all(x1 < 3) or all(x2 > self.imgshape[0] - 3)

        return condt

    def is_bottom_cornpoint(self):
        '''Over all boxes: whether the box sticks to the bottom image edge.'''
        condt = all(self.boxes[:, 3] > self.imgshape[1] - 20)

        return condt

    def compute_static_fids(self, y, STATIC_THRESH=8):
        '''
        For the front camera, y is usually the box's y1 coordinate, and the
        product should be restricted to inside the cart.
        inputs:
            y: 1D array
        parameters:
            STATIC_THRESH: threshold below which the trajectory counts as static.
        outputs:
            (start, end) index ranges of the elements of y whose first
            difference is below STATIC_THRESH:
            ranges = [(start1, end1),
                      (start2, end2),
                      ...]
            plus rangex, the complementary (moving) ranges.
        '''
        # print(f"The ID is: {self.tid}")

        det_y = np.diff(y, axis=0)
        ranges, rangex = [], []

        static_indices = np.where(np.abs(det_y) < STATIC_THRESH)[0]

        if len(static_indices) == 0:
            rangex.append((0, len(y) - 1))
            return ranges, rangex

        start_index = static_indices[0]

        for i in range(1, len(static_indices)):
            if static_indices[i] != static_indices[i-1] + 1:
                ranges.append((start_index, static_indices[i-1] + 1))
                start_index = static_indices[i]
        ranges.append((start_index, static_indices[-1] + 1))

        if len(ranges) == 0:
            rangex.append((0, len(y) - 1))
            return ranges, rangex

        idx1, idx2 = ranges[0][0], ranges[-1][1]

        if idx1 != 0:
            rangex.append((0, idx1))

        # Moving stages between consecutive static ranges.
        for k in range(1, len(ranges)):
            index1 = ranges[k-1][1]
            index2 = ranges[k][0]
            rangex.append((index1, index2))

        # The trajectory ends in a moving stage.
        if idx2 != len(y) - 1:
            rangex.append((idx2, len(y) - 1))

        return ranges, rangex

    def is_Static(self):
        assert self.frnum > 1, "boxes number must greater than 1"
        # print(f"The ID is: {self.tid}")

        # Hand and kid targets are not considered.
        if self.cls == 0 or self.cls == 9:
            return False

        # All boxes have y2 = 1280.
        if self.isBotmpoint:
            return True

        boxes = self.boxes
        y0 = (boxes[:, 1] + boxes[:, 3]) / 2

        ## Net vertical displacement
        sum_y0 = y0[-1] - y0[0]
        sum_y1 = boxes[-1, 1] - boxes[0, 1]
        sum_y2 = boxes[-1, 3] - boxes[0, 3]

        # Special cases to consider
        isbottom = max(boxes[:, 3]) > 1280 - 3
        istop = min(boxes[:, 1]) < 3
        isincart = min(y0) > self.CART_HIGH_THRESH1
        uncert = abs(sum_y1) < 100 and abs(sum_y2) < 100

        '''Initial condition: the product centre stays inside the cart.'''
        condt0 = max((boxes[:, 1] + boxes[:, 3]) / 2) > self.CART_HIGH_THRESH1

        '''Condition 1: the net vertical displacement (of y1 or y2) describes
        the trajectory length; the cases are:
            (1) the box may coincide with the top/bottom image edge,
            (2) the top or bottom edge may jitter.
        '''
        if isbottom and istop:
            condt1 = abs(sum_y0) < 300
        elif isbottom:   # y2 sits at the bottom; use y1 to characterise motion
            condt1 = sum_y1 > -120 and abs(sum_y0) < 80   # with a bottom point, the upward threshold is below 100
        elif istop:      # y1 sits at the top; use y2 to characterise motion
            condt1 = abs(sum_y2) < 100
        else:
            condt1 = (abs(sum_y1) < 30 or abs(sum_y2) < 30)

        '''Condition 2: the trajectory is static at both the start and the
        end, judged on the static intervals of y1:
            a. the product is inside the cart,
            b. both the first and the last stage are static,
            c. each static run is longer than 3 frames.'''
        condt2 = False
        if len(self.static_y1) >= 2:
            condt_s0 = self.static_y1[0][0] == 0 and self.static_y1[0][1] - self.static_y1[0][0] >= 3
            condt_s1 = self.static_y1[-1][1] == self.frnum - 1 and self.static_y1[-1][1] - self.static_y1[-1][0] >= 3
            condt2 = condt_s0 and condt_s1 and isincart

        condt = condt0 and (condt1 or condt2)

        return condt

    def extract_hand_features(self):
        self.isHandStatic = False

        x0 = (self.boxes[:, 0] + self.boxes[:, 2]) / 2
        y0 = (self.boxes[:, 1] + self.boxes[:, 3]) / 2

        handXy = np.stack((x0, y0), axis=-1)
        handMaxY0 = np.max(y0)

        handCenter = np.array([(max(x0) + min(x0)) / 2, (max(y0) + min(y0)) / 2])

        handMaxDist = np.max(np.linalg.norm(handXy - handCenter))

        if handMaxDist < self.HAND_STATIC_THRESH:
            self.isHandStatic = True
        return

    def is_Upward(self):
        '''Judge whether the product is being taken out.'''
        print(f"The ID is: {self.tid}")

    def is_FreeMove(self):
        if self.frnum == 1:
            return True
        # print(f"The ID is: {self.tid}")

        y0 = (self.boxes[:, 1] + self.boxes[:, 3]) / 2
        det_y0 = np.diff(y0, axis=0)
        sum_y0 = y0[-1] - y0[0]

        '''Case 1: the centre moves downwards.'''
        ## Initial condition: the product is first detected inside the cart
        condt0 = y0[0] > self.CART_HIGH_THRESH1

        condt_a = False
        ## Condition 1: the product starts out static (a stricter static criterion)
        condt11, condt12 = False, False
        if len(self.static_y1) > 0:
            condt11 = self.static_y1[0][0] == 0 and self.static_y1[0][1] - self.static_y1[0][0] >= 5
        if len(self.static_y2) > 0:
            condt12 = self.static_y2[0][0] == 0 and self.static_y2[0][1] - self.static_y2[0][0] >= 5

        # Condition 2: the product centre moves downwards
        condt2 = y0[-1] > y0[0]

        # Combined judgement a
        condt_a = condt0 and (condt11 or condt12) and condt2

        '''Case 2: the centre moves upwards.'''
        ## The centre moves upwards, but there is no associated hand track
        ## and the box is not a left/right edge point.
        condt_b = condt0 and len(self.hands) == 0 and y0[-1] < y0[0] and (not self.is_edge_cornpoint())

        '''Case 3: the product stays inside the cart but its motion has no
        dominant direction.'''
        ## The centre stays inside the cart and the net vertical displacement
        ## is smaller than the sum of the two largest absolute frame-to-frame
        ## differences, i.e. the motion has no dominant direction.
        condt_c = False
        if self.frnum > 3:
            condt_c = all(y0 > self.CART_HIGH_THRESH1) and \
                      (abs(sum_y0) < sum(np.sort(np.abs(det_y0))[::-1][:2]) - 1)

        condt = (condt_a or condt_b or condt_c) and self.cls != 0

        return condt
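A worked illustration of `frontTrack.compute_static_fids` with made-up y1 values: runs of small frame-to-frame differences become static ranges, and the gaps between them become the dynamic ranges.

```python
import numpy as np

# Made-up y1 coordinates: static for 4 frames, a 3-frame drop, then static again.
y = np.array([100, 102, 101, 103, 160, 220, 280, 281, 279, 280], dtype=float)

det_y = np.abs(np.diff(y))       # [2, 1, 2, 57, 60, 60, 1, 2, 1]
static = np.where(det_y < 8)[0]  # indices 0..2 and 6..8

# compute_static_fids(y) would return:
#   ranges (static):  [(0, 3), (6, 9)]
#   rangex (dynamic): [(3, 6)]
```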
254  ytracking/tracking/have_tracking.py  Normal file

@@ -0,0 +1,254 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 20 17:33:00 2023

@author: ym
"""
import sys

import cv2
import os
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

from pathlib import Path
from datetime import datetime

# ================= using for import ultralytics

# sys.path.append(r"D:\DeepLearning\yolov5_track")

# from utils.proBoxes import boxes_add_fid
from ytracking.tracking.utils.plotting import boxing_img  # , Annotator, colors,
from ytracking.tracking.utils.gen import Profile
from ytracking.tracking.utils.drawtracks import draw5points, drawTrack, drawtracefeat, plot_frameID_y2, drawFeatures, \
    draw_all_trajectories
from ytracking.tracking.utils import Boxes, IterableSimpleNamespace, yaml_load
from ytracking.tracking.trackers import BOTSORT, BYTETracker

sys.path.append("ytracking/tracking/")
from dotrack.dotracks_back import doBackTracks
from dotrack.dotracks_front import doFrontTracks

# from utils.mergetrack import track_equal_track
# from utils.basetrack import MoveState, ShoppingCart, doTracks


def init_tracker(tracker_yaml=None, bs=1):
    """
    Initialize tracker for object tracking during prediction.
    """
    TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
    cfg = IterableSimpleNamespace(**yaml_load(tracker_yaml))

    tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)

    return tracker


def have_tracked_front():
    '''Front-camera trajectory processing. Tracking has already been run;
    only the tracking results are analysed here.'''
    featdir = r"./data/trackfeats"
    npydir = r"./data/tracks"
    k = 0
    gt = Profile()
    for filename in os.listdir(npydir):
        # filename = "084501222314_20240108-143651_front.npy"
        if not filename.find("front") >= 0: continue

        file, ext = os.path.splitext(filename)
        fpath = os.path.join(npydir, filename)
        featpath = os.path.join(featdir, file + '.pkl')

        bboxes = np.load(fpath)
        features_dict = np.load(featpath, allow_pickle=True)

        with gt:
            vts = doFrontTracks(bboxes, features_dict)
            vts.classify()

        plt = plot_frameID_y2(vts)
        plt.savefig(f'./result/{file}_y2.png')
        plt.close()

        print(file + f" need time: {gt.dt:.2f}s")

        # edgeline = cv2.imread("./shopcart/cart_tempt/board_ftmp_line.png")
        # draw_all_trajectories(vts, edgeline, save_dir, filename)

        # k += 1
        # if k == 1:
        #     break


def have_tracked_back():
    '''Rear-camera trajectory processing. Tracking has already been run;
    only the tracking results are analysed here.'''
    featdir = r"./data/trackfeats"
    npydir = r"./data/tracks"
    k = 0
    alltracks = []
    gt = Profile()
    for filename in os.listdir(npydir):
        # filename = "084501222314_20240108-143656_back.npy"  # "加购_55.npy"
        if not filename.find("back") >= 0: continue

        t1 = time.time()

        file, ext = os.path.splitext(filename)
        fpath = os.path.join(npydir, filename)
        featpath = os.path.join(featdir, file + '.pkl')

        # try:
        bboxes = np.load(fpath)
        features_dict = np.load(featpath, allow_pickle=True)

        with gt:
            vts = doBackTracks(bboxes, features_dict)
            vts.classify()
            # vts.merge_tracks()

        print(file + f" need time: {gt.dt:.2f}s")
        edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
        draw_all_trajectories(vts, edgeline, save_dir, filename)
        alltracks.append(vts)

        # except Exception as e:
        #     # print(str(e))
        #     pass
        # print(file + " need time: {:.2f}s".format(time.time() - t1))
        k += 1
        if k == 1:
            break

    if len(alltracks):
        drawFeatures(alltracks, save_dir)


def tracking(vboxes):
    tracker_yaml = r"./trackers/cfg/botsort.yaml"
    tracker = init_tracker(tracker_yaml)
    tboxes = []
    images = []

    track_boxes = np.empty((0, 9), dtype=np.float32)
    features_dict = {}

    '''==================== run the tracker ======================='''
    for det, img, frame in vboxes:
        # Boxes need to be re-sorted by frame_id.
        det_tracking = Boxes(det).cpu().numpy()
        H, W = img.shape[:2]
        imgs = []
        for d in range(np.size(det, 0)):
            tlbr = det[d, :4].astype(np.int_)
            tlbr[0] = max(0, tlbr[0])
            tlbr[1] = max(0, tlbr[1])
            tlbr[2] = min(W - 1, tlbr[2])
            tlbr[3] = min(H - 1, tlbr[3])
            patch = img[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2], :]
            patch = patch[:, :, ::-1]  # the original code read RGB via PIL.Image; OpenCV is BGR
            imgs.append(patch)

        tracks = tracker.update(det_tracking, imgs)

        if len(tracks):
            track_boxes = np.concatenate([track_boxes, tracks], axis=0)
            feat_dict = {int(x.idx): x.curr_feat for x in tracker.tracked_stracks if x.is_activated}
            frame_id = tracks[0, 7]
            features_dict.update({int(frame_id): feat_dict})

            # det = tracks[:, :-1]
            # tboxes.append((det, frame))

            imgx = boxing_img(tracks, img)
            images.append((imgx, frame))

    # bboxes = boxes_add_fid(tboxes)

    vts = doBackTracks(track_boxes, features_dict)
    vts.classify()

    return vts, images


def do_tracking():
    pkldir = r"./data/boxes_imgs"
    k = 0
    save_result = True
    alltracks = []
    gt = Profile()
    for filename in os.listdir(pkldir):
        filename = "加购_18.pkl"
        file, _ = os.path.splitext(filename)
        vboxes = []
        ## ================================ load the detection data
        with open(pkldir + f'/{filename}', 'rb') as f:
            vboxes = pickle.load(f)
        assert len(vboxes) > 0

        with gt:
            vts, images = tracking(vboxes)
            alltracks.append(vts)

        print(file + f" need time: {gt.dt * 1E3:.1f}ms")
        ## ================================ save images, video, track trajectories
        if save_result == True:
            curdir = imgdir.joinpath(file)
            if not curdir.exists():
                curdir.mkdir(parents=True, exist_ok=True)
            vidpath = str(curdir.joinpath(file).with_suffix('.mp4'))

            fps, w, h = 30, images[0][0].shape[1], images[0][0].shape[0]
            vidwriter = cv2.VideoWriter(vidpath, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
            for img, frame in images:
                imgpath = curdir.joinpath(file + f"_{frame}.png")
                cv2.imwrite(str(imgpath), img)
                vidwriter.write(img)
            vidwriter.release()
            edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
            draw_all_trajectories(vts, edgeline, save_dir, filename)

        k += 1
        if k == 1:
            break

    drawFeatures(alltracks, save_dir)


def have_tracked(bboxes, features_dict, camera_id):
    if camera_id == '0':
        vts = doBackTracks(bboxes, features_dict)
        vts.classify()
    elif camera_id == '1':
        vts = doFrontTracks(bboxes, features_dict)
        vts.classify()
    else:
        raise ValueError("have no camera_id")
    return vts


if __name__ == "__main__":
    now = datetime.now()
    time_string = now.strftime("%Y%m%d%H%M%S")[:8]

    # save_dir = Path(f'./result/{time_string}_traj/')
    # if not save_dir.exists():
    #     save_dir.mkdir(parents=True, exist_ok=True)

    save_dir = Path(f'./result/')

    mode = "merge"  ## "merge": tracking already done; "other": tracking not yet run
    if mode == "merge":
        # have_tracked_back()
        have_tracked_front()
    else:
        '''Where do_tracking() stores its videos and images.'''
        imgdir = Path(f'./result/{time_string}_imgs/')
        if not imgdir.exists():
            imgdir.mkdir(parents=True, exist_ok=True)

        do_tracking()
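For reference, a minimal driver for `have_tracked` might look like the sketch below; the `.npy`/`.pkl` paths mirror the ones used by `have_tracked_front`/`have_tracked_back`, and the sample file names are hypothetical:

```python
import numpy as np

# Hypothetical file names; point these at the tracker stage's actual outputs.
bboxes = np.load("./data/tracks/sample_back.npy")
features_dict = np.load("./data/trackfeats/sample_back.pkl", allow_pickle=True)

vts = have_tracked(bboxes, features_dict, camera_id='0')  # '0' = rear camera, '1' = front
print(len(vts.Residual), "goods tracks remain after classification and merging")
```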
173  ytracking/tracking/test_merge.py  Normal file

@@ -0,0 +1,173 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 23 11:04:48 2024

@author: ym
"""
import numpy as np
import cv2
from scipy.spatial.distance import cdist
# from trackers.utils import matching

def readDict(boxes, feat_dicts):
    feat = []
    for i in range(boxes.shape[0]):
        tid, fid, bid = int(boxes[i, 4]), int(boxes[i, 7]), int(boxes[i, 8])
        feat.append(feat_dicts[fid][bid])

        # img = feat_dicts[fid][f'{bid}_img']
        # cv2.imwrite(f'./result/imgs/{tid}_{fid}_{bid}.png', img)

    return np.asarray(feat, dtype=np.float32)


def track_equal_track(atrack, btrack, feat_dicts):
    # boxes: [x, y, w, h, track_id, score, cls, frame_index, box_index]
    aboxes = atrack.boxes
    bboxes = btrack.boxes

    ''' 1. Do the trajectories overlap in time? '''
    afids = aboxes[:, 7].astype(np.int_)
    bfids = bboxes[:, 7].astype(np.int_)
    # Frame-index intersection
    interfid = set(afids).intersection(set(bfids))

    # Alternatively, test for disjoint frame indices directly (returns True or False):
    # interfid = set(afids).isdisjoint(set(bfids))

    ''' 2. Spatial IoU between the trajectories '''
    alabel = np.array([0] * afids.size, dtype=np.int_)
    blabel = np.array([1] * bfids.size, dtype=np.int_)

    label = np.concatenate((alabel, blabel), axis=0)
    fids = np.concatenate((afids, bfids), axis=0)
    indices = np.argsort(fids)
    idx_pair = []
    for i in range(len(indices) - 1):
        idx1, idx2 = indices[i], indices[i+1]
        if label[idx1] != label[idx2] and fids[idx2] - fids[idx1] == 1:
            if label[idx1] == 0:
                a_idx = idx1
                b_idx = idx2 - alabel.size
            else:
                a_idx = idx2
                b_idx = idx1 - alabel.size

            idx_pair.append((a_idx, b_idx))

    ious = []
    for a, b in idx_pair:
        abox, bbox = aboxes[a, :], bboxes[b, :]

        xa1, ya1 = abox[0] - abox[2]/2, abox[1] - abox[3]/2
        xa2, ya2 = abox[0] + abox[2]/2, abox[1] + abox[3]/2

        xb1, yb1 = bbox[0] - bbox[2]/2, bbox[1] - bbox[3]/2
        xb2, yb2 = bbox[0] + bbox[2]/2, bbox[1] + bbox[3]/2

        inter = (np.minimum(xb2, xa2) - np.maximum(xb1, xa1)).clip(0) * \
                (np.minimum(yb2, ya2) - np.maximum(yb1, ya1)).clip(0)

        # Union area
        box1_area = abox[2] * abox[3]
        box2_area = bbox[2] * bbox[3]
        union = box1_area + box2_area - inter + 1e-6

        ious.append(inter / union)

    ''' 3. Feature similarity between the trajectories '''
    afeat = readDict(aboxes, feat_dicts)
    bfeat = readDict(bboxes, feat_dicts)
    feat = np.concatenate((afeat, bfeat), axis=0)

    emb_simil = 1 - np.maximum(0.0, cdist(feat, feat, 'cosine'))
    emb_ = 1 - cdist(np.mean(afeat, axis=0)[None, :], np.mean(bfeat, axis=0)[None, :], 'cosine')

    cont1 = False if len(interfid) else True
    cont2 = all(iou > 0.5 for iou in ious)
    cont3 = emb_[0, 0] > 0.75

    cont = cont1 and cont2 and cont3

    return cont


def track_equal_str(atrack, btrack):
    if atrack == btrack:
        return True
    else:
        return False


def merge_track(Residual):
    out_list = []
    alist = [t for t in Residual]
    while alist:
        atrack = alist[0]
        cur_list = []
        cur_list.append(atrack)
        alist.pop(0)

        blist = [b for b in alist]
        alist = []
        for btrack in blist:
            if track_equal_str(atrack, btrack):
                cur_list.append(btrack)
            else:
                alist.append(btrack)

        out_list.append(cur_list)
    return out_list


def main():
    Residual = ['a', 'b', 'c', 'd', 'a', 'b', 'c', 'b', 'c', 'd']
    out_list = merge_track(Residual)

    print(Residual)
    print(out_list)


if __name__ == "__main__":
    main()


# =============================================================================
# for i, atrack in enumerate(input_list):
#     cur_list = []
#     cur_list.append(atrack)
#     del input_list[i]
#
#     for j, btrack in enumerate(input_list):
#         if track_equal(atrack, btrack):
#             cur_list.append(btrack)
#             del input_list[j]
#
#     out_list.append(cur_list)
# =============================================================================
94   ytracking/tracking/trackers/README.md  Normal file

@@ -0,0 +1,94 @@
# Tracker

## Supported Trackers

- [x] ByteTracker
- [x] BoT-SORT

## Usage

### Python interface

You can use the Python interface to track objects with a YOLO model.

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # or a segmentation model, e.g. yolov8n-seg.pt
model.track(
    source="video/streams",
    stream=True,
    tracker="botsort.yaml",  # or 'bytetrack.yaml'
    show=True,
)
```

You can get the IDs of the tracked objects with the following code:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")

for result in model.track(source="video.mp4"):
    print(
        result.boxes.id.cpu().numpy().astype(int)
    )  # this prints the IDs of the objects tracked in the frame
```

If you loop over video frames yourself, or track a folder of images, pass the `persist` parameter to tell the model that consecutive frames are related; IDs then stay fixed for the same objects. Without `persist`, each call creates a fresh tracker object, so IDs differ from frame to frame.

```python
import cv2
from ultralytics import YOLO

cap = cv2.VideoCapture("video.mp4")
model = YOLO("yolov8n.pt")
while True:
    ret, frame = cap.read()
    if not ret:
        break
    results = model.track(frame, persist=True)
    boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
    ids = results[0].boxes.id.cpu().numpy().astype(int)
    for box, id in zip(boxes, ids):
        cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
        cv2.putText(
            frame,
            f"Id {id}",
            (box[0], box[1]),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 0, 255),
            2,
        )
    cv2.imshow("frame", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
```

## Change tracker parameters

You can change the tracker parameters by editing the `tracker.yaml` file located in the `ultralytics/cfg/trackers` folder.
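
For example, assuming a hypothetical `my_botsort.yaml` (a copy of `botsort.yaml` with one threshold changed), you can pass the modified file directly to `model.track`:

```python
from ultralytics import YOLO

# my_botsort.yaml: a copy of ultralytics/cfg/trackers/botsort.yaml with,
# say, track_high_thresh raised from 0.5 to 0.6 (file name is illustrative)
model = YOLO("yolov8n.pt")
model.track(source="video.mp4", tracker="my_botsort.yaml")
```
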
## Command Line Interface (CLI)

You can also use the command line interface to track objects with a YOLO model.

```bash
yolo detect track source=... tracker=...
yolo segment track source=... tracker=...
yolo pose track source=... tracker=...
```

By default, trackers use the configuration in `ultralytics/cfg/trackers`. A modified tracker config file is also supported; please refer to the tracker config files in `ultralytics/cfg/trackers`.

## Contribute to Our Trackers Section

Are you proficient in multi-object tracking and have successfully implemented or adapted a tracking algorithm with Ultralytics YOLO? We invite you to contribute to our Trackers section! Your real-world applications and solutions could be invaluable for users working on tracking tasks.

By contributing to this section, you help expand the scope of tracking solutions available within the Ultralytics YOLO framework, adding another layer of functionality and utility for the community.

To initiate your contribution, please refer to our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for comprehensive instructions on submitting a Pull Request (PR) 🛠️. We are excited to see what you bring to the table!

Together, let's enhance the tracking capabilities of the Ultralytics YOLO ecosystem 🙏!
10
ytracking/tracking/trackers/__init__.py
Normal file
@ -0,0 +1,10 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from .bot_sort import BOTSORT
from .byte_tracker import BYTETracker
from .track import register_tracker

__all__ = 'register_tracker', 'BOTSORT', 'BYTETracker'  # allow simpler import
BIN
ytracking/tracking/trackers/__pycache__/__init__.cpython-38.pyc
Normal file
Binary file not shown.
BIN
ytracking/tracking/trackers/__pycache__/__init__.cpython-39.pyc
Normal file
Binary file not shown.
BIN
ytracking/tracking/trackers/__pycache__/basetrack.cpython-38.pyc
Normal file
Binary file not shown.
BIN
ytracking/tracking/trackers/__pycache__/basetrack.cpython-39.pyc
Normal file
Binary file not shown.
BIN
ytracking/tracking/trackers/__pycache__/bot_sort.cpython-38.pyc
Normal file
Binary file not shown.
BIN
ytracking/tracking/trackers/__pycache__/bot_sort.cpython-39.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
ytracking/tracking/trackers/__pycache__/track.cpython-38.pyc
Normal file
Binary file not shown.
BIN
ytracking/tracking/trackers/__pycache__/track.cpython-39.pyc
Normal file
Binary file not shown.
71
ytracking/tracking/trackers/basetrack.py
Normal file
@ -0,0 +1,71 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from collections import OrderedDict

import numpy as np


class TrackState:
    """Enumeration of possible object tracking states."""

    New = 0
    Tracked = 1
    Lost = 2
    Removed = 3


class BaseTrack:
    """Base class for object tracking, handling basic track attributes and operations."""

    _count = 0

    track_id = 0
    is_activated = False
    state = TrackState.New

    history = OrderedDict()
    features = []
    curr_feature = None
    score = 0
    start_frame = 0
    frame_id = 0
    time_since_update = 0

    # Multi-camera
    location = (np.inf, np.inf)

    @property
    def end_frame(self):
        """Return the last frame ID of the track."""
        return self.frame_id

    @staticmethod
    def next_id():
        """Increment and return the global track ID counter."""
        BaseTrack._count += 1
        return BaseTrack._count

    def activate(self, *args):
        """Activate the track with the provided arguments."""
        raise NotImplementedError

    def predict(self):
        """Predict the next state of the track."""
        raise NotImplementedError

    def update(self, *args, **kwargs):
        """Update the track with new observations."""
        raise NotImplementedError

    def mark_lost(self):
        """Mark the track as lost."""
        self.state = TrackState.Lost

    def mark_removed(self):
        """Mark the track as removed."""
        self.state = TrackState.Removed

    @staticmethod
    def reset_id():
        """Reset the global track ID counter."""
        BaseTrack._count = 0
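
# --------------------------------------------------------------------------- #
# Illustration (not part of this file): the minimal contract a subclass is
# expected to satisfy. STrack in byte_tracker.py is the real implementation;
# this sketch only shows which hooks must be filled in.
#
#   class DummyTrack(BaseTrack):
#       def activate(self, frame_id):
#           self.track_id = self.next_id()
#           self.state = TrackState.Tracked
#           self.frame_id = self.start_frame = frame_id
#
#       def predict(self):
#           pass                       # no motion model in this sketch
#
#       def update(self, frame_id):
#           self.frame_id = frame_id
#           self.state = TrackState.Tracked
# --------------------------------------------------------------------------- #
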
198
ytracking/tracking/trackers/bot_sort.py
Normal file
@ -0,0 +1,198 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from collections import deque

import numpy as np

from .basetrack import TrackState
from .byte_tracker import BYTETracker, STrack
from .utils import matching
# from .utils.gmc import GMC
from .utils.kalman_filter import KalmanFilterXYWH

from .reid.reid_interface import ReIDInterface
# from .reid.config import config
from tools.config import config


class BOTrack(STrack):
    shared_kalman = KalmanFilterXYWH()

    def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
        """Initialize YOLOv8 object with temporal parameters, such as feature history, alpha and current features."""
        super().__init__(tlwh, score, cls)

        self.smooth_feat = None
        self.curr_feat = None
        if feat is not None:
            self.update_features(feat)
        self.features = deque([], maxlen=feat_history)
        self.alpha = 0.9

    def update_features(self, feat):
        """Update features vector and smooth it using exponential moving average."""
        feat /= np.linalg.norm(feat)
        self.curr_feat = feat
        if self.smooth_feat is None:
            self.smooth_feat = feat
        else:
            self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
        self.features.append(feat)
        self.smooth_feat /= np.linalg.norm(self.smooth_feat)

    def predict(self):
        """Predicts the mean and covariance using Kalman filter."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            mean_state[6] = 0
            mean_state[7] = 0

        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)

    def re_activate(self, new_track, frame_id, new_id=False):
        """Reactivates a track with updated features and optionally assigns a new ID."""
        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)
        super().re_activate(new_track, frame_id, new_id)

    def update(self, new_track, frame_id):
        """Update the YOLOv8 instance with new track and frame ID."""
        if new_track.curr_feat is not None:
            self.update_features(new_track.curr_feat)
        super().update(new_track, frame_id)

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y, width, height)`."""
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[:2] -= ret[2:] / 2
        return ret

    @staticmethod
    def multi_predict(stracks):
        """Predicts the mean and covariance of multiple object tracks using shared Kalman filter."""
        if len(stracks) <= 0:
            return
        multi_mean = np.asarray([st.mean.copy() for st in stracks])
        multi_covariance = np.asarray([st.covariance for st in stracks])
        for i, st in enumerate(stracks):
            if st.state != TrackState.Tracked:
                multi_mean[i][6] = 0
                multi_mean[i][7] = 0
        multi_mean, multi_covariance = BOTrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
        for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
            stracks[i].mean = mean
            stracks[i].covariance = cov

    def convert_coords(self, tlwh):
        """Converts Top-Left-Width-Height bounding box coordinates to X-Y-Width-Height format."""
        return self.tlwh_to_xywh(tlwh)

    @staticmethod
    def tlwh_to_xywh(tlwh):
        """Convert bounding box to format `(center x, center y, width, height)`."""
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        return ret


class BOTSORT(BYTETracker):

    def __init__(self, args, frame_rate=30):
        """Initialize YOLOv8 object with ReID module and GMC algorithm."""
        super().__init__(args, frame_rate)
        # ReID module
        self.proximity_thresh = args.proximity_thresh
        self.appearance_thresh = args.appearance_thresh

        if args.with_reid:
            # Haven't supported BoT-SORT(reid) yet
            self.encoder = ReIDInterface(config)

        # self.gmc = GMC(method=args.gmc_method)  # commented by WQG

    def get_kalmanfilter(self):
        """Returns an instance of KalmanFilterXYWH for object tracking."""
        return KalmanFilterXYWH()

    def init_track(self, dets, scores, cls, imgs):
        """Initialize track with detections, scores, and classes."""
        if len(dets) == 0:
            return []
        if self.args.with_reid and self.encoder is not None:
            features_keep = self.encoder.inference(imgs, dets)
            return [BOTrack(xyxy, s, c, f) for (xyxy, s, c, f) in zip(dets, scores, cls, features_keep)]  # detections
        else:
            return [BOTrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)]  # detections

    def get_dists(self, tracks, detections):
        """Get distances between tracks and detections using IoU and (optionally) ReID embeddings."""
        dists = matching.iou_distance(tracks, detections)
        # proximity_thresh should be fairly large: ReID features are ignored
        # only when two boxes are far apart
        dists_mask = (dists > self.proximity_thresh)

        # TODO: mot20
        # if not self.args.mot20:
        dists = matching.fuse_score(dists, detections)

        if self.args.with_reid and self.encoder is not None:
            emb_dists = matching.embedding_distance(tracks, detections) / 2.0
            emb_dists[emb_dists > self.appearance_thresh] = 1.0
            emb_dists[dists_mask] = 1.0
            dists = np.minimum(dists, emb_dists)

        return dists
    def get_dists_1(self, tracks, detections):
        """Get distances between tracks and detections using IoU and (optionally) ReID embeddings."""
        iou_dists = matching.iou_distance(tracks, detections)
        iou_dists_mask = (iou_dists > 0.9)

        iou_dists = matching.fuse_score(iou_dists, detections)
        weight = 0.4
        if self.args.with_reid and self.encoder is not None:
            emb_dists = matching.embedding_distance(tracks, detections)

            '''============ two strategies for fusing iou_dists and emb_dists ===========
            1. A ReID similarity threshold: two box images below it cannot show the
               same object, so a reasonable, trustworthy cutoff must be chosen.
            2. The IoU constraint is a weak one, so the iou_dists threshold should
               be set to a large value.
            '''
            emb_dists_mask = (emb_dists > 0.85)
            iou_dists[emb_dists_mask] = 1
            emb_dists[iou_dists_mask] = 1

            dists = np.minimum(iou_dists, emb_dists)
            '''strategy 2: weighted fusion with the embedding term'''
            # dists = (1 - weight) * iou_dists + weight * emb_dists
        else:
            dists = iou_dists.copy()

        return dists
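
    # ----------------------------------------------------------------------- #
    # Illustration (not part of the tracker): the min-fusion step above on toy
    # data (fuse_score omitted). Both matrices are (num_tracks, num_dets)
    # distances in [0, 1], smaller meaning more similar.
    #
    #   iou_dists = np.array([[0.2, 0.95], [0.8, 0.3]])
    #   emb_dists = np.array([[0.1, 0.9], [0.95, 0.2]])
    #   emb_mask = emb_dists > 0.85      # appearance veto
    #   iou_mask = iou_dists > 0.9       # geometry veto
    #   iou_dists[emb_mask] = 1
    #   emb_dists[iou_mask] = 1
    #   np.minimum(iou_dists, emb_dists)
    #   # -> [[0.1, 1.0], [0.95, 0.2]]: each pair keeps its best surviving
    #   #    cue, and pairs vetoed by either cue move toward distance 1.
    # ----------------------------------------------------------------------- #
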
    def multi_predict(self, tracks):
        """Predict and track multiple objects with YOLOv8 model."""
        BOTrack.multi_predict(tracks)

    def get_result(self):
        '''written by WQG'''
        activate_tracks = np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.idx]
                                      for x in self.tracked_stracks if x.is_activated], dtype=np.float32)

        track_features = []
        if self.args.with_reid and self.encoder is not None:
            track_features = np.asarray([x.curr_feat for x in self.tracked_stracks if x.is_activated], dtype=np.float32)

        return (activate_tracks, track_features)
424
ytracking/tracking/trackers/byte_tracker.py
Normal file
@ -0,0 +1,424 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import numpy as np

from .basetrack import BaseTrack, TrackState
from .utils import matching
from .utils.kalman_filter import KalmanFilterXYAH


def dists_update(dists, strack_pool, detections):
    '''written by WQG: force pairs with different class labels apart.'''

    if len(strack_pool) and len(detections):
        # alabel = np.array([int(stack.cls) if int(stack.cls)==0 or int(stack.cls)==9 else -1 for stack in strack_pool])
        # blabel = np.array([int(stack.cls) if int(stack.cls)==0 or int(stack.cls)==9 else -1 for stack in detections])

        alabel = np.array([int(stack.cls) for stack in strack_pool])
        blabel = np.array([int(stack.cls) for stack in detections])
        amlabel = np.expand_dims(alabel, axis=1).repeat(len(detections), axis=1)
        bmlabel = np.expand_dims(blabel, axis=0).repeat(len(strack_pool), axis=0)
        dist_label = 1 - (bmlabel == amlabel)
        dists = np.where(dists > dist_label, dists, dist_label)
    return dists
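
# --------------------------------------------------------------------------- #
# Illustration (not part of the tracker): the class gating above on toy data.
# dist_label is 0 where track and detection share a class and 1 otherwise,
# and np.where(dists > dist_label, dists, dist_label) is an elementwise max,
# so cross-class pairs are pushed to distance 1 (never matched first).
#
#   alabel = [0, 9]; blabel = [0, 0]
#   dist_label = [[0, 0], [1, 1]]
#   dists      = [[0.2, 0.4], [0.3, 0.9]]
#   result     = [[0.2, 0.4], [1.0, 1.0]]
# --------------------------------------------------------------------------- #
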
class STrack(BaseTrack):
    shared_kalman = KalmanFilterXYAH()

    def __init__(self, tlwh, score, cls):
        """Wait to be activated."""
        self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
        self.kalman_filter = None
        self.mean, self.covariance = None, None
        self.is_activated = False

        self.score = score
        self.tracklet_len = 0
        self.cls = cls
        self.idx = tlwh[-1]

    def predict(self):
        """Predicts mean and covariance using Kalman filter."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            mean_state[7] = 0
        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)

    @staticmethod
    def multi_predict(stracks):
        """Perform multi-object predictive tracking using Kalman filter for given stracks."""
        if len(stracks) <= 0:
            return
        multi_mean = np.asarray([st.mean.copy() for st in stracks])
        multi_covariance = np.asarray([st.covariance for st in stracks])
        for i, st in enumerate(stracks):
            if st.state != TrackState.Tracked:
                multi_mean[i][7] = 0
        multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
        for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
            stracks[i].mean = mean
            stracks[i].covariance = cov

    @staticmethod
    def multi_gmc(stracks, H=np.eye(2, 3)):
        """Update state tracks positions and covariances using a homography matrix."""
        if len(stracks) > 0:
            multi_mean = np.asarray([st.mean.copy() for st in stracks])
            multi_covariance = np.asarray([st.covariance for st in stracks])

            R = H[:2, :2]
            R8x8 = np.kron(np.eye(4, dtype=float), R)
            t = H[:2, 2]

            for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
                mean = R8x8.dot(mean)
                mean[:2] += t
                cov = R8x8.dot(cov).dot(R8x8.transpose())

                stracks[i].mean = mean
                stracks[i].covariance = cov

    def activate(self, kalman_filter, frame_id):
        """Start a new tracklet."""
        self.kalman_filter = kalman_filter
        self.track_id = self.next_id()
        self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh))

        self.tracklet_len = 0
        self.state = TrackState.Tracked
        if frame_id == 1:
            self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        """Reactivates a previously lost track with a new detection."""
        self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
                                                               self.convert_coords(new_track.tlwh))
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score
        self.cls = new_track.cls
        self.idx = new_track.idx

    def update(self, new_track, frame_id):
        """
        Update a matched track.
        :type new_track: STrack
        :type frame_id: int
        :return:
        """
        self.frame_id = frame_id
        self.tracklet_len += 1

        new_tlwh = new_track.tlwh
        self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
                                                               self.convert_coords(new_tlwh))
        self.state = TrackState.Tracked
        self.is_activated = True

        self.score = new_track.score
        self.cls = new_track.cls
        self.idx = new_track.idx

    def convert_coords(self, tlwh):
        """Convert a bounding box's top-left-width-height format to its x-y-angle-height equivalent."""
        return self.tlwh_to_xyah(tlwh)

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y, width, height)`."""
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    @property
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., `(top left, bottom right)`."""
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio, height)`,
        where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    @staticmethod
    def tlbr_to_tlwh(tlbr):
        """Converts top-left bottom-right format to top-left width height format."""
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    def tlwh_to_tlbr(tlwh):
        """Converts tlwh bounding box format to tlbr format."""
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        """Return a string representation of the track with start and end frames and track ID."""
        return f'OT_{self.track_id}_({self.start_frame}-{self.end_frame})'


class BYTETracker:

    def __init__(self, args, frame_rate=30):
        """Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
        self.tracked_stracks = []  # type: list[STrack]
        self.lost_stracks = []  # type: list[STrack]
        self.removed_stracks = []  # type: list[STrack]

        self.frame_id = 0
        self.args = args
        self.max_time_lost = int(frame_rate / 30.0 * args.track_buffer)
        self.kalman_filter = self.get_kalmanfilter()
        self.reset_id()

    def update(self, results, img=None):
        """Updates object tracker with new detections and returns tracked object bounding boxes."""
        self.frame_id += 1
        activated_stracks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        scores = results.conf
        bboxes = results.xyxy
        # Add index
        bboxes = np.concatenate([bboxes, np.arange(len(bboxes)).reshape(-1, 1)], axis=-1)
        cls = results.cls

        remain_inds = scores > self.args.track_high_thresh
        inds_low = scores > self.args.track_low_thresh
        inds_high = scores < self.args.track_high_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]
        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]
        cls_keep = cls[remain_inds]
        cls_second = cls[inds_second]

        detections = self.init_track(dets, scores_keep, cls_keep, img)

        # Add newly detected tracklets to tracked_stracks
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        # Step 2: First association, with high score detection boxes
        strack_pool = self.joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict the current location with KF
        self.multi_predict(strack_pool)

        # ============================================================= no need for GMC, WQG
        # if hasattr(self, 'gmc') and img is not None:
        #     warp = self.gmc.apply(img, dets)
        #     STrack.multi_gmc(strack_pool, warp)
        #     STrack.multi_gmc(unconfirmed, warp)
        # =============================================================================

        dists = self.get_dists_1(strack_pool, detections)

        '''written by WQG for different class'''
        dists = dists_update(dists, strack_pool, detections)

        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh)
        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # Step 3: Second association, with low score detection boxes
        # Associate the still-unmatched tracks with the low score detections
        detections_second = self.init_track(dets_second, scores_second, cls_second, img)
        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]

        # TODO
        dists = matching.iou_distance(r_tracked_stracks, detections_second)
        '''written by WQG for different class'''
        dists = dists_update(dists, r_tracked_stracks, detections_second)

        matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        for it in u_track:
            track = r_tracked_stracks[it]
            if track.state != TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        # Deal with unconfirmed tracks, usually tracks with only one beginning frame
        detections = [detections[i] for i in u_detection]
        dists = self.get_dists_1(unconfirmed, detections)
        '''written by WQG for different class'''
        dists = dists_update(dists, unconfirmed, detections)

        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_stracks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)
        # Step 4: Init new stracks
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.args.new_track_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_stracks.append(track)
        # Step 5: Update state
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
        self.tracked_stracks = self.joint_stracks(self.tracked_stracks, activated_stracks)
        self.tracked_stracks = self.joint_stracks(self.tracked_stracks, refind_stracks)
        self.lost_stracks = self.sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = self.sub_stracks(self.lost_stracks, self.removed_stracks)
        self.tracked_stracks, self.lost_stracks = self.remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
        self.removed_stracks.extend(removed_stracks)
        if len(self.removed_stracks) > 1000:
            self.removed_stracks = self.removed_stracks[-999:]  # clip removed stracks to 1000 maximum

        return np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.frame_id, x.idx]
                           for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
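
    # ----------------------------------------------------------------------- #
    # Illustration (not part of the tracker): how track_high_thresh and
    # track_low_thresh partition detections for the two association passes in
    # update() above, using the default thresholds from the cfg files.
    #
    #   scores = np.array([0.95, 0.55, 0.30, 0.05])
    #   high, low = 0.5, 0.1
    #   scores > high                     # -> [ True,  True, False, False]
    #   (scores > low) & (scores < high)  # -> [False, False,  True, False]
    #   # 0.95 and 0.55 enter the first pass, 0.30 the second, 0.05 neither.
    # ----------------------------------------------------------------------- #
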
    def get_result(self):
        '''written by WQG'''
        # =============================================================================
        # activate_tracks = np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.idx]
        #                               for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
        #
        # track_features = []
        # =============================================================================
        tracks = []
        feats = []
        for t in self.tracked_stracks:
            if t.is_activated:
                track = t.tlbr.tolist() + [t.track_id, t.score, t.cls, t.idx]
                feat = t.curr_feature  # was `track.curr_feature`, which raised on the list above

                tracks.append(track)
                feats.append(feat)

        tracks = np.asarray(tracks, dtype=np.float32)

        return (tracks, feats)

    def get_kalmanfilter(self):
        """Returns a Kalman filter object for tracking bounding boxes."""
        return KalmanFilterXYAH()

    def init_track(self, dets, scores, cls, img=None):
        """Initialize object tracking with detections and scores using STrack algorithm."""
        return [STrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] if len(dets) else []  # detections

    def get_dists(self, tracks, detections):
        """Calculates the distance between tracks and detections using IOU and fuses scores."""
        dists = matching.iou_distance(tracks, detections)
        # TODO: mot20
        # if not self.args.mot20:
        dists = matching.fuse_score(dists, detections)
        return dists

    def multi_predict(self, tracks):
        """Returns the predicted tracks using the YOLOv8 network."""
        STrack.multi_predict(tracks)

    def reset_id(self):
        """Resets the ID counter of STrack."""
        STrack.reset_id()

    @staticmethod
    def joint_stracks(tlista, tlistb):
        """Combine two lists of stracks into a single one."""
        exists = {}
        res = []
        for t in tlista:
            exists[t.track_id] = 1
            res.append(t)
        for t in tlistb:
            tid = t.track_id
            if not exists.get(tid, 0):
                exists[tid] = 1
                res.append(t)
        return res

    @staticmethod
    def sub_stracks(tlista, tlistb):
        """DEPRECATED CODE in https://github.com/ultralytics/ultralytics/pull/1890/
        stracks = {t.track_id: t for t in tlista}
        for t in tlistb:
            tid = t.track_id
            if stracks.get(tid, 0):
                del stracks[tid]
        return list(stracks.values())
        """
        track_ids_b = {t.track_id for t in tlistb}
        return [t for t in tlista if t.track_id not in track_ids_b]

    @staticmethod
    def remove_duplicate_stracks(stracksa, stracksb):
        """Remove duplicate stracks with non-maximum IOU distance."""
        pdist = matching.iou_distance(stracksa, stracksb)
        pairs = np.where(pdist < 0.15)
        dupa, dupb = [], []
        for p, q in zip(*pairs):
            timep = stracksa[p].frame_id - stracksa[p].start_frame
            timeq = stracksb[q].frame_id - stracksb[q].start_frame
            if timep > timeq:
                dupb.append(q)
            else:
                dupa.append(p)
        resa = [t for i, t in enumerate(stracksa) if i not in dupa]
        resb = [t for i, t in enumerate(stracksb) if i not in dupb]
        return resa, resb
18
ytracking/tracking/trackers/cfg/botsort.yaml
Normal file
@ -0,0 +1,18 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT

tracker_type: botsort  # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5  # threshold for the first association
track_low_thresh: 0.1  # threshold for the second association
new_track_thresh: 0.6  # threshold to init a new track if the detection does not match any tracks
track_buffer: 30  # buffer to calculate the time when to remove tracks
match_thresh: 0.8  # threshold for matching tracks
# min_box_area: 10  # threshold for min box areas (for tracker evaluation, not used for now)
# mot20: False  # for tracker evaluation (not used for now)

# BoT-SORT settings
gmc_method: sparseOptFlow  # method of global motion compensation
# ReID model related thresholds (not supported yet)
proximity_thresh: 0.5
appearance_thresh: 0.25
with_reid: True
11
ytracking/tracking/trackers/cfg/bytetrack.yaml
Normal file
@ -0,0 +1,11 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack

tracker_type: bytetrack  # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5  # threshold for the first association
track_low_thresh: 0.1  # threshold for the second association
new_track_thresh: 0.6  # threshold to init a new track if the detection does not match any tracks
track_buffer: 30  # buffer to calculate the time when to remove tracks
match_thresh: 0.8  # threshold for matching tracks
# min_box_area: 10  # threshold for min box areas (for tracker evaluation, not used for now)
# mot20: False  # for tracker evaluation (not used for now)
7
ytracking/tracking/trackers/reid/__init__.py
Normal file
@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 19 16:15:35 2024

@author: ym
"""
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
40
ytracking/tracking/trackers/reid/config.py
Normal file
@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 19 14:01:46 2024

@author: ym
"""

import torch
import os
# import torchvision.transforms as T


class Config:
    # network settings
    backbone = 'resnet18'  # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
    batch_size = 8
    embedding_size = 256
    img_size = 224

    current_path = os.path.dirname(os.path.abspath(__file__))
    # was a Windows-only raw string with backslashes; joined components are portable
    model_path = os.path.join(current_path, "ckpts", "resnet18_1220", "best.pth")

    # model_path = "./trackers/reid/ckpts/resnet18_1220/best.pth"
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # =============================================================================
    # metric = 'arcface'  # [cosface, arcface]
    # drop_ratio = 0.5
    #
    # # training settings
    # checkpoints = "checkpoints/Mobilev3Large_1225"  # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
    # restore = False
    #
    # test_model = "./checkpoints/resnet18_1220/best.pth"
    #
    #
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # pin_memory = True  # if memory is large, set it True to speed up a bit
    # num_workers = 4  # dataloader
    # =============================================================================


config = Config()
83
ytracking/tracking/trackers/reid/model/BAM.py
Normal file
@ -0,0 +1,83 @@
import torch.nn as nn
import torchvision
from torch.nn import init


class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.shape[0], -1)


class ChannelAttention(nn.Module):
    # was `__int__`, so the body never ran; also fixed `len(channel)` -> `channel`
    def __init__(self, channel, reduction, num_layers):
        super(ChannelAttention, self).__init__()
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        gate_channels = [channel]
        gate_channels += [channel // reduction] * num_layers
        gate_channels += [channel]

        self.ca = nn.Sequential()
        self.ca.add_module('flatten', Flatten())
        for i in range(len(gate_channels) - 2):
            # unique names: repeated add_module('') kept overwriting one entry
            self.ca.add_module('fc%d' % i, nn.Linear(gate_channels[i], gate_channels[i + 1]))
            self.ca.add_module('bn%d' % i, nn.BatchNorm1d(gate_channels[i + 1]))
            self.ca.add_module('relu%d' % i, nn.ReLU())
        self.ca.add_module('fc_out', nn.Linear(gate_channels[-2], gate_channels[-1]))

    def forward(self, x):
        res = self.avgpool(x)
        res = self.ca(res)
        res = res.unsqueeze(-1).unsqueeze(-1).expand_as(x)
        return res


class SpatialAttention(nn.Module):
    # was `__int__` with a broken super() call; conv/BN channel counts now agree
    def __init__(self, channel, reduction=16, num_lay=3, dilation=2):
        super(SpatialAttention, self).__init__()
        self.sa = nn.Sequential()
        self.sa.add_module('conv_reduce', nn.Conv2d(kernel_size=1, in_channels=channel,
                                                    out_channels=channel // reduction))
        self.sa.add_module('bn_reduce', nn.BatchNorm2d(num_features=channel // reduction))
        self.sa.add_module('relu_reduce', nn.ReLU())
        for i in range(num_lay):
            # padding must equal dilation to keep the spatial size unchanged
            self.sa.add_module('conv%d' % i, nn.Conv2d(kernel_size=3,
                                                       in_channels=channel // reduction,
                                                       out_channels=channel // reduction,
                                                       padding=dilation,
                                                       dilation=dilation))
            self.sa.add_module('bn%d' % i, nn.BatchNorm2d(channel // reduction))
            self.sa.add_module('relu%d' % i, nn.ReLU())
        self.sa.add_module('conv_out', nn.Conv2d(channel // reduction, 1, kernel_size=1))

    def forward(self, x):
        res = self.sa(x)
        res = res.expand_as(x)
        return res


class BAMblock(nn.Module):
    def __init__(self, channel=512, reduction=16, dia_val=2, num_layers=3):
        super(BAMblock, self).__init__()
        self.ca = ChannelAttention(channel, reduction, num_layers)
        self.sa = SpatialAttention(channel, reduction, num_layers, dia_val)
        self.sigmoid = nn.Sigmoid()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')  # was the deprecated kaiming_normal
                if m.bias is not None:  # was `m.bais`
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        b, c, _, _ = x.size()
        sa_out = self.sa(x)
        ca_out = self.ca(x)
        weight = self.sigmoid(sa_out + ca_out)
        out = (1 + weight) * x
        return out


if __name__ == "__main__":
    print(512 // 14)
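
# --------------------------------------------------------------------------- #
# Illustration (not part of the module): smoke test of the fixed block; the
# attention branches must preserve the input shape for (1 + weight) * x.
#
#   import torch
#   x = torch.randn(2, 512, 7, 7)
#   print(BAMblock(channel=512)(x).shape)   # -> torch.Size([2, 512, 7, 7])
# --------------------------------------------------------------------------- #
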
68
ytracking/tracking/trackers/reid/model/CBAM.py
Normal file
@ -0,0 +1,68 @@
import torch
import torch.nn as nn
import torch.nn.init as init


class channelAttention(nn.Module):
    def __init__(self, channel, reduction=16):
        super(channelAttention, self).__init__()
        self.Maxpooling = nn.AdaptiveMaxPool2d(1)
        self.Avepooling = nn.AdaptiveAvgPool2d(1)
        self.ca = nn.Sequential()
        self.ca.add_module('conv1', nn.Conv2d(channel, channel // reduction, 1, bias=False))
        self.ca.add_module('Relu', nn.ReLU())
        self.ca.add_module('conv2', nn.Conv2d(channel // reduction, channel, 1, bias=False))
        self.sigmod = nn.Sigmoid()

    def forward(self, x):
        M_out = self.Maxpooling(x)
        A_out = self.Avepooling(x)
        M_out = self.ca(M_out)
        A_out = self.ca(A_out)
        out = self.sigmod(M_out + A_out)
        return out


class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super().__init__()
        self.conv = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=kernel_size, padding=kernel_size // 2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        max_result, _ = torch.max(x, dim=1, keepdim=True)
        avg_result = torch.mean(x, dim=1, keepdim=True)
        result = torch.cat([max_result, avg_result], dim=1)
        output = self.conv(result)
        output = self.sigmoid(output)
        return output


class CBAM(nn.Module):
    def __init__(self, channel=512, reduction=16, kernel_size=7):
        super().__init__()
        self.ca = channelAttention(channel, reduction)
        self.sa = SpatialAttention(kernel_size)

    def init_weights(self):
        for m in self.modules():  # weight initialization
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        # b, c, _, _ = x.size()
        # residual = x
        out = x * self.ca(x)
        out = out * self.sa(out)
        return out


if __name__ == '__main__':
    input = torch.randn(50, 512, 7, 7)
    kernel_size = input.shape[2]
    cbam = CBAM(channel=512, reduction=16, kernel_size=kernel_size)
    output = cbam(input)
    print(output.shape)
33
ytracking/tracking/trackers/reid/model/Tool.py
Normal file
@ -0,0 +1,33 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM, self).__init__()
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps, stride=2)

    def gem(self, x, p=3, eps=1e-6, stride=2):
        return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1)), stride=2).pow(1. / p)

    def __repr__(self):
        return self.__class__.__name__ + \
            '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
            ', ' + 'eps=' + str(self.eps) + ')'


class TripletLoss(nn.Module):
    def __init__(self, margin):
        super(TripletLoss, self).__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative, size_average=True):
        distance_positive = (anchor - positive).pow(2).sum(1)
        distance_negative = (anchor - negative).pow(2).sum(1)
        # standard triplet hinge: penalize when d(a, p) + margin > d(a, n);
        # the original had the two distances swapped
        losses = F.relu(distance_positive - distance_negative + self.margin)
        return losses.mean() if size_average else losses.sum()


if __name__ == '__main__':
    print('')
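
# --------------------------------------------------------------------------- #
# Illustration (not part of the module): expected shapes and values.
#
#   x = torch.randn(2, 512, 7, 7)
#   print(GeM()(x).shape)                   # -> torch.Size([2, 512, 1, 1])
#   a, p, n = (torch.randn(4, 256) for _ in range(3))
#   print(TripletLoss(margin=0.3)(a, p, n)) # scalar tensor, >= 0
# --------------------------------------------------------------------------- #
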
9
ytracking/tracking/trackers/reid/model/__init__.py
Normal file
@ -0,0 +1,9 @@
from .fmobilenet import FaceMobileNet
from .resnet_face import ResIRSE
from .mobilevit import mobilevit_s
from .metric import ArcFace, CosFace
from .loss import FocalLoss
from .resbam import resnet
from .resnet_pre import resnet18, resnet34, resnet50
from .mobilenet_v2 import mobilenet_v2
from .mobilenet_v3 import MobileNetV3_Small, MobileNetV3_Large
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
124
ytracking/tracking/trackers/reid/model/fmobilenet.py
Normal file
@ -0,0 +1,124 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.shape[0], -1)


class ConvBn(nn.Module):

    def __init__(self, in_c, out_c, kernel=(1, 1), stride=1, padding=0, groups=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_c, out_c, kernel, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_c)
        )

    def forward(self, x):
        return self.net(x)


class ConvBnPrelu(nn.Module):

    def __init__(self, in_c, out_c, kernel=(1, 1), stride=1, padding=0, groups=1):
        super().__init__()
        self.net = nn.Sequential(
            ConvBn(in_c, out_c, kernel, stride, padding, groups),
            nn.PReLU(out_c)
        )

    def forward(self, x):
        return self.net(x)


class DepthWise(nn.Module):

    def __init__(self, in_c, out_c, kernel=(3, 3), stride=2, padding=1, groups=1):
        super().__init__()
        self.net = nn.Sequential(
            ConvBnPrelu(in_c, groups, kernel=(1, 1), stride=1, padding=0),
            ConvBnPrelu(groups, groups, kernel=kernel, stride=stride, padding=padding, groups=groups),
            ConvBn(groups, out_c, kernel=(1, 1), stride=1, padding=0),
        )

    def forward(self, x):
        return self.net(x)


class DepthWiseRes(nn.Module):
    """DepthWise with Residual"""

    def __init__(self, in_c, out_c, kernel=(3, 3), stride=2, padding=1, groups=1):
        super().__init__()
        self.net = DepthWise(in_c, out_c, kernel, stride, padding, groups)

    def forward(self, x):
        return self.net(x) + x


class MultiDepthWiseRes(nn.Module):

    def __init__(self, num_block, channels, kernel=(3, 3), stride=1, padding=1, groups=1):
        super().__init__()

        self.net = nn.Sequential(*[
            DepthWiseRes(channels, channels, kernel, stride, padding, groups)
            for _ in range(num_block)
        ])

    def forward(self, x):
        return self.net(x)


class FaceMobileNet(nn.Module):

    def __init__(self, embedding_size):
        super().__init__()
        self.conv1 = ConvBnPrelu(1, 64, kernel=(3, 3), stride=2, padding=1)
        self.conv2 = ConvBn(64, 64, kernel=(3, 3), stride=1, padding=1, groups=64)
        self.conv3 = DepthWise(64, 64, kernel=(3, 3), stride=2, padding=1, groups=128)
        self.conv4 = MultiDepthWiseRes(num_block=4, channels=64, kernel=3, stride=1, padding=1, groups=128)
        self.conv5 = DepthWise(64, 128, kernel=(3, 3), stride=2, padding=1, groups=256)
        self.conv6 = MultiDepthWiseRes(num_block=6, channels=128, kernel=(3, 3), stride=1, padding=1, groups=256)
        self.conv7 = DepthWise(128, 128, kernel=(3, 3), stride=2, padding=1, groups=512)
        self.conv8 = MultiDepthWiseRes(num_block=2, channels=128, kernel=(3, 3), stride=1, padding=1, groups=256)
        self.conv9 = ConvBnPrelu(128, 512, kernel=(1, 1))
        self.conv10 = ConvBn(512, 512, groups=512, kernel=(7, 7))
        self.flatten = Flatten()
        self.linear = nn.Linear(2048, embedding_size, bias=False)
        self.bn = nn.BatchNorm1d(embedding_size)

    def forward(self, x):
        # print('x', x.shape)
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.conv5(out)
        out = self.conv6(out)
        out = self.conv7(out)
        out = self.conv8(out)
        out = self.conv9(out)
        out = self.conv10(out)
        out = self.flatten(out)
        out = self.linear(out)
        out = self.bn(out)
        return out


if __name__ == "__main__":
    from PIL import Image
    import numpy as np

    x = Image.open("../samples/009.jpg").convert('L')
    x = x.resize((128, 128))
    x = np.asarray(x, dtype=np.float32)
    x = x[None, None, ...]
    x = torch.from_numpy(x)
    net = FaceMobileNet(512)
    net.eval()
    with torch.no_grad():
        out = net(x)
    print(out.shape)
18
ytracking/tracking/trackers/reid/model/loss.py
Normal file
@ -0,0 +1,18 @@
import torch
import torch.nn as nn


class FocalLoss(nn.Module):

    def __init__(self, gamma=2):
        super().__init__()
        self.gamma = gamma
        self.ce = torch.nn.CrossEntropyLoss()

    def forward(self, input, target):
        # print(f'theta {input.shape, input[0]}, target {target.shape, target}')
        logp = self.ce(input, target)
        p = torch.exp(-logp)
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()
83
ytracking/tracking/trackers/reid/model/metric.py
Normal file
@ -0,0 +1,83 @@
# Definition of ArcFace loss and CosFace loss

import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class ArcFace(nn.Module):

    def __init__(self, embedding_size, class_num, s=30.0, m=0.50):
        """ArcFace formula:
            cos(m + theta) = cos(m)cos(theta) - sin(m)sin(theta)
        Note that:
            0 <= m + theta <= Pi
        So if (m + theta) >= Pi, then theta >= Pi - m. In [0, Pi]
        we have:
            cos(theta) < cos(Pi - m)
        So we can use cos(Pi - m) as a threshold to check whether
        (m + theta) goes out of [0, Pi].

        Args:
            embedding_size: usually 128, 256, 512 ...
            class_num: num of people when training
            s: scale, see normface https://arxiv.org/abs/1704.06369
            m: margin, see SphereFace, CosFace, and ArcFace paper
        """
        super().__init__()
        self.in_features = embedding_size
        self.out_features = class_num
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.FloatTensor(class_num, embedding_size))
        nn.init.xavier_uniform_(self.weight)

        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        # print(f"embedding {self.in_features}, class_num {self.out_features}, input {len(input)}, label {len(label)}")
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # print('F.normalize(input)', input.shape)
        # print('F.normalize(self.weight)', F.normalize(self.weight).shape)
        sine = ((1.0 - cosine.pow(2)).clamp(0, 1)).sqrt()
        phi = cosine * self.cos_m - sine * self.sin_m
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)  # drop to CosFace
        # print(f'cosine {cosine.shape, cosine}, sine {sine.shape, sine}, phi {phi.shape, phi}')
        # update y_i by phi in cosine
        output = cosine * 1.0  # make backward work
        batch_size = len(output)
        output[range(batch_size), label] = phi[range(batch_size), label]
        # print(f'output {(output * self.s).shape}')
        # print(f'phi[range(batch_size), label] {phi[range(batch_size), label]}')
        return output * self.s


class CosFace(nn.Module):

    def __init__(self, in_features, out_features, s=30.0, m=0.40):
        """
        Args:
            in_features: usually 128, 256, 512 ...
            out_features: num of people when training
            s: scale, see normface https://arxiv.org/abs/1704.06369
            m: margin, see SphereFace, CosFace, and ArcFace paper
        """
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        phi = cosine - self.m
        output = cosine * 1.0  # make backward work
        batch_size = len(output)
        output[range(batch_size), label] = phi[range(batch_size), label]
        return output * self.s
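
# --------------------------------------------------------------------------- #
# Illustration (not part of the module): the angle-addition identity that
# ArcFace.forward uses to compute cos(theta + m) without ever taking acos.
#
#   import math
#   m, theta = 0.50, math.radians(30)
#   lhs = math.cos(theta + m)
#   rhs = math.cos(theta) * math.cos(m) - math.sin(theta) * math.sin(m)
#   assert abs(lhs - rhs) < 1e-12
# --------------------------------------------------------------------------- #
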
200
ytracking/tracking/trackers/reid/model/mobilenet_v2.py
Normal file
@ -0,0 +1,200 @@
from torch import nn
from .utils import load_state_dict_from_url
from ..config import config as conf

__all__ = ['MobileNetV2', 'mobilenet_v2']


model_urls = {
    'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}


def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8.
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
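
# --------------------------------------------------------------------------- #
# Illustration (not part of the module): channel rounding under width_mult.
#
#   _make_divisible(32 * 0.75, 8)  # -> 24 (already a multiple of 8)
#   _make_divisible(32 * 0.9, 8)   # -> 32 (28.8 rounds to the nearest multiple of 8)
# --------------------------------------------------------------------------- #
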
class ConvBNReLU(nn.Sequential):
|
||||
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=None):
|
||||
padding = (kernel_size - 1) // 2
|
||||
if norm_layer is None:
|
||||
norm_layer = nn.BatchNorm2d
|
||||
super(ConvBNReLU, self).__init__(
|
||||
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
|
||||
norm_layer(out_planes),
|
||||
nn.ReLU6(inplace=True)
|
||||
)
|
||||
|
||||
|
||||
class InvertedResidual(nn.Module):
|
||||
def __init__(self, inp, oup, stride, expand_ratio, norm_layer=None):
|
||||
super(InvertedResidual, self).__init__()
|
||||
self.stride = stride
|
||||
assert stride in [1, 2]
|
||||
|
||||
if norm_layer is None:
|
||||
norm_layer = nn.BatchNorm2d
|
||||
|
||||
hidden_dim = int(round(inp * expand_ratio))
|
||||
self.use_res_connect = self.stride == 1 and inp == oup
|
||||
|
||||
layers = []
|
||||
if expand_ratio != 1:
|
||||
# pw
|
||||
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer))
|
||||
layers.extend([
|
||||
# dw
|
||||
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer),
|
||||
# pw-linear
|
||||
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
|
||||
norm_layer(oup),
|
||||
])
|
||||
self.conv = nn.Sequential(*layers)
|
||||
|
||||
def forward(self, x):
|
||||
if self.use_res_connect:
|
||||
return x + self.conv(x)
|
||||
else:
|
||||
return self.conv(x)
|
||||
|
||||
|
||||
class MobileNetV2(nn.Module):
|
||||
def __init__(self,
|
||||
num_classes=conf.embedding_size,
|
||||
width_mult=1.0,
|
||||
inverted_residual_setting=None,
|
||||
round_nearest=8,
|
||||
block=None,
|
||||
norm_layer=None):
|
||||
"""
|
||||
MobileNet V2 main class
|
||||
|
||||
Args:
|
||||
num_classes (int): Number of classes
|
||||
width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
|
||||
inverted_residual_setting: Network structure
|
||||
round_nearest (int): Round the number of channels in each layer to be a multiple of this number
|
||||
Set to 1 to turn off rounding
|
||||
block: Module specifying inverted residual building block for mobilenet
|
||||
norm_layer: Module specifying the normalization layer to use
|
||||
|
||||
"""
|
||||
super(MobileNetV2, self).__init__()
|
||||
|
||||
if block is None:
|
||||
block = InvertedResidual
|
||||
|
||||
if norm_layer is None:
|
||||
norm_layer = nn.BatchNorm2d
|
||||
|
||||
input_channel = 32
|
||||
last_channel = 1280
|
||||
|
||||
if inverted_residual_setting is None:
|
||||
inverted_residual_setting = [
|
||||
# t, c, n, s
|
||||
[1, 16, 1, 1],
|
||||
[6, 24, 2, 2],
|
||||
[6, 32, 3, 2],
|
||||
[6, 64, 4, 2],
|
||||
[6, 96, 3, 1],
|
||||
[6, 160, 3, 2],
|
||||
[6, 320, 1, 1],
|
||||
]
|
||||
|
||||
# only check the first element, assuming user knows t,c,n,s are required
|
||||
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
|
||||
raise ValueError("inverted_residual_setting should be non-empty "
|
||||
"or a 4-element list, got {}".format(inverted_residual_setting))
|
||||
|
||||
# building first layer
|
||||
input_channel = _make_divisible(input_channel * width_mult, round_nearest)
|
||||
self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
|
||||
features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)]
|
||||
# building inverted residual blocks
|
||||
for t, c, n, s in inverted_residual_setting:
|
||||
output_channel = _make_divisible(c * width_mult, round_nearest)
|
||||
for i in range(n):
|
||||
stride = s if i == 0 else 1
|
||||
features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))
|
||||
input_channel = output_channel
|
||||
# building last several layers
|
||||
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer))
|
||||
# make it nn.Sequential
|
||||
self.features = nn.Sequential(*features)
|
||||
|
||||
# building classifier
|
||||
self.classifier = nn.Sequential(
|
||||
nn.Dropout(0.2),
|
||||
nn.Linear(self.last_channel, num_classes),
|
||||
)
|
||||
|
||||
# weight initialization
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
nn.init.kaiming_normal_(m.weight, mode='fan_out')
|
||||
if m.bias is not None:
|
||||
nn.init.zeros_(m.bias)
|
||||
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
|
||||
nn.init.ones_(m.weight)
|
||||
nn.init.zeros_(m.bias)
|
||||
elif isinstance(m, nn.Linear):
|
||||
nn.init.normal_(m.weight, 0, 0.01)
|
||||
nn.init.zeros_(m.bias)
|
||||
|
||||
def _forward_impl(self, x):
|
||||
# This exists since TorchScript doesn't support inheritance, so the superclass method
|
||||
# (this one) needs to have a name other than `forward` that can be accessed in a subclass
|
||||
x = self.features(x)
|
||||
# Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
|
||||
x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1)
|
||||
x = self.classifier(x)
|
||||
return x
|
||||
|
||||
def forward(self, x):
|
||||
return self._forward_impl(x)
|
||||
|
||||
|
||||
def mobilenet_v2(pretrained=True, progress=True, **kwargs):
|
||||
"""
|
||||
Constructs a MobileNetV2 architecture from
|
||||
`"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
|
||||
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
progress (bool): If True, displays a progress bar of the download to stderr
|
||||
"""
|
||||
model = MobileNetV2(**kwargs)
|
||||
if pretrained:
|
||||
state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'],
|
||||
progress=progress)
|
||||
src_state_dict = state_dict
|
||||
target_state_dict = model.state_dict()
|
||||
skip_keys = []
|
||||
# skip mismatch size tensors in case of pretraining
|
||||
for k in src_state_dict.keys():
|
||||
if k not in target_state_dict:
|
||||
continue
|
||||
if src_state_dict[k].size() != target_state_dict[k].size():
|
||||
skip_keys.append(k)
|
||||
for k in skip_keys:
|
||||
del src_state_dict[k]
|
||||
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
|
||||
|
||||
return model
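
# Hedged usage sketch (not part of the original commit): with num_classes
# defaulting to conf.embedding_size above, the classifier head acts as an
# embedding layer rather than a 1000-way ImageNet classifier.
#   model = mobilenet_v2(pretrained=True)
#   feats = model(torch.randn(1, 3, 224, 224))  # -> (1, conf.embedding_size)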
|
200
ytracking/tracking/trackers/reid/model/mobilenet_v3.py
Normal file
@ -0,0 +1,200 @@
|
||||
'''MobileNetV3 in PyTorch.
|
||||
|
||||
See the paper "Inverted Residuals and Linear Bottlenecks:
|
||||
Mobile Networks for Classification, Detection and Segmentation" for more details.
|
||||
'''
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.nn import init
|
||||
from ..config import config as conf
|
||||
|
||||
|
||||
class hswish(nn.Module):
|
||||
def forward(self, x):
|
||||
out = x * F.relu6(x + 3, inplace=True) / 6
|
||||
return out
|
||||
|
||||
|
||||
class hsigmoid(nn.Module):
|
||||
def forward(self, x):
|
||||
out = F.relu6(x + 3, inplace=True) / 6
|
||||
return out
|
||||
|
||||
|
||||
class SeModule(nn.Module):
|
||||
def __init__(self, in_size, reduction=4):
|
||||
super(SeModule, self).__init__()
|
||||
self.se = nn.Sequential(
|
||||
nn.AdaptiveAvgPool2d(1),
|
||||
nn.Conv2d(in_size, in_size // reduction, kernel_size=1, stride=1, padding=0, bias=False),
|
||||
nn.BatchNorm2d(in_size // reduction),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Conv2d(in_size // reduction, in_size, kernel_size=1, stride=1, padding=0, bias=False),
|
||||
nn.BatchNorm2d(in_size),
|
||||
hsigmoid()
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return x * self.se(x)
|
||||
|
||||
|
||||
class Block(nn.Module):
|
||||
'''expand + depthwise + pointwise'''
|
||||
def __init__(self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride):
|
||||
super(Block, self).__init__()
|
||||
self.stride = stride
|
||||
self.se = semodule
|
||||
|
||||
self.conv1 = nn.Conv2d(in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(expand_size)
|
||||
self.nolinear1 = nolinear
|
||||
self.conv2 = nn.Conv2d(expand_size, expand_size, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=expand_size, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(expand_size)
|
||||
self.nolinear2 = nolinear
|
||||
self.conv3 = nn.Conv2d(expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False)
|
||||
self.bn3 = nn.BatchNorm2d(out_size)
|
||||
|
||||
self.shortcut = nn.Sequential()
|
||||
if stride == 1 and in_size != out_size:
|
||||
self.shortcut = nn.Sequential(
|
||||
nn.Conv2d(in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False),
|
||||
nn.BatchNorm2d(out_size),
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
out = self.nolinear1(self.bn1(self.conv1(x)))
|
||||
out = self.nolinear2(self.bn2(self.conv2(out)))
|
||||
out = self.bn3(self.conv3(out))
|
||||
        if self.se is not None:
|
||||
out = self.se(out)
|
||||
out = out + self.shortcut(x) if self.stride==1 else out
|
||||
return out
|
||||
|
||||
|
||||
class MobileNetV3_Large(nn.Module):
|
||||
def __init__(self, num_classes=conf.embedding_size):
|
||||
super(MobileNetV3_Large, self).__init__()
|
||||
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(16)
|
||||
self.hs1 = hswish()
|
||||
|
||||
self.bneck = nn.Sequential(
|
||||
Block(3, 16, 16, 16, nn.ReLU(inplace=True), None, 1),
|
||||
Block(3, 16, 64, 24, nn.ReLU(inplace=True), None, 2),
|
||||
Block(3, 24, 72, 24, nn.ReLU(inplace=True), None, 1),
|
||||
Block(5, 24, 72, 40, nn.ReLU(inplace=True), SeModule(40), 2),
|
||||
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
|
||||
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
|
||||
Block(3, 40, 240, 80, hswish(), None, 2),
|
||||
Block(3, 80, 200, 80, hswish(), None, 1),
|
||||
Block(3, 80, 184, 80, hswish(), None, 1),
|
||||
Block(3, 80, 184, 80, hswish(), None, 1),
|
||||
Block(3, 80, 480, 112, hswish(), SeModule(112), 1),
|
||||
Block(3, 112, 672, 112, hswish(), SeModule(112), 1),
|
||||
Block(5, 112, 672, 160, hswish(), SeModule(160), 1),
|
||||
Block(5, 160, 672, 160, hswish(), SeModule(160), 2),
|
||||
Block(5, 160, 960, 160, hswish(), SeModule(160), 1),
|
||||
)
|
||||
|
||||
|
||||
self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(960)
|
||||
self.hs2 = hswish()
|
||||
self.linear3 = nn.Linear(960, 1280)
|
||||
self.bn3 = nn.BatchNorm1d(1280)
|
||||
self.hs3 = hswish()
|
||||
self.linear4 = nn.Linear(1280, num_classes)
|
||||
self.init_params()
|
||||
|
||||
def init_params(self):
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
init.kaiming_normal_(m.weight, mode='fan_out')
|
||||
if m.bias is not None:
|
||||
init.constant_(m.bias, 0)
|
||||
elif isinstance(m, nn.BatchNorm2d):
|
||||
init.constant_(m.weight, 1)
|
||||
init.constant_(m.bias, 0)
|
||||
elif isinstance(m, nn.Linear):
|
||||
init.normal_(m.weight, std=0.001)
|
||||
if m.bias is not None:
|
||||
init.constant_(m.bias, 0)
|
||||
|
||||
def forward(self, x):
|
||||
out = self.hs1(self.bn1(self.conv1(x)))
|
||||
out = self.bneck(out)
|
||||
out = self.hs2(self.bn2(self.conv2(out)))
|
||||
out = F.avg_pool2d(out, conf.img_size // 32)
|
||||
out = out.view(out.size(0), -1)
|
||||
out = self.hs3(self.bn3(self.linear3(out)))
|
||||
out = self.linear4(out)
|
||||
return out
|
||||
|
||||
|
||||
|
||||
class MobileNetV3_Small(nn.Module):
|
||||
def __init__(self, num_classes=conf.embedding_size):
|
||||
super(MobileNetV3_Small, self).__init__()
|
||||
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(16)
|
||||
self.hs1 = hswish()
|
||||
|
||||
self.bneck = nn.Sequential(
|
||||
Block(3, 16, 16, 16, nn.ReLU(inplace=True), SeModule(16), 2),
|
||||
Block(3, 16, 72, 24, nn.ReLU(inplace=True), None, 2),
|
||||
Block(3, 24, 88, 24, nn.ReLU(inplace=True), None, 1),
|
||||
Block(5, 24, 96, 40, hswish(), SeModule(40), 2),
|
||||
Block(5, 40, 240, 40, hswish(), SeModule(40), 1),
|
||||
Block(5, 40, 240, 40, hswish(), SeModule(40), 1),
|
||||
Block(5, 40, 120, 48, hswish(), SeModule(48), 1),
|
||||
Block(5, 48, 144, 48, hswish(), SeModule(48), 1),
|
||||
Block(5, 48, 288, 96, hswish(), SeModule(96), 2),
|
||||
Block(5, 96, 576, 96, hswish(), SeModule(96), 1),
|
||||
Block(5, 96, 576, 96, hswish(), SeModule(96), 1),
|
||||
)
|
||||
|
||||
|
||||
self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(576)
|
||||
self.hs2 = hswish()
|
||||
self.linear3 = nn.Linear(576, 1280)
|
||||
self.bn3 = nn.BatchNorm1d(1280)
|
||||
self.hs3 = hswish()
|
||||
self.linear4 = nn.Linear(1280, num_classes)
|
||||
self.init_params()
|
||||
|
||||
def init_params(self):
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
init.kaiming_normal_(m.weight, mode='fan_out')
|
||||
if m.bias is not None:
|
||||
init.constant_(m.bias, 0)
|
||||
elif isinstance(m, nn.BatchNorm2d):
|
||||
init.constant_(m.weight, 1)
|
||||
init.constant_(m.bias, 0)
|
||||
elif isinstance(m, nn.Linear):
|
||||
init.normal_(m.weight, std=0.001)
|
||||
if m.bias is not None:
|
||||
init.constant_(m.bias, 0)
|
||||
|
||||
def forward(self, x):
|
||||
out = self.hs1(self.bn1(self.conv1(x)))
|
||||
out = self.bneck(out)
|
||||
out = self.hs2(self.bn2(self.conv2(out)))
|
||||
out = F.avg_pool2d(out, conf.img_size // 32)
|
||||
out = out.view(out.size(0), -1)
|
||||
|
||||
out = self.hs3(self.bn3(self.linear3(out)))
|
||||
out = self.linear4(out)
|
||||
return out
|
||||
|
||||
|
||||
|
||||
def test():
|
||||
net = MobileNetV3_Small()
|
||||
x = torch.randn(2,3,224,224)
|
||||
y = net(x)
|
||||
print(y.size())
|
||||
|
||||
# test()
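
# Note (hedged): forward() pools with F.avg_pool2d(out, conf.img_size // 32),
# which assumes square inputs of exactly conf.img_size pixels. test() above
# implicitly assumes conf.img_size == 224; other values can error (kernel
# larger than the 7x7 feature map) or silently pool only a sub-window.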
|
265
ytracking/tracking/trackers/reid/model/mobilevit.py
Normal file
@ -0,0 +1,265 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from einops import rearrange
|
||||
from ..config import config as conf
|
||||
|
||||
|
||||
def conv_1x1_bn(inp, oup):
|
||||
return nn.Sequential(
|
||||
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
|
||||
nn.BatchNorm2d(oup),
|
||||
nn.SiLU()
|
||||
)
|
||||
|
||||
|
||||
def conv_nxn_bn(inp, oup, kernel_size=3, stride=1):
|
||||
return nn.Sequential(
|
||||
        nn.Conv2d(inp, oup, kernel_size, stride, 1, bias=False),
|
||||
nn.BatchNorm2d(oup),
|
||||
nn.SiLU()
|
||||
)
|
||||
|
||||
|
||||
class PreNorm(nn.Module):
|
||||
def __init__(self, dim, fn):
|
||||
super().__init__()
|
||||
self.norm = nn.LayerNorm(dim)
|
||||
self.fn = fn
|
||||
|
||||
def forward(self, x, **kwargs):
|
||||
return self.fn(self.norm(x), **kwargs)
|
||||
|
||||
|
||||
class FeedForward(nn.Module):
|
||||
def __init__(self, dim, hidden_dim, dropout=0.):
|
||||
super().__init__()
|
||||
self.net = nn.Sequential(
|
||||
nn.Linear(dim, hidden_dim),
|
||||
nn.SiLU(),
|
||||
nn.Dropout(dropout),
|
||||
nn.Linear(hidden_dim, dim),
|
||||
nn.Dropout(dropout)
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return self.net(x)
|
||||
|
||||
|
||||
class Attention(nn.Module):
|
||||
def __init__(self, dim, heads=8, dim_head=64, dropout=0.):
|
||||
super().__init__()
|
||||
inner_dim = dim_head * heads
|
||||
project_out = not (heads == 1 and dim_head == dim)
|
||||
|
||||
self.heads = heads
|
||||
self.scale = dim_head ** -0.5
|
||||
|
||||
self.attend = nn.Softmax(dim=-1)
|
||||
self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
|
||||
|
||||
self.to_out = nn.Sequential(
|
||||
nn.Linear(inner_dim, dim),
|
||||
nn.Dropout(dropout)
|
||||
) if project_out else nn.Identity()
|
||||
|
||||
def forward(self, x):
|
||||
qkv = self.to_qkv(x).chunk(3, dim=-1)
|
||||
q, k, v = map(lambda t: rearrange(t, 'b p n (h d) -> b p h n d', h=self.heads), qkv)
|
||||
|
||||
dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
|
||||
attn = self.attend(dots)
|
||||
out = torch.matmul(attn, v)
|
||||
out = rearrange(out, 'b p h n d -> b p n (h d)')
|
||||
return self.to_out(out)
|
||||
|
||||
|
||||
class Transformer(nn.Module):
|
||||
def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
|
||||
super().__init__()
|
||||
self.layers = nn.ModuleList([])
|
||||
for _ in range(depth):
|
||||
self.layers.append(nn.ModuleList([
|
||||
PreNorm(dim, Attention(dim, heads, dim_head, dropout)),
|
||||
PreNorm(dim, FeedForward(dim, mlp_dim, dropout))
|
||||
]))
|
||||
|
||||
def forward(self, x):
|
||||
for attn, ff in self.layers:
|
||||
x = attn(x) + x
|
||||
x = ff(x) + x
|
||||
return x
|
||||
|
||||
|
||||
class MV2Block(nn.Module):
|
||||
def __init__(self, inp, oup, stride=1, expansion=4):
|
||||
super().__init__()
|
||||
self.stride = stride
|
||||
assert stride in [1, 2]
|
||||
|
||||
hidden_dim = int(inp * expansion)
|
||||
self.use_res_connect = self.stride == 1 and inp == oup
|
||||
|
||||
if expansion == 1:
|
||||
self.conv = nn.Sequential(
|
||||
# dw
|
||||
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
|
||||
nn.BatchNorm2d(hidden_dim),
|
||||
nn.SiLU(),
|
||||
# pw-linear
|
||||
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
|
||||
nn.BatchNorm2d(oup),
|
||||
)
|
||||
else:
|
||||
self.conv = nn.Sequential(
|
||||
# pw
|
||||
nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
|
||||
nn.BatchNorm2d(hidden_dim),
|
||||
nn.SiLU(),
|
||||
# dw
|
||||
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
|
||||
nn.BatchNorm2d(hidden_dim),
|
||||
nn.SiLU(),
|
||||
# pw-linear
|
||||
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
|
||||
nn.BatchNorm2d(oup),
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
if self.use_res_connect:
|
||||
return x + self.conv(x)
|
||||
else:
|
||||
return self.conv(x)
|
||||
|
||||
|
||||
class MobileViTBlock(nn.Module):
|
||||
def __init__(self, dim, depth, channel, kernel_size, patch_size, mlp_dim, dropout=0.):
|
||||
super().__init__()
|
||||
self.ph, self.pw = patch_size
|
||||
|
||||
self.conv1 = conv_nxn_bn(channel, channel, kernel_size)
|
||||
self.conv2 = conv_1x1_bn(channel, dim)
|
||||
|
||||
self.transformer = Transformer(dim, depth, 4, 8, mlp_dim, dropout)
|
||||
|
||||
self.conv3 = conv_1x1_bn(dim, channel)
|
||||
self.conv4 = conv_nxn_bn(2 * channel, channel, kernel_size)
|
||||
|
||||
def forward(self, x):
|
||||
y = x.clone()
|
||||
|
||||
# Local representations
|
||||
x = self.conv1(x)
|
||||
x = self.conv2(x)
|
||||
|
||||
# Global representations
|
||||
_, _, h, w = x.shape
|
||||
x = rearrange(x, 'b d (h ph) (w pw) -> b (ph pw) (h w) d', ph=self.ph, pw=self.pw)
|
||||
x = self.transformer(x)
|
||||
x = rearrange(x, 'b (ph pw) (h w) d -> b d (h ph) (w pw)', h=h // self.ph, w=w // self.pw, ph=self.ph,
|
||||
pw=self.pw)
|
||||
|
||||
# Fusion
|
||||
x = self.conv3(x)
|
||||
x = torch.cat((x, y), 1)
|
||||
x = self.conv4(x)
|
||||
return x
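
# Shape walk-through for the block above (hedged reading of the rearranges):
# conv2 lifts channels to `dim`, the first rearrange unfolds (B, dim, H, W)
# into (B, ph*pw, (H/ph)*(W/pw), dim) -- one row of patches per intra-patch
# pixel position -- the transformer attends across patches separately for
# each position, and the second rearrange folds the map back to (B, dim, H, W).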
|
||||
|
||||
|
||||
class MobileViT(nn.Module):
|
||||
def __init__(self, image_size, dims, channels, num_classes, expansion=4, kernel_size=3, patch_size=(2, 2)):
|
||||
super().__init__()
|
||||
ih, iw = image_size
|
||||
ph, pw = patch_size
|
||||
assert ih % ph == 0 and iw % pw == 0
|
||||
|
||||
L = [2, 4, 3]
|
||||
|
||||
self.conv1 = conv_nxn_bn(3, channels[0], stride=2)
|
||||
|
||||
self.mv2 = nn.ModuleList([])
|
||||
self.mv2.append(MV2Block(channels[0], channels[1], 1, expansion))
|
||||
self.mv2.append(MV2Block(channels[1], channels[2], 2, expansion))
|
||||
self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion))
|
||||
self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion)) # Repeat
|
||||
self.mv2.append(MV2Block(channels[3], channels[4], 2, expansion))
|
||||
self.mv2.append(MV2Block(channels[5], channels[6], 2, expansion))
|
||||
self.mv2.append(MV2Block(channels[7], channels[8], 2, expansion))
|
||||
|
||||
self.mvit = nn.ModuleList([])
|
||||
self.mvit.append(MobileViTBlock(dims[0], L[0], channels[5], kernel_size, patch_size, int(dims[0] * 2)))
|
||||
self.mvit.append(MobileViTBlock(dims[1], L[1], channels[7], kernel_size, patch_size, int(dims[1] * 4)))
|
||||
self.mvit.append(MobileViTBlock(dims[2], L[2], channels[9], kernel_size, patch_size, int(dims[2] * 4)))
|
||||
|
||||
self.conv2 = conv_1x1_bn(channels[-2], channels[-1])
|
||||
|
||||
self.pool = nn.AvgPool2d(ih // 32, 1)
|
||||
self.fc = nn.Linear(channels[-1], num_classes, bias=False)
|
||||
|
||||
def forward(self, x):
|
||||
#print('x',x.shape)
|
||||
x = self.conv1(x)
|
||||
x = self.mv2[0](x)
|
||||
|
||||
x = self.mv2[1](x)
|
||||
x = self.mv2[2](x)
|
||||
x = self.mv2[3](x) # Repeat
|
||||
|
||||
x = self.mv2[4](x)
|
||||
x = self.mvit[0](x)
|
||||
|
||||
x = self.mv2[5](x)
|
||||
x = self.mvit[1](x)
|
||||
|
||||
x = self.mv2[6](x)
|
||||
x = self.mvit[2](x)
|
||||
x = self.conv2(x)
|
||||
|
||||
|
||||
#print('pool_before',x.shape)
|
||||
x = self.pool(x).view(-1, x.shape[1])
|
||||
#print('self_pool',self.pool)
|
||||
#print('pool_after',x.shape)
|
||||
x = self.fc(x)
|
||||
return x
|
||||
|
||||
|
||||
def mobilevit_xxs():
|
||||
dims = [64, 80, 96]
|
||||
channels = [16, 16, 24, 24, 48, 48, 64, 64, 80, 80, 320]
|
||||
return MobileViT((256, 256), dims, channels, num_classes=1000, expansion=2)
|
||||
|
||||
|
||||
def mobilevit_xs():
|
||||
dims = [96, 120, 144]
|
||||
channels = [16, 32, 48, 48, 64, 64, 80, 80, 96, 96, 384]
|
||||
return MobileViT((256, 256), dims, channels, num_classes=1000)
|
||||
|
||||
|
||||
def mobilevit_s():
|
||||
dims = [144, 192, 240]
|
||||
channels = [16, 32, 64, 64, 96, 96, 128, 128, 160, 160, 640]
|
||||
return MobileViT((conf.img_size, conf.img_size), dims, channels, num_classes=conf.embedding_size)
|
||||
|
||||
|
||||
def count_parameters(model):
|
||||
return sum(p.numel() for p in model.parameters() if p.requires_grad)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
img = torch.randn(5, 3, 256, 256)
|
||||
|
||||
vit = mobilevit_xxs()
|
||||
out = vit(img)
|
||||
print(out.shape)
|
||||
print(count_parameters(vit))
|
||||
|
||||
vit = mobilevit_xs()
|
||||
out = vit(img)
|
||||
print(out.shape)
|
||||
print(count_parameters(vit))
|
||||
|
||||
vit = mobilevit_s()
|
||||
out = vit(img)
|
||||
print(out.shape)
|
||||
print(count_parameters(vit))
|
134
ytracking/tracking/trackers/reid/model/resbam.py
Normal file
@ -0,0 +1,134 @@
|
||||
from .CBAM import CBAM
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from .Tool import GeM as gem
|
||||
|
||||
class Bottleneck(nn.Module):
|
||||
expansion = 4
|
||||
    def __init__(self, inchannel, outchannel, stride=1, downsample=None):
|
||||
# super(Bottleneck, self).__init__()
|
||||
super().__init__()
|
||||
self.conv1 = nn.Conv2d(in_channels=inchannel,out_channels=outchannel, kernel_size=1, stride=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(outchannel)
|
||||
self.conv2 = nn.Conv2d(in_channels=outchannel, out_channels=outchannel,kernel_size=3,bias=False, stride=stride,padding=1)
|
||||
self.bn2 = nn.BatchNorm2d(outchannel)
|
||||
self.conv3 =nn.Conv2d(in_channels=outchannel, out_channels=outchannel*self.expansion,stride=1,bias=False,kernel_size=1)
|
||||
self.bn3 = nn.BatchNorm2d(outchannel*self.expansion)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
        self.downsample = downsample
|
||||
|
||||
    def forward(self, x):
        # keep the skip branch in a local; storing it on self made forward stateful
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        out = out + identity
        out = self.relu(out)
        return out
|
||||
|
||||
class resnet(nn.Module):
|
||||
def __init__(self,block=Bottleneck, block_num=[3,4,6,3], num_class=1000):
|
||||
super().__init__()
|
||||
self.in_channel = 64
|
||||
self.conv1 = nn.Conv2d(in_channels=3,
|
||||
out_channels=self.in_channel,
|
||||
stride=2,
|
||||
kernel_size=7,
|
||||
padding=3,
|
||||
bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(self.in_channel)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.cbam = CBAM(self.in_channel)
|
||||
self.cbam1 = CBAM(2048)
|
||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
|
||||
self.layer1 = self._make_layer(block, 64, block_num[0],stride=1)
|
||||
self.layer2 = self._make_layer(block, 128, block_num[1],stride=2)
|
||||
self.layer3 = self._make_layer(block, 256, block_num[2],stride=2)
|
||||
self.layer4 = self._make_layer(block, 512, block_num[3],stride=2)
|
||||
self.avgpool = nn.AdaptiveAvgPool2d((1,1))
|
||||
self.gem = gem()
|
||||
self.fc = nn.Linear(512*block.expansion, num_class)
|
||||
|
||||
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1.0)
                nn.init.constant_(m.bias, 0.0)
|
||||
|
||||
def _make_layer(self,block ,channel, block_num, stride=1):
|
||||
downsample = None
|
||||
if stride !=1 or self.in_channel != channel*block.expansion:
|
||||
downsample = nn.Sequential(
|
||||
nn.Conv2d(self.in_channel, channel*block.expansion,kernel_size=1,stride=stride,bias=False),
|
||||
nn.BatchNorm2d(channel*block.expansion))
|
||||
layer = []
|
||||
layer.append(block(self.in_channel, channel, stride, downsample))
|
||||
self.in_channel = channel*block.expansion
|
||||
for _ in range(1, block_num):
|
||||
layer.append(block(self.in_channel, channel))
|
||||
return nn.Sequential(*layer)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.bn1(x)
|
||||
x = self.relu(x)
|
||||
x = self.maxpool(x)
|
||||
x = self.cbam(x)
|
||||
|
||||
x = self.layer1(x)
|
||||
x = self.layer2(x)
|
||||
x = self.layer3(x)
|
||||
x = self.layer4(x)
|
||||
|
||||
x = self.cbam1(x)
|
||||
# x = self.avgpool(x)
|
||||
x = self.gem(x)
|
||||
x = torch.flatten(x, 1)
|
||||
x = self.fc(x)
|
||||
return x
|
||||
|
||||
class TripletNet(nn.Module):
|
||||
def __init__(self, num_class, flag=True):
|
||||
super(TripletNet, self).__init__()
|
||||
self.initnet = rescbam(num_class)
|
||||
self.flag = flag
|
||||
|
||||
def forward(self, x1, x2=None, x3=None):
|
||||
if self.flag:
|
||||
output1 = self.initnet(x1)
|
||||
output2 = self.initnet(x2)
|
||||
output3 = self.initnet(x3)
|
||||
return output1, output2, output3
|
||||
else:
|
||||
output = self.initnet(x1)
|
||||
return output
|
||||
|
||||
def rescbam(num_class):
|
||||
return resnet(block=Bottleneck, block_num=[3,4,6,3],num_class=num_class)
|
||||
|
||||
if __name__ =='__main__':
|
||||
input1 = torch.randn(4,3,640,640)
|
||||
input2 = torch.randn(4,3,640,640)
|
||||
input3 = torch.randn(4,3,640,640)
|
||||
|
||||
    # rescbam test
|
||||
# Resnet50 = rescbam(512)
|
||||
# output = Resnet50.forward(input1)
|
||||
# print(Resnet50)
|
||||
|
||||
    # TripletNet test
|
||||
trnet = TripletNet(512)
|
||||
output = trnet(input1, input2, input3)
|
||||
print(output)
|
182
ytracking/tracking/trackers/reid/model/resnet.py
Normal file
@ -0,0 +1,182 @@
|
||||
"""resnet in pytorch
|
||||
|
||||
|
||||
|
||||
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
|
||||
|
||||
Deep Residual Learning for Image Recognition
|
||||
https://arxiv.org/abs/1512.03385v1
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from ..config import config as conf
|
||||
|
||||
class BasicBlock(nn.Module):
|
||||
"""Basic Block for resnet 18 and resnet 34
|
||||
|
||||
"""
|
||||
|
||||
#BasicBlock and BottleNeck block
|
||||
#have different output size
|
||||
#we use class attribute expansion
|
||||
#to distinct
|
||||
expansion = 1
|
||||
|
||||
def __init__(self, in_channels, out_channels, stride=1):
|
||||
super().__init__()
|
||||
|
||||
#residual function
|
||||
self.residual_function = nn.Sequential(
|
||||
nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
|
||||
nn.BatchNorm2d(out_channels),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, padding=1, bias=False),
|
||||
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
|
||||
)
|
||||
|
||||
#shortcut
|
||||
self.shortcut = nn.Sequential()
|
||||
|
||||
#the shortcut output dimension is not the same with residual function
|
||||
#use 1*1 convolution to match the dimension
|
||||
if stride != 1 or in_channels != BasicBlock.expansion * out_channels:
|
||||
self.shortcut = nn.Sequential(
|
||||
nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
|
||||
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
|
||||
|
||||
class BottleNeck(nn.Module):
|
||||
"""Residual block for resnet over 50 layers
|
||||
|
||||
"""
|
||||
expansion = 4
|
||||
def __init__(self, in_channels, out_channels, stride=1):
|
||||
super().__init__()
|
||||
self.residual_function = nn.Sequential(
|
||||
nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
|
||||
nn.BatchNorm2d(out_channels),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Conv2d(out_channels, out_channels, stride=stride, kernel_size=3, padding=1, bias=False),
|
||||
nn.BatchNorm2d(out_channels),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, bias=False),
|
||||
nn.BatchNorm2d(out_channels * BottleNeck.expansion),
|
||||
)
|
||||
|
||||
self.shortcut = nn.Sequential()
|
||||
|
||||
if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
|
||||
self.shortcut = nn.Sequential(
|
||||
nn.Conv2d(in_channels, out_channels * BottleNeck.expansion, stride=stride, kernel_size=1, bias=False),
|
||||
nn.BatchNorm2d(out_channels * BottleNeck.expansion)
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
|
||||
|
||||
class ResNet(nn.Module):
|
||||
|
||||
def __init__(self, block, num_block, num_classes=conf.embedding_size):
|
||||
super().__init__()
|
||||
|
||||
self.in_channels = 64
|
||||
|
||||
# self.conv1 = nn.Sequential(
|
||||
# nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False),
|
||||
# nn.BatchNorm2d(64),
|
||||
# nn.ReLU(inplace=True))
|
||||
|
||||
self.conv1 = nn.Sequential(
|
||||
nn.Conv2d(3, 64,stride=2,kernel_size=7,padding=3,bias=False),
|
||||
nn.BatchNorm2d(64),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
|
||||
|
||||
|
||||
#we use a different inputsize than the original paper
|
||||
#so conv2_x's stride is 1
|
||||
self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
|
||||
self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
|
||||
self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
|
||||
self.conv5_x = self._make_layer(block, 512, num_block[3], 2)
|
||||
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
|
||||
self.fc = nn.Linear(512 * block.expansion, num_classes)
|
||||
|
||||
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1.0)
                nn.init.constant_(m.bias, 0.0)
|
||||
|
||||
def _make_layer(self, block, out_channels, num_blocks, stride):
|
||||
"""make resnet layers(by layer i didnt mean this 'layer' was the
|
||||
same as a neuron netowork layer, ex. conv layer), one layer may
|
||||
contain more than one residual block
|
||||
|
||||
Args:
|
||||
block: block type, basic block or bottle neck block
|
||||
out_channels: output depth channel number of this layer
|
||||
num_blocks: how many blocks per layer
|
||||
stride: the stride of the first block of this layer
|
||||
|
||||
Return:
|
||||
return a resnet layer
|
||||
"""
|
||||
|
||||
# we have num_block blocks per layer, the first block
|
||||
# could be 1 or 2, other blocks would always be 1
|
||||
strides = [stride] + [1] * (num_blocks - 1)
|
||||
layers = []
|
||||
for stride in strides:
|
||||
layers.append(block(self.in_channels, out_channels, stride))
|
||||
self.in_channels = out_channels * block.expansion
|
||||
|
||||
return nn.Sequential(*layers)
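
    # Example (hedged): a stage built with num_blocks=2 and stride=2 yields
    # strides [2, 1] -- only the first block of a stage downsamples and
    # changes width; the remaining blocks preserve shape.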
|
||||
|
||||
def forward(self, x):
|
||||
output = self.conv1(x)
|
||||
output = self.conv2_x(output)
|
||||
output = self.conv3_x(output)
|
||||
output = self.conv4_x(output)
|
||||
output = self.conv5_x(output)
|
||||
        # print('poolBefore', output.shape)
|
||||
output = self.avg_pool(output)
|
||||
        # print('poolAfter', output.shape)
|
||||
output = output.view(output.size(0), -1)
|
||||
        # print('fcBefore', output.shape)
|
||||
output = self.fc(output)
|
||||
|
||||
return output
|
||||
|
||||
def resnet18():
|
||||
""" return a ResNet 18 object
|
||||
"""
|
||||
return ResNet(BasicBlock, [2, 2, 2, 2])
|
||||
|
||||
def resnet34():
|
||||
""" return a ResNet 34 object
|
||||
"""
|
||||
return ResNet(BasicBlock, [3, 4, 6, 3])
|
||||
|
||||
def resnet50():
|
||||
""" return a ResNet 50 object
|
||||
"""
|
||||
return ResNet(BottleNeck, [3, 4, 6, 3])
|
||||
|
||||
def resnet101():
|
||||
""" return a ResNet 101 object
|
||||
"""
|
||||
return ResNet(BottleNeck, [3, 4, 23, 3])
|
||||
|
||||
def resnet152():
|
||||
""" return a ResNet 152 object
|
||||
"""
|
||||
return ResNet(BottleNeck, [3, 8, 36, 3])
|
||||
|
||||
|
120
ytracking/tracking/trackers/reid/model/resnet_face.py
Normal file
@ -0,0 +1,120 @@
|
||||
""" Resnet_IR_SE in ArcFace """
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
class Flatten(nn.Module):
|
||||
def forward(self, x):
|
||||
return x.reshape(x.shape[0], -1)
|
||||
|
||||
|
||||
class SEConv(nn.Module):
|
||||
"""Use Convolution instead of FullyConnection in SE"""
|
||||
|
||||
def __init__(self, channels, reduction):
|
||||
super().__init__()
|
||||
self.net = nn.Sequential(
|
||||
nn.AdaptiveAvgPool2d(1),
|
||||
nn.Conv2d(channels, channels // reduction, kernel_size=1, bias=False),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Conv2d(channels // reduction, channels, kernel_size=1, bias=False),
|
||||
nn.Sigmoid(),
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return self.net(x) * x
|
||||
|
||||
|
||||
class SE(nn.Module):
|
||||
|
||||
def __init__(self, channels, reduction):
|
||||
super().__init__()
|
||||
self.net = nn.Sequential(
|
||||
nn.AdaptiveAvgPool2d(1),
|
||||
nn.Linear(channels, channels // reduction),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Linear(channels // reduction, channels),
|
||||
nn.Sigmoid(),
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return self.net(x) * x
|
||||
|
||||
|
||||
class IRSE(nn.Module):
|
||||
|
||||
def __init__(self, channels, depth, stride):
|
||||
super().__init__()
|
||||
if channels == depth:
|
||||
self.shortcut = nn.MaxPool2d(kernel_size=1, stride=stride)
|
||||
else:
|
||||
self.shortcut = nn.Sequential(
|
||||
nn.Conv2d(channels, depth, (1, 1), stride, bias=False),
|
||||
nn.BatchNorm2d(depth),
|
||||
)
|
||||
self.residual = nn.Sequential(
|
||||
nn.BatchNorm2d(channels),
|
||||
nn.Conv2d(channels, depth, (3, 3), 1, 1, bias=False),
|
||||
nn.PReLU(depth),
|
||||
nn.Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
|
||||
nn.BatchNorm2d(depth),
|
||||
SEConv(depth, 16),
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
return self.shortcut(x) + self.residual(x)
|
||||
|
||||
class ResIRSE(nn.Module):
|
||||
"""Resnet50-IRSE backbone"""
|
||||
|
||||
def __init__(self, ih,embedding_size, drop_ratio):
|
||||
super().__init__()
|
||||
ih_last = ih // 16
|
||||
self.input_layer = nn.Sequential(
|
||||
nn.Conv2d(3, 64, (3, 3), 1, 1, bias=False),
|
||||
nn.BatchNorm2d(64),
|
||||
nn.PReLU(64),
|
||||
)
|
||||
self.output_layer = nn.Sequential(
|
||||
nn.BatchNorm2d(512),
|
||||
nn.Dropout(drop_ratio),
|
||||
Flatten(),
|
||||
nn.Linear(512 * ih_last * ih_last, embedding_size),
|
||||
nn.BatchNorm1d(embedding_size),
|
||||
)
|
||||
|
||||
# ["channels", "depth", "stride"],
|
||||
self.res50_arch = [
|
||||
[64, 64, 2], [64, 64, 1], [64, 64, 1],
|
||||
[64, 128, 2], [128, 128, 1], [128, 128, 1], [128, 128, 1],
|
||||
[128, 256, 2], [256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1],
|
||||
[256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1],
|
||||
[256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1],
|
||||
[256, 512, 2], [512, 512, 1], [512, 512, 1],
|
||||
]
|
||||
|
||||
self.body = nn.Sequential(*[ IRSE(a,b,c) for (a,b,c) in self.res50_arch ])
|
||||
|
||||
def forward(self, x):
|
||||
x = self.input_layer(x)
|
||||
x = self.body(x)
|
||||
x = self.output_layer(x)
|
||||
return x
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
x = Image.open("../samples/009.jpg").convert('L')
|
||||
x = x.resize((128, 128))
|
||||
x = np.asarray(x, dtype=np.float32)
|
||||
x = x[None, None, ...]
|
||||
x = torch.from_numpy(x)
|
||||
net = ResIRSE(512, 0.6)
|
||||
net.eval()
|
||||
with torch.no_grad():
|
||||
out = net(x)
|
||||
print(out.shape)
|
384
ytracking/tracking/trackers/reid/model/resnet_pre.py
Normal file
@ -0,0 +1,384 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
# from config import config as conf
|
||||
from ..config import config as conf
|
||||
|
||||
try:
|
||||
from torch.hub import load_state_dict_from_url
|
||||
except ImportError:
|
||||
from torch.utils.model_zoo import load_url as load_state_dict_from_url
|
||||
#from .utils import load_state_dict_from_url
|
||||
|
||||
|
||||
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
|
||||
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
|
||||
'wide_resnet50_2', 'wide_resnet101_2']
|
||||
|
||||
|
||||
model_urls = {
|
||||
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
|
||||
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
|
||||
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
|
||||
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
|
||||
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
|
||||
'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
|
||||
'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
|
||||
'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
|
||||
'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
|
||||
}
|
||||
|
||||
|
||||
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
|
||||
"""3x3 convolution with padding"""
|
||||
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
|
||||
padding=dilation, groups=groups, bias=False, dilation=dilation)
|
||||
|
||||
|
||||
def conv1x1(in_planes, out_planes, stride=1):
|
||||
"""1x1 convolution"""
|
||||
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
|
||||
|
||||
|
||||
class BasicBlock(nn.Module):
|
||||
expansion = 1
|
||||
|
||||
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
|
||||
base_width=64, dilation=1, norm_layer=None):
|
||||
super(BasicBlock, self).__init__()
|
||||
if norm_layer is None:
|
||||
norm_layer = nn.BatchNorm2d
|
||||
if groups != 1 or base_width != 64:
|
||||
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
|
||||
if dilation > 1:
|
||||
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
|
||||
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
|
||||
self.conv1 = conv3x3(inplanes, planes, stride)
|
||||
self.bn1 = norm_layer(planes)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.conv2 = conv3x3(planes, planes)
|
||||
self.bn2 = norm_layer(planes)
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
|
||||
def forward(self, x):
|
||||
identity = x
|
||||
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
|
||||
if self.downsample is not None:
|
||||
identity = self.downsample(x)
|
||||
|
||||
out += identity
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
|
||||
# Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
|
||||
# while original implementation places the stride at the first 1x1 convolution(self.conv1)
|
||||
# according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
|
||||
# This variant is also known as ResNet V1.5 and improves accuracy according to
|
||||
# https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
|
||||
|
||||
expansion = 4
|
||||
|
||||
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
|
||||
base_width=64, dilation=1, norm_layer=None):
|
||||
super(Bottleneck, self).__init__()
|
||||
if norm_layer is None:
|
||||
norm_layer = nn.BatchNorm2d
|
||||
width = int(planes * (base_width / 64.)) * groups
|
||||
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
|
||||
self.conv1 = conv1x1(inplanes, width)
|
||||
self.bn1 = norm_layer(width)
|
||||
self.conv2 = conv3x3(width, width, stride, groups, dilation)
|
||||
self.bn2 = norm_layer(width)
|
||||
self.conv3 = conv1x1(width, planes * self.expansion)
|
||||
self.bn3 = norm_layer(planes * self.expansion)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
|
||||
def forward(self, x):
|
||||
identity = x
|
||||
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv3(out)
|
||||
out = self.bn3(out)
|
||||
|
||||
if self.downsample is not None:
|
||||
identity = self.downsample(x)
|
||||
|
||||
out += identity
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class ResNet(nn.Module):
|
||||
|
||||
def __init__(self, block, layers, num_classes=conf.embedding_size, zero_init_residual=False,
|
||||
groups=1, width_per_group=64, replace_stride_with_dilation=None,
|
||||
norm_layer=None, scale=0.75):
|
||||
super(ResNet, self).__init__()
|
||||
if norm_layer is None:
|
||||
norm_layer = nn.BatchNorm2d
|
||||
self._norm_layer = norm_layer
|
||||
|
||||
self.inplanes = 64
|
||||
self.dilation = 1
|
||||
if replace_stride_with_dilation is None:
|
||||
# each element in the tuple indicates if we should replace
|
||||
# the 2x2 stride with a dilated convolution instead
|
||||
replace_stride_with_dilation = [False, False, False]
|
||||
if len(replace_stride_with_dilation) != 3:
|
||||
raise ValueError("replace_stride_with_dilation should be None "
|
||||
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
|
||||
self.groups = groups
|
||||
self.base_width = width_per_group
|
||||
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
|
||||
bias=False)
|
||||
self.bn1 = norm_layer(self.inplanes)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
|
||||
self.layer1 = self._make_layer(block, int(64*scale), layers[0])
|
||||
self.layer2 = self._make_layer(block, int(128*scale), layers[1], stride=2,
|
||||
dilate=replace_stride_with_dilation[0])
|
||||
self.layer3 = self._make_layer(block, int(256*scale), layers[2], stride=2,
|
||||
dilate=replace_stride_with_dilation[1])
|
||||
self.layer4 = self._make_layer(block, int(512*scale), layers[3], stride=2,
|
||||
dilate=replace_stride_with_dilation[2])
|
||||
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
|
||||
self.fc = nn.Linear(int(512 * block.expansion*scale), num_classes)
|
||||
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
|
||||
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
|
||||
nn.init.constant_(m.weight, 1)
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
# Zero-initialize the last BN in each residual branch,
|
||||
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
|
||||
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
|
||||
if zero_init_residual:
|
||||
for m in self.modules():
|
||||
if isinstance(m, Bottleneck):
|
||||
nn.init.constant_(m.bn3.weight, 0)
|
||||
elif isinstance(m, BasicBlock):
|
||||
nn.init.constant_(m.bn2.weight, 0)
|
||||
|
||||
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
|
||||
norm_layer = self._norm_layer
|
||||
downsample = None
|
||||
previous_dilation = self.dilation
|
||||
if dilate:
|
||||
self.dilation *= stride
|
||||
stride = 1
|
||||
if stride != 1 or self.inplanes != planes * block.expansion:
|
||||
downsample = nn.Sequential(
|
||||
conv1x1(self.inplanes, planes * block.expansion, stride),
|
||||
norm_layer(planes * block.expansion),
|
||||
)
|
||||
|
||||
layers = []
|
||||
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
|
||||
self.base_width, previous_dilation, norm_layer))
|
||||
self.inplanes = planes * block.expansion
|
||||
for _ in range(1, blocks):
|
||||
layers.append(block(self.inplanes, planes, groups=self.groups,
|
||||
base_width=self.base_width, dilation=self.dilation,
|
||||
norm_layer=norm_layer))
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def _forward_impl(self, x):
|
||||
# See note [TorchScript super()]
|
||||
x = self.conv1(x)
|
||||
x = self.bn1(x)
|
||||
x = self.relu(x)
|
||||
x = self.maxpool(x)
|
||||
|
||||
x = self.layer1(x)
|
||||
x = self.layer2(x)
|
||||
x = self.layer3(x)
|
||||
x = self.layer4(x)
|
||||
|
||||
#print('poolBefore', x.shape)
|
||||
x = self.avgpool(x)
|
||||
#print('poolAfter', x.shape)
|
||||
x = torch.flatten(x, 1)
|
||||
#print('fcBefore',x.shape)
|
||||
x = self.fc(x)
|
||||
# print('fcAfter',x.shape)
|
||||
|
||||
return x
|
||||
|
||||
def forward(self, x):
|
||||
return self._forward_impl(x)
|
||||
|
||||
|
||||
# def _resnet(arch, block, layers, pretrained, progress, **kwargs):
|
||||
# model = ResNet(block, layers, **kwargs)
|
||||
# if pretrained:
|
||||
# state_dict = load_state_dict_from_url(model_urls[arch],
|
||||
# progress=progress)
|
||||
# model.load_state_dict(state_dict, strict=False)
|
||||
# return model
|
||||
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
|
||||
model = ResNet(block, layers, **kwargs)
|
||||
if pretrained:
|
||||
state_dict = load_state_dict_from_url(model_urls[arch],
|
||||
progress=progress)
|
||||
#print('state_dict',state_dict)
|
||||
src_state_dict = state_dict
|
||||
target_state_dict = model.state_dict()
|
||||
skip_keys = []
|
||||
# skip mismatch size tensors in case of pretraining
|
||||
for k in src_state_dict.keys():
|
||||
if k not in target_state_dict:
|
||||
continue
|
||||
if src_state_dict[k].size() != target_state_dict[k].size():
|
||||
skip_keys.append(k)
|
||||
for k in skip_keys:
|
||||
del src_state_dict[k]
|
||||
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
|
||||
|
||||
return model
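
# Hedged note on the partial load above: the ImageNet checkpoint's fc head
# (1000 classes) never matches num_classes=conf.embedding_size, and with the
# default scale=0.75 most stage tensors differ in shape as well, so only
# shape-compatible tensors (e.g. the conv1/bn1 stem) are actually loaded;
# everything else keeps its fresh initialization.
#   model = resnet18(pretrained=True)
#   emb = model(torch.randn(2, 3, 224, 224))  # -> (2, conf.embedding_size)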
|
||||
|
||||
|
||||
def resnet18(pretrained=True, progress=True, **kwargs):
|
||||
r"""ResNet-18 model from
|
||||
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
||||
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
progress (bool): If True, displays a progress bar of the download to stderr
|
||||
"""
|
||||
return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
|
||||
**kwargs)
|
||||
|
||||
|
||||
def resnet34(pretrained=False, progress=True, **kwargs):
|
||||
r"""ResNet-34 model from
|
||||
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
||||
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
progress (bool): If True, displays a progress bar of the download to stderr
|
||||
"""
|
||||
return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
|
||||
**kwargs)
|
||||
|
||||
|
||||
def resnet50(pretrained=False, progress=True, **kwargs):
|
||||
r"""ResNet-50 model from
|
||||
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
||||
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
progress (bool): If True, displays a progress bar of the download to stderr
|
||||
"""
|
||||
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
|
||||
**kwargs)
|
||||
|
||||
|
||||
def resnet101(pretrained=False, progress=True, **kwargs):
|
||||
r"""ResNet-101 model from
|
||||
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
||||
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
progress (bool): If True, displays a progress bar of the download to stderr
|
||||
"""
|
||||
return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
|
||||
**kwargs)
|
||||
|
||||
|
||||
def resnet152(pretrained=False, progress=True, **kwargs):
|
||||
r"""ResNet-152 model from
|
||||
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
|
||||
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
progress (bool): If True, displays a progress bar of the download to stderr
|
||||
"""
|
||||
return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
|
||||
**kwargs)
|
||||
|
||||
|
||||
def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
|
||||
r"""ResNeXt-50 32x4d model from
|
||||
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
|
||||
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
progress (bool): If True, displays a progress bar of the download to stderr
|
||||
"""
|
||||
kwargs['groups'] = 32
|
||||
kwargs['width_per_group'] = 4
|
||||
return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
|
||||
pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
|
||||
r"""ResNeXt-101 32x8d model from
|
||||
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
|
||||
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
progress (bool): If True, displays a progress bar of the download to stderr
|
||||
"""
|
||||
kwargs['groups'] = 32
|
||||
kwargs['width_per_group'] = 8
|
||||
return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
|
||||
pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
|
||||
r"""Wide ResNet-50-2 model from
|
||||
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
|
||||
|
||||
The model is the same as ResNet except for the bottleneck number of channels
|
||||
which is twice larger in every block. The number of channels in outer 1x1
|
||||
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
|
||||
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
|
||||
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
progress (bool): If True, displays a progress bar of the download to stderr
|
||||
"""
|
||||
kwargs['width_per_group'] = 64 * 2
|
||||
return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
|
||||
pretrained, progress, **kwargs)
|
||||
|
||||
|
||||
def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
|
||||
r"""Wide ResNet-101-2 model from
|
||||
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
|
||||
|
||||
The model is the same as ResNet except for the bottleneck number of channels
|
||||
which is twice larger in every block. The number of channels in outer 1x1
|
||||
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
|
||||
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
|
||||
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
progress (bool): If True, displays a progress bar of the download to stderr
|
||||
"""
|
||||
kwargs['width_per_group'] = 64 * 2
|
||||
return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
|
||||
pretrained, progress, **kwargs)
|
4
ytracking/tracking/trackers/reid/model/utils.py
Normal file
@ -0,0 +1,4 @@
try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    from torch.utils.model_zoo import load_url as load_state_dict_from_url
147
ytracking/tracking/trackers/reid/reid_interface.py
Normal file
@ -0,0 +1,147 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Thu Jan 18 17:21:01 2024
|
||||
|
||||
@author: ym
|
||||
"""
|
||||
import numpy as np
|
||||
import torch
|
||||
import cv2
|
||||
import torch.nn as nn
|
||||
import torchvision.transforms as T
|
||||
from .model import mobilevit_s, resnet18, resnet34, resnet50, mobilenet_v2, MobileNetV3_Small
|
||||
# from .config import config as conf
|
||||
from tools.config import config as conf
|
||||
|
||||
|
||||
class ReIDInterface:
|
||||
def __init__(self, config):
|
||||
self.device = conf.device
|
||||
if conf.backbone == 'resnet18':
|
||||
# model = ResIRSE(img_size, embedding_size, conf.drop_ratio).to(device)
|
||||
model = resnet18().to(self.device)
|
||||
elif conf.backbone == 'resnet34':
|
||||
model = resnet34().to(self.device)
|
||||
elif conf.backbone == 'resnet50':
|
||||
model = resnet50().to(self.device)
|
||||
elif conf.backbone == 'mobilevit_s':
|
||||
model = mobilevit_s().to(self.device)
|
||||
elif conf.backbone == 'mobilenetv3':
|
||||
model = MobileNetV3_Small().to(self.device)
|
||||
else:
|
||||
model = mobilenet_v2().to(self.device)
|
||||
|
||||
self.batch_size = conf.batch_size
|
||||
self.embedding_size = conf.embedding_size
|
||||
self.img_size = conf.img_size
|
||||
|
||||
self.model_path = conf.model_path
|
||||
|
||||
        # the original input was a PIL image
|
||||
self.transform = T.Compose([
|
||||
T.ToTensor(),
|
||||
T.Resize((self.img_size, self.img_size)),
|
||||
T.ConvertImageDtype(torch.float32),
|
||||
T.Normalize(mean=[0.5], std=[0.5]),
|
||||
])
|
||||
|
||||
|
||||
# self.model = nn.DataParallel(model).to(self.device)
|
||||
self.model = model.to(self.device)
|
||||
self.model.load_state_dict(torch.load(self.model_path, map_location=self.device))
|
||||
        if str(self.device) != "cpu":
            self.model.half()
|
||||
|
||||
self.model.eval()
|
||||
|
||||
def inference(self, images, detections):
|
||||
if isinstance(images, np.ndarray):
|
||||
features = self.inference_image(images, detections)
|
||||
return features
|
||||
|
||||
batch_patches = []
|
||||
patches = []
|
||||
for i, img in enumerate(images):
|
||||
img = img.copy()
|
||||
patch = self.transform(img)
|
||||
if str(self.device) != "cpu":
|
||||
patch = patch.to(device=self.device).half()
|
||||
else:
|
||||
patch = patch.to(device=self.device)
|
||||
|
||||
patches.append(patch)
|
||||
if (i + 1) % self.batch_size == 0:
|
||||
patches = torch.stack(patches, dim=0)
|
||||
batch_patches.append(patches)
|
||||
patches = []
|
||||
|
||||
if len(patches):
|
||||
patches = torch.stack(patches, dim=0)
|
||||
batch_patches.append(patches)
|
||||
|
||||
features = np.zeros((0, self.embedding_size))
|
||||
for patches in batch_patches:
|
||||
pred=self.model(patches)
|
||||
pred[torch.isinf(pred)] = 1.0
|
||||
feat = pred.cpu().data.numpy()
|
||||
features = np.vstack((features, feat))
|
||||
return features
|
||||
|
||||
def inference_image(self, image, detections):
|
||||
H, W, _ = np.shape(image)
|
||||
|
||||
batch_patches = []
|
||||
patches = []
|
||||
for d in range(np.size(detections, 0)):
|
||||
tlbr = detections[d, :4].astype(np.int_)
|
||||
tlbr[0] = max(0, tlbr[0])
|
||||
tlbr[1] = max(0, tlbr[1])
|
||||
tlbr[2] = min(W - 1, tlbr[2])
|
||||
tlbr[3] = min(H - 1, tlbr[3])
|
||||
img = image[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2], :]
|
||||
|
||||
img = img[:, :, ::-1].copy() # the model expects RGB inputs
|
||||
patch = self.transform(img)
|
||||
|
||||
# patch = patch.to(device=self.device).half()
|
||||
if str(self.device) != "cpu":
|
||||
patch = patch.to(device=self.device).half()
|
||||
else:
|
||||
patch = patch.to(device=self.device)
|
||||
|
||||
patches.append(patch)
|
||||
if (d + 1) % self.batch_size == 0:
|
||||
patches = torch.stack(patches, dim=0)
|
||||
batch_patches.append(patches)
|
||||
patches = []
|
||||
|
||||
if len(patches):
|
||||
patches = torch.stack(patches, dim=0)
|
||||
batch_patches.append(patches)
|
||||
|
||||
features = np.zeros((0, self.embedding_size))
|
||||
for patches in batch_patches:
|
||||
pred = self.model(patches)
|
||||
pred[torch.isinf(pred)] = 1.0
|
||||
feat = pred.cpu().data.numpy()
|
||||
features = np.vstack((features, feat))
|
||||
|
||||
return features
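
    # Hedged usage sketch (illustrative values, not part of the original
    # file): each detection row is tlbr plus extras; crops are clipped to
    # the frame, flipped BGR -> RGB, and embedded batch by batch.
    #   reid = ReIDInterface(conf)
    #   dets = np.array([[12, 40, 180, 300, 0.91]])   # x1, y1, x2, y2, score
    #   feats = reid.inference(frame_bgr, dets)       # -> (1, embedding_size)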
21
ytracking/tracking/trackers/reid/test.py
Normal file
@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 19 16:10:39 2024

@author: ym
"""
import torch
from model.resnet_pre import resnet18


def main():
    model_path = "best.pth"
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = resnet18().to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))


if __name__ == "__main__":
    main()
|
66
ytracking/tracking/trackers/track.py
Normal file
@ -0,0 +1,66 @@
|
||||
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
||||
|
||||
from functools import partial
|
||||
|
||||
import torch
|
||||
|
||||
from ultralytics.utils import IterableSimpleNamespace, yaml_load
|
||||
from ultralytics.utils.checks import check_yaml
|
||||
|
||||
from .bot_sort import BOTSORT
|
||||
from .byte_tracker import BYTETracker
|
||||
|
||||
TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
|
||||
|
||||
|
||||
def on_predict_start(predictor, persist=False):
|
||||
"""
|
||||
Initialize trackers for object tracking during prediction.
|
||||
|
||||
Args:
|
||||
predictor (object): The predictor object to initialize trackers for.
|
||||
persist (bool, optional): Whether to persist the trackers if they already exist. Defaults to False.
|
||||
|
||||
Raises:
|
||||
AssertionError: If the tracker_type is not 'bytetrack' or 'botsort'.
|
||||
"""
|
||||
if hasattr(predictor, 'trackers') and persist:
|
||||
return
|
||||
tracker = check_yaml(predictor.args.tracker)
|
||||
cfg = IterableSimpleNamespace(**yaml_load(tracker))
|
||||
assert cfg.tracker_type in ['bytetrack', 'botsort'], \
|
||||
f"Only support 'bytetrack' and 'botsort' for now, but got '{cfg.tracker_type}'"
|
||||
trackers = []
|
||||
for _ in range(predictor.dataset.bs):
|
||||
tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
|
||||
trackers.append(tracker)
|
||||
predictor.trackers = trackers
|
||||
|
||||
|
||||
def on_predict_postprocess_end(predictor):
|
||||
"""Postprocess detected boxes and update with object tracking."""
|
||||
bs = predictor.dataset.bs
|
||||
im0s = predictor.batch[1]
|
||||
for i in range(bs):
|
||||
det = predictor.results[i].boxes.cpu().numpy()
|
||||
if len(det) == 0:
|
||||
continue
|
||||
tracks = predictor.trackers[i].update(det, im0s[i])
|
||||
if len(tracks) == 0:
|
||||
continue
|
||||
idx = tracks[:, -1].astype(int)
|
||||
predictor.results[i] = predictor.results[i][idx]
|
||||
predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1]))
|
||||
|
||||
|
||||
def register_tracker(model, persist):
|
||||
"""
|
||||
Register tracking callbacks to the model for object tracking during prediction.
|
||||
|
||||
Args:
|
||||
model (object): The model object to register tracking callbacks for.
|
||||
persist (bool): Whether to persist the trackers if they already exist.
|
||||
|
||||
"""
|
||||
model.add_callback('on_predict_start', partial(on_predict_start, persist=persist))
|
||||
model.add_callback('on_predict_postprocess_end', on_predict_postprocess_end)
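
# Hedged usage sketch (not part of the original commit): this is the hook
# Ultralytics' track mode relies on; attaching the callbacks manually makes
# plain predict() calls return ID-annotated boxes.
#   from ultralytics import YOLO
#   yolo = YOLO('yolov8n.pt')
#   register_tracker(yolo, persist=False)
#   for r in yolo.predict('video.mp4', stream=True):
#       print(r.boxes.id)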
|
3
ytracking/tracking/trackers/utils/__init__.py
Normal file
@ -0,0 +1,3 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
Binary file not shown.
BIN
ytracking/tracking/trackers/utils/__pycache__/gmc.cpython-39.pyc
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff.