更新 detacttracking

This commit is contained in:
lee
2025-01-22 13:16:44 +08:00
parent 2320468c40
commit c9d79f8059
355 changed files with 61097 additions and 1 deletions

View File

Binary file not shown.

View File

@ -0,0 +1,650 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:16:01 2024
@author: ym
"""
import numpy as np
import cv2
from pathlib import Path
from scipy.spatial.distance import cdist
from tracking.utils.mergetrack import track_equal_track, readDict
curpath = Path(__file__).resolve().parents[0]
curpath = Path(curpath)
parpath = curpath.parent
class MoveState:
"""商品运动状态标志"""
Static = 0
DownWard = 1
UpWard = 2
FreeMove = 3
Unknown = -1
class ShoppingCart:
def __init__(self, bboxes):
self.bboxes = bboxes
self.loadrate = self.load_rate()
def load_rate(self):
bboxes = self.bboxes
fid = min(bboxes[:, 7])
idx = bboxes[:, 7] == fid
boxes = bboxes[idx]
temp = np.zeros(self.incart.shape, np.uint8)
for i in range(boxes.shape[0]):
x1, y1, x2, y2, tid = boxes[i, 0:5]
cv2.rectangle(temp, (int(x1), int(y1)), (int(x2), int(y2)), 255, cv2.FILLED)
'''1. and 滤除购物车边框外的干扰'''
loadstate = cv2.bitwise_and(self.incart, temp)
'''2. xor 得到购物车内内被填充的区域'''
# loadstate = cv2.bitwise_xor(self.incart, temp1)
num_loadstate = cv2.countNonZero(loadstate)
num_incart = cv2.countNonZero(self.incart)
loadrate = num_loadstate / (num_incart+0.01)
# edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png", cv2.IMREAD_GRAYSCALE)
# cv2.imwrite(f"./test/temp.png", cv2.add(temp, edgeline))
# cv2.imwrite(f"./test/incart.png", cv2.add(self.incart, edgeline))
# cv2.imwrite(f"./test/loadstate.png", cv2.add(loadstate, edgeline))
return loadrate
@property
def incart(self):
img = cv2.imread(str(parpath/'shopcart/cart_tempt/incart.png'), cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
@property
def outcart(self):
img = cv2.imread(str(parpath/'shopcart/cart_tempt/outcart.png'), cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
@property
def cartedge(self):
img = cv2.imread(str(parpath/'shopcart/cart_tempt/cartedge.png'), cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
class Track:
'''抽象基类,不能实例化对象'''
def __init__(self, boxes, features=None, imgshape=(1024, 1280)):
'''
boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
'''
# assert len(set(boxes[:, 4].astype(int))) == 1, "For a Track, track_id more than 1"
# assert len(set(boxes[:, 6].astype(int))) == 1, "For a Track, class number more than 1"
self.boxes = boxes
self.features = features
self.tid = int(boxes[0, 4])
self.cls = int(boxes[0, 6])
self.frnum = boxes.shape[0]
self.isCornpoint = False
self.imgshape = imgshape
# self.isBorder = False
# self.state = MoveState.Unknown
'''轨迹开始帧、结束帧 ID'''
# self.start_fid = int(np.min(boxes[:, 7]))
# self.end_fid = int(np.max(boxes[:, 7]))
''''''
self.Hands = []
self.HandsIou = []
self.Goods = []
self.GoodsIou = []
'''5个关键点中心点、左上点、右上点、左下点、右下点 )坐标'''
self.compute_cornpoints()
'''5个关键点轨迹特征可以在子类中实现降低顺序处理时的计算量
(中心点、左上点、右上点、左下点、右下点 )轨迹特征'''
self.compute_cornpts_feats()
'''应计算各个角点面积、平均面积'''
mw, mh = np.mean(boxes[:, 2]-boxes[:, 0]), np.mean((boxes[:, 3]-boxes[:, 1]))
self.mwh = np.mean((mw, mh))
self.Area = mw * mh
'''
最后一帧与第一帧间的位移:
vshift: 正值为向下,负值为向上
hshift: 负值为向购物车边框两边移动,正值为物品向中心移动
'''
self.vshift = self.cornpoints[-1, 1] - self.cornpoints[0, 1] # 纵向位移
self.hshift = abs(self.cornpoints[0, 0] - self.imgshape[0]/2) - \
abs(self.cornpoints[-1, 0] - self.imgshape[0]/2)
'''手部状态分析'''
self.HAND_STATIC_THRESH = 100
if self.cls == 0:
self.extract_hand_features()
def compute_cornpoints(self):
'''
cornpoints 共10项分别是个点的坐标值x, y
(center, top_left, top_right, bottom_left, bottom_right)
'''
boxes = self.boxes
cornpoints = np.zeros((self.frnum, 10))
cornpoints[:,0] = (boxes[:, 0] + boxes[:, 2]) / 2
cornpoints[:,1] = (boxes[:, 1] + boxes[:, 3]) / 2
cornpoints[:,2], cornpoints[:,3] = boxes[:, 0], boxes[:, 1]
cornpoints[:,4], cornpoints[:,5] = boxes[:, 2], boxes[:, 1]
cornpoints[:,6], cornpoints[:,7] = boxes[:, 0], boxes[:, 3]
cornpoints[:,8], cornpoints[:,9] = boxes[:, 2], boxes[:, 3]
self.cornpoints = cornpoints
def compute_cornpts_feats(self):
'''
'''
# print(f"TrackID: {self.tid}")
trajectory = []
trajlens = []
trajdist = []
trajrects = []
trajrects_wh = []
for k in range(5):
# diff_xy2 = np.power(np.diff(self.cornpoints[:, 2*k:2*(k+1)], axis = 0), 2)
# trajlen = np.sum(np.sqrt(np.sum(diff_xy2, axis = 1)))
X = self.cornpoints[:, 2*k:2*(k+1)]
traj = np.linalg.norm(np.diff(X, axis=0), axis=1)
trajectory.append(traj)
trajlen = np.sum(traj)
trajlens.append(trajlen)
ptdist = np.max(cdist(X, X))
trajdist.append(ptdist)
'''最小外接矩形:
rect[0]: 中心(x, y)
rect[1]: (w, h)
rect[0]: 旋转角度 (-90°, 0]
'''
rect = cv2.minAreaRect(X.astype(np.int64))
rect_wh = max(rect[1])
trajrects_wh.append(rect_wh)
trajrects.append(rect)
self.trajectory = trajectory
self.trajlens = trajlens
self.trajdist = trajdist
self.trajrects = trajrects
self.trajrects_wh = trajrects_wh
def trajfeature(self):
'''
分两种情况计算轨迹特征(检测框边界不在图像边界范围内,在图像边界范围内):
-最小长度轨迹trajmin
-最小轨迹长度trajlen_min
-最小轨迹欧氏距离trajdist_max
'''
# idx1 = self.trajlens.index(max(self.trajlens))
idx1 = self.trajrects_wh.index(max(self.trajrects_wh))
trajmax = self.trajectory[idx1]
trajlen_max = self.trajlens[idx1]
trajdist_max = self.trajdist[idx1]
if not self.isCornpoint:
# idx2 = self.trajlens.index(min(self.trajlens))
idx2 = self.trajrects_wh.index(min(self.trajrects_wh))
trajmin = self.trajectory[idx2]
trajlen_min = self.trajlens[idx2]
trajdist_min = self.trajdist[idx2]
else:
trajmin = self.trajectory[0]
trajlen_min = self.trajlens[0]
trajdist_min = self.trajdist[0]
'''最小轨迹长度/最大轨迹长度,越小,代表运动幅度越小'''
trajlen_rate = trajlen_min/(trajlen_max+0.0001)
'''最小轨迹欧氏距离/目标框尺度均值'''
trajdist_rate = trajdist_min/(self.mwh+0.0001)
self.trajmin = trajmin
self.trajmax = trajmax
self.TrajFeat = [trajlen_min, trajlen_max,
trajdist_min, trajdist_max,
trajlen_rate, trajdist_rate]
def pt_state_fids(self, det_y, STATIC_THRESH = 8):
'''
前摄时y一般选择为 box 的 y1 坐标,且需限定商品在购物车内。
inputs
y1D array
parameters
STATIC_THRESH轨迹处于静止状态的阈值。
outputs
输出为差分值小于 STATIC_THRESH 的y中元素的start, end索引
ranges = [(x1, y1),
(x1, y1),
...]
'''
# print(f"The ID is: {self.tid}")
# det_y = np.diff(y, axis=0)
ranges, rangex = [], []
static_indices = np.where(np.abs(det_y) < STATIC_THRESH)[0]
if len(static_indices) == 0:
rangex.append((0, len(det_y)))
return ranges, rangex
start_index = static_indices[0]
for i in range(1, len(static_indices)):
if static_indices[i] != static_indices[i-1] + 1:
ranges.append((start_index, static_indices[i-1] + 1))
start_index = static_indices[i]
ranges.append((start_index, static_indices[-1] + 1))
if len(ranges) == 0:
rangex.append((0, len(det_y)))
return ranges, rangex
idx1, idx2 = ranges[0][0], ranges[-1][1]
if idx1 != 0:
rangex.append((0, idx1))
# 轨迹的最后阶段是运动状态
for k in range(1, len(ranges)):
index1 = ranges[k-1][1]
index2 = ranges[k][0]
rangex.append((index1, index2))
if idx2 != len(det_y):
rangex.append((idx2, len(det_y)))
return ranges, rangex
def PositionState(self, camerType="back"):
'''
camerType: back, 后置摄像头
front, 前置摄像头
'''
if camerType=="back":
incart = cv2.imread(str(parpath/'shopcart/cart_tempt/incart.png'), cv2.IMREAD_GRAYSCALE)
outcart = cv2.imread(str(parpath/'shopcart/cart_tempt/outcart.png'), cv2.IMREAD_GRAYSCALE)
else:
incart = cv2.imread(str(parpath/'shopcart/cart_tempt/incart_ftmp.png'), cv2.IMREAD_GRAYSCALE)
outcart = cv2.imread(str(parpath/'shopcart/cart_tempt/outcart_ftmp.png'), cv2.IMREAD_GRAYSCALE)
# incart = cv2.imread('./cart_tempt/incart_ftmp.png', cv2.IMREAD_GRAYSCALE)
# outcart = cv2.imread('./cart_tempt/outcart_ftmp.png', cv2.IMREAD_GRAYSCALE)
xc, yc = self.cornpoints[:,0].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,1].clip(0,self.imgshape[1]-1).astype(np.int64)
x1, y1 = self.cornpoints[:,6].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,7].clip(0,self.imgshape[1]-1).astype(np.int64)
x2, y2 = self.cornpoints[:,8].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,9].clip(0,self.imgshape[1]-1).astype(np.int64)
# print(self.tid)
Cent_inCartnum = np.count_nonzero(incart[(yc, xc)])
LB_inCartnum = np.count_nonzero(incart[(y1, x1)])
RB_inCartnum = np.count_nonzero(incart[(y2, x2)])
Cent_outCartnum = np.count_nonzero(outcart[(yc, xc)])
LB_outCartnum = np.count_nonzero(outcart[(y1, x1)])
RB_outCartnum = np.count_nonzero(outcart[(y2, x2)])
'''Track完全在车内左下角点、右下角点与 outcart 的交集为 0'''
self.isWholeInCart = False
if LB_outCartnum + RB_outCartnum == 0:
self.isWholeInCart = True
'''Track完全在车外左下角点、中心点与 incart 的交集为 0
右下角点、中心点与 incart 的交集为 0
'''
self.isWholeOutCart = False
if Cent_inCartnum + LB_inCartnum == 0 or Cent_inCartnum + RB_inCartnum == 0:
self.isWholeOutCart = True
self.Cent_isIncart = False
self.LB_isIncart = False
self.RB_isIncart = False
if Cent_inCartnum: self.Cent_isIncart = True
if LB_inCartnum: self.LB_isIncart = True
if RB_inCartnum: self.RB_isIncart = True
self.posState = self.Cent_isIncart+self.LB_isIncart+self.RB_isIncart
def is_freemove(self):
# if self.tid==4:
# print(f"track ID: {self.tid}")
# boxes = self.boxes
# features = self.features
# similars = 1 - np.maximum(0.0, cdist(self.features, self.features, metric = 'cosine'))
box1 = self.boxes[0, :4]
box2 = self.boxes[-1, :4]
''' 第1帧、最后一帧subimg的相似度 '''
feat1 = self.features[0, :][None, :]
feat2 = self.features[-1, :][None, :]
similar = 1 - np.maximum(0.0, cdist(feat1, feat2, metric = 'cosine'))
condta = similar > 0.8
''' 第1帧、最后一帧 boxes 四个角点间的距离 '''
ptd = box2 - box1
ptd1 = np.linalg.norm((ptd[0], ptd[1]))
ptd2 = np.linalg.norm((ptd[2], ptd[1]))
ptd3 = np.linalg.norm((ptd[0], ptd[3]))
ptd4 = np.linalg.norm((ptd[2], ptd[3]))
condtb = ptd1<50 and ptd2<50 and ptd3<50 and ptd4<50
condt = condta and condtb
return condt
def extract_hand_features(self):
assert self.cls == 0, "The class of traj must be HAND!"
self.isHandStatic = False
x0 = (self.boxes[:, 0] + self.boxes[:, 2]) / 2
y0 = (self.boxes[:, 1] + self.boxes[:, 3]) / 2
handXY = np.stack((x0, y0), axis=-1)
# handMaxY0 = np.max(y0)
handCenter = np.array([(max(x0)+min(x0))/2, (max(y0)+min(y0))/2])
handMaxDist = np.max(np.linalg.norm(handXY - handCenter))
if handMaxDist < self.HAND_STATIC_THRESH:
self.isHandStatic = True
return
class doTracks:
def __init__(self, bboxes, trackefeats):
'''fundamental property
trackefeats: dict, key 格式 "fid_bid"
'''
self.bboxes = bboxes
# self.TracksDict = TracksDict
self.frameID = np.unique(bboxes[:, 7].astype(int))
self.trackID = np.unique(bboxes[:, 4].astype(int))
self.lboxes = self.array2list()
self.lfeats = self.getfeats(trackefeats)
'''对 self.tracks 中的元素进行分类,将 track 归入相应列表中'''
self.Hands = []
self.Kids = []
self.Static = []
self.Residual = []
self.Confirmed = []
self.DownWard = [] # subset of self.Residual
self.UpWard = [] # subset of self.Residual
self.FreeMove = [] # subset of self.Residual
def array2list(self):
'''
将 bboxes 变换为 track 列表
bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
Return
lboxes列表列表中元素具有同一 track_idx1y1x2y2 格式
[x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
'''
track_ids = self.bboxes[:, 4].astype(int)
lboxes = []
for t_id in self.trackID:
# print(f"The ID is: {t_id}")
idx = np.where(track_ids == t_id)[0]
box = self.bboxes[idx, :]
assert len(set(box[:, 7])) == len(box), "Please check!!!"
lboxes.append(box)
return lboxes
def getfeats(self, trackefeats):
lboxes = self.lboxes
lfeats = []
for boxes in lboxes:
feats = []
for i in range(boxes.shape[0]):
fid, bid = int(boxes[i, 7]), int(boxes[i, 8])
key = f"{int(fid)}_{int(bid)}"
if key in trackefeats:
feats.append(trackefeats[key])
feats = np.asarray(feats, dtype=np.float32)
lfeats.append(feats)
return lfeats
def similarity(self):
nt = len(self.tracks)
similar_dict = {}
if nt >= 2:
for i in range(nt):
for j in range(i, nt):
tracka = self.tracks[i]
trackb = self.tracks[j]
similar = self.feat_similarity(tracka, trackb)
similar_dict.update({(tracka.tid, trackb.tid): similar})
return similar_dict
def feat_similarity(self, tracka, trackb, metric='cosine'):
boxes_a, boxes_b = tracka.boxes, trackb.boxes
na, nb = tracka.boxes.shape[0], trackb.boxes.shape[0]
feata, featb = [], []
for i in range(na):
fid, bid = tracka.boxes[i, 7:9]
feata.append(self.features_dict[fid][bid])
for i in range(nb):
fid, bid = trackb.boxes[i, 7:9]
featb.append(self.features_dict[fid][bid])
feata = np.asarray(feata, dtype=np.float32)
featb = np.asarray(featb, dtype=np.float32)
similarity_matrix = 1-np.maximum(0.0, cdist(feata, featb, metric))
feata_m = np.mean(feata, axis =0)[None,:]
featb_m = np.mean(featb, axis =0)[None,:]
simi_ab = 1 - cdist(feata_m, featb_m, metric)
print(f'tid {int(boxes_a[0, 4])} vs {int(boxes_b[0, 4])}: {simi_ab[0][0]}')
# return np.max(similarity_matrix)
return simi_ab
def merge_tracks_loop(self, alist):
na, nb = len(alist), 0
while na!=nb:
na = len(alist)
alist = self.merge_tracks(alist) #func is from subclass
nb = len(alist)
return alist
def base_merge_tracks(self, Residual):
"""
对不同id但可能是同一商品的目标进行归并
"""
mergedTracks = []
alist = [t for t in Residual]
while alist:
atrack = alist[0]
cur_list = []
cur_list.append(atrack)
alist.pop(0)
blist = [b for b in alist]
alist = []
for btrack in blist:
# afids = []
# for track in cur_list:
# afids.extend(list(track.boxes[:, 7].astype(np.int_)))
# bfids = btrack.boxes[:, 7].astype(np.int_)
# interfid = set(afids).intersection(set(bfids))
# if len(interfid):
# print("wait!!!")
# if track_equal_track(atrack, btrack) and len(interfid)==0:
if track_equal_track(atrack, btrack):
cur_list.append(btrack)
else:
alist.append(btrack)
mergedTracks.append(cur_list)
return mergedTracks
@staticmethod
def join_tracks(tlista, tlistb):
"""Combine two lists of stracks into a single one."""
exists = {}
res = []
for t in tlista:
exists[t.tid] = 1
res.append(t)
for t in tlistb:
tid = t.tid
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
@staticmethod
def sub_tracks(tlista, tlistb):
track_ids_b = {t.tid for t in tlistb}
return [t for t in tlista if t.tid not in track_ids_b]
def array2frame(self, bboxes):
frameID = np.sort(np.unique(bboxes[:, 7].astype(int)))
fboxes = []
for fid in frameID:
idx = np.where(bboxes[:, 7] == fid)[0]
box = bboxes[idx, :]
fboxes.append(box)
return fboxes
def isintrude(self):
'''
boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
'''
OverlapNum = 3
bboxes = self.bboxes.astype(np.int64)
fboxes = self.array2frame(bboxes)
incart = cv2.bitwise_not(self.incart)
sum_incart = np.zeros(incart.shape, dtype=np.int64)
for fid, boxes in enumerate(fboxes):
for i in range(len(boxes)):
x1, y1, x2, y2 = boxes[i, 0:4]
sum_incart[y1:y2, x1:x2] += 1
sumincart = np.zeros(sum_incart.shape, dtype=np.uint8)
idx255 = np.where(sum_incart >= OverlapNum)
sumincart[idx255] = 255
idxnzr = np.where(sum_incart!=0)
base = np.zeros(sum_incart.shape, dtype=np.uint8)
base[idxnzr] = 255
contours_sum, _ = cv2.findContours(sumincart, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours_base, _ = cv2.findContours(base, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
have_existed, invasion = [], []
for k, ct_temp in enumerate(contours_base):
tmp1 = np.zeros(sum_incart.shape, dtype=np.uint8)
cv2.drawContours(tmp1, [ct_temp], -1, 255, cv2.FILLED)
# 确定轮廓的包含关系
for ct_sum in contours_sum:
tmp2 = np.zeros(sum_incart.shape, dtype=np.uint8)
cv2.drawContours(tmp2, [ct_sum], -1, 255, cv2.FILLED)
tmp = cv2.bitwise_and(tmp1, tmp2)
if np.count_nonzero(tmp) == np.count_nonzero(tmp2):
have_existed.append(k)
inIdx = [i for i in range(len(contours_base)) if i not in have_existed]
invasion = np.zeros(sum_incart.shape, dtype=np.uint8)
for i in inIdx:
cv2.drawContours(invasion, [contours_base[i]], -1, 255, cv2.FILLED)
cv2.imwrite("./result/intrude/invasion.png", invasion)
Intrude = True if len(inIdx)>=1 else False
print(f"is intruded: {Intrude}")
return Intrude

View File

@ -0,0 +1,267 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:36:31 2024
@author: ym
"""
import numpy as np
import cv2
import copy
from tracking.utils.mergetrack import track_equal_track
from scipy.spatial.distance import cdist
from pathlib import Path
curpath = Path(__file__).resolve().parents[0]
curpath = Path(curpath)
parpath = curpath.parent
from .dotracks import doTracks, ShoppingCart
from .track_back import backTrack
class doBackTracks(doTracks):
def __init__(self, bboxes, trackefeats):
super().__init__(bboxes, trackefeats)
self.tracks = [backTrack(b, f) for b, f in zip(self.lboxes, self.lfeats)]
# self.similar_dict = self.similarity()
# self.shopcart = ShoppingCart(bboxes)
self.incart = self.getincart()
def getincart(self):
img1 = cv2.imread(str(parpath/'shopcart/cart_tempt/incart.png'), cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread(str(parpath/'shopcart/cart_tempt/cartedge.png'), cv2.IMREAD_GRAYSCALE)
ret, binary1 = cv2.threshold(img1, 250, 255, cv2.THRESH_BINARY)
ret, binary2 = cv2.threshold(img2, 250, 255, cv2.THRESH_BINARY)
binary = cv2.bitwise_or(binary1, binary2)
return binary
def classify(self):
'''功能:对 tracks 中元素分类 '''
tracks = self.tracks
# 提取手的frame_id并和动目标的frame_id 进行关联
hand_tracks = [t for t in tracks if t.cls==0]
self.Hands.extend(hand_tracks)
tracks = self.sub_tracks(tracks, hand_tracks)
# 提取小孩的track并计算状态left, right, incart
kid_tracks = [t for t in tracks if t.cls==9]
kid_states = [self.kid_state(t) for t in kid_tracks]
self.Kids = [x for x in zip(kid_tracks, kid_states)]
tracks = self.sub_tracks(tracks, kid_tracks)
out_trcak = [t for t in tracks if t.isWholeOutCart]
tracks = self.sub_tracks(tracks, out_trcak)
static_tracks = [t for t in tracks if t.frnum>1 and t.is_static()]
self.Static.extend(static_tracks)
'''剔除静止目标后的 tracks'''
tracks = self.sub_tracks(tracks, static_tracks)
tracks_free = [t for t in tracks if t.frnum>1 and t.is_freemove()]
self.FreeMove.extend(tracks_free)
tracks = self.sub_tracks(tracks, tracks_free)
# '''购物框边界外具有运动状态的干扰目标'''
# out_trcak = [t for t in tracks if t.is_OutTrack()]
# tracks = self.sub_tracks(tracks, out_trcak)
'''轨迹循环归并'''
# merged_tracks = self.merge_tracks(tracks)
merged_tracks = self.merge_tracks_loop(tracks)
[self.associate_with_hand(htrack, gtrack) for htrack in hand_tracks for gtrack in tracks]
tracks = [t for t in merged_tracks if t.frnum > 1]
self.merged_tracks = merged_tracks
static_tracks = [t for t in tracks if t.frnum>1 and t.is_static()]
self.Static.extend(static_tracks)
tracks = self.sub_tracks(tracks, static_tracks)
# for gtrack in tracks:
# for htrack in hand_tracks:
# hand_ious = self.associate_with_hand(htrack, gtrack)
# if len(hand_ious):
# gtrack.Hands.append(htrack)
# gtrack.HandsIou.append(hand_ious)
# htrack.Goods.append((gtrack, hand_ious))
# for htrack in hand_tracks:
# self.merge_based_hands(htrack)
self.Residual = tracks
self.Confirmed = self.confirm_track()
def confirm_track(self):
Confirmed = None
mindist = 0
for track in self.Residual:
md = min(track.trajrects_wh)
if md > mindist:
mindist = copy.deepcopy(md)
Confirmed = copy.deepcopy(track)
if Confirmed is not None:
return [Confirmed]
return []
# def merge_based_hands(self, htrack):
# gtracks = htrack.Goods
# if len(gtracks) >= 2:
# atrack, afious = gtracks[0]
# btrack, bfious = gtracks[1]
def associate_with_hand(self, htrack, gtrack):
'''
迁移至基类:
手部 Track、商品 Track 建立关联的依据:
a. 运动帧的帧索引有交集
b. 帧索引交集部分iou均大于0
'''
assert htrack.cls==0 and gtrack.cls!=0 and gtrack.cls!=9, 'Track cls is Error!'
hand_ious = []
hboxes = np.empty(shape=(0, 9), dtype = np.float64)
gboxes = np.empty(shape=(0, 9), dtype = np.float64)
# start, end 为索引值,需要 start:(end+1)
for start, end in htrack.moving_index:
hboxes = np.concatenate((hboxes, htrack.boxes[start:end+1, :]), axis=0)
for start, end in gtrack.moving_index:
gboxes = np.concatenate((gboxes, gtrack.boxes[start:end+1, :]), axis=0)
hfids, gfids = hboxes[:, 7], gboxes[:, 7]
fids = sorted(set(hfids).intersection(set(gfids)))
if len(fids)==0:
return None
# print(f"Goods ID: {gtrack.tid}, Hand ID: {htrack.tid}")
for f in fids:
h = np.where(hboxes[:,7] == f)[0][0]
g = np.where(gboxes[:,7] == f)[0][0]
x11, y11, x12, y12 = hboxes[h, 0:4]
x21, y21, x22, y22 = gboxes[g, 0:4]
x1, y1 = max((x11, x21)), max((y11, y21))
x2, y2 = min((x12, x22)), min((y12, y22))
union = (x2 - x1).clip(0) * (y2 - y1).clip(0)
area1 = (x12 - x11) * (y12 - y11)
area2 = (x22 - x21) * (y22 - y21)
iou = union / (area1 + area2 - union + 1e-6)
if iou >= 0.01:
gtrack.Hands.append((htrack.tid, f, iou))
return gtrack.Hands
def merge_tracks(self, Residual):
"""
对不同id但可能是同一商品的目标进行归并
和 dotrack_front.py中函数相同可以合并可以合并至基类
"""
mergedTracks = self.base_merge_tracks(Residual)
oldtracks, newtracks = [], []
for tracklist in mergedTracks:
if len(tracklist) > 1:
boxes = np.empty((0, 9), dtype=np.float32)
feats = np.empty((0, 256), dtype=np.float32)
for i, track in enumerate(tracklist):
if i==0: ntid, ncls=track.boxes[0, 4], track.boxes[0, 6]
iboxes = track.boxes.copy()
ifeats = track.features.copy()
# iboxes[:, 4], iboxes[:, 6] = ntid, ncls
boxes = np.concatenate((boxes, iboxes), axis=0)
feats = np.concatenate((feats, ifeats), axis=0)
oldtracks.append(track)
fid_indices = np.argsort(boxes[:, 7])
boxes_fid = boxes[fid_indices]
feats_fid = feats[fid_indices]
newtracks.append(backTrack(boxes_fid, feats_fid))
elif len(tracklist) == 1:
oldtracks.append(tracklist[0])
newtracks.append(tracklist[0])
redu = self.sub_tracks(Residual, oldtracks)
merged = self.join_tracks(redu, newtracks)
return merged
def kid_state(self, track):
left_dist = track.cornpoints[:, 2]
right_dist = 1024 - track.cornpoints[:, 4]
if np.sum(left_dist<30)/track.frnum>0.8 and np.sum(right_dist>512)/track.frnum>0.7:
kidstate = "left"
elif np.sum(left_dist>512)/track.frnum>0.7 and np.sum(right_dist<30)/track.frnum>0.8:
kidstate = "right"
else:
kidstate = "incart"
return kidstate
def isuptrack(self, track):
Flag = False
return Flag
def isdowntrack(self, track):
Flag = False
return Flag
def isfreetrack(self, track):
Flag = False
return Flag

View File

@ -0,0 +1,193 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:38:20 2024
@author: ym
"""
import cv2
import copy
import numpy as np
from pathlib import Path
curpath = Path(__file__).resolve().parents[0]
curpath = Path(curpath)
parpath = curpath.parent
# from tracking.utils.mergetrack import track_equal_track
from .dotracks import doTracks
from .track_front import frontTrack
class doFrontTracks(doTracks):
def __init__(self, bboxes, frameDictList):
super().__init__(bboxes, frameDictList)
# self.tracks = [frontTrack(b) for b in self.lboxes]
self.tracks = [frontTrack(b, f) for b, f in zip(self.lboxes, self.lfeats)]
self.incart = self.getincart()
def getincart(self):
img = cv2.imread(str(parpath/'shopcart/cart_tempt/incart_ftmp.png'), cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
def classify(self):
'''功能:对 tracks 中元素分类 '''
tracks = self.tracks
'''提取手的 tracks'''
hand_tracks = [t for t in tracks if t.cls==0]
self.Hands.extend(hand_tracks)
tracks = self.sub_tracks(tracks, hand_tracks)
'''提取小孩的 tracks'''
kid_tracks = [t for t in tracks if t.cls==9]
tracks = self.sub_tracks(tracks, kid_tracks)
out_trcak = [t for t in tracks if t.isWholeOutCart]
tracks = self.sub_tracks(tracks, out_trcak)
'''静态 tracks'''
static_tracks = [t for t in tracks if t.frnum>1 and t.is_static()]
'''剔除静止目标后的 tracks'''
tracks = self.sub_tracks(tracks, static_tracks)
tracks_free = [t for t in tracks if t.frnum>1 and t.is_freemove()]
self.FreeMove.extend(tracks_free)
tracks = self.sub_tracks(tracks, tracks_free)
# [self.associate_with_hand(htrack, gtrack) for htrack in hand_tracks for gtrack in tracks]
'''轨迹循环归并'''
merged_tracks = self.merge_tracks_loop(tracks)
[self.associate_with_hand(htrack, gtrack) for htrack in hand_tracks for gtrack in merged_tracks]
tracks = [t for t in merged_tracks if t.frnum > 1]
# for gtrack in tracks:
# # print(f"Goods ID:{gtrack.tid}")
# for htrack in hand_tracks:
# hand_ious = self.associate_with_hand(htrack, gtrack)
# if len(hand_ious):
# gtrack.Hands.append(htrack)
# gtrack.HandsIou.append(hand_ious)
'''静止 tracks 判断与剔除静止 tracks'''
static_tracks = [t for t in tracks if t.frnum>1 and t.is_static()]
tracks = self.sub_tracks(tracks, static_tracks)
freemoved_tracks = [t for t in tracks if t.is_free_move()]
tracks = self.sub_tracks(tracks, freemoved_tracks)
self.Residual = tracks
self.Confirmed = self.confirm_track()
def confirm_track(self):
Confirmed = None
mindist = 0
for track in self.Residual:
md = min(track.trajrects_wh)
if md > mindist:
mindist = copy.deepcopy(md)
Confirmed = copy.deepcopy(track)
if Confirmed is not None:
return [Confirmed]
return []
def associate_with_hand(self, htrack, gtrack):
'''
迁移至基类:
手部 Track、商品 Track 建立关联的依据:
a. 运动帧的帧索引有交集
b. 帧索引交集部分iou均大于0
'''
assert htrack.cls==0 and gtrack.cls!=0 and gtrack.cls!=9, 'Track cls is Error!'
hboxes = np.empty(shape=(0, 9), dtype = np.float)
gboxes = np.empty(shape=(0, 9), dtype = np.float)
# start, end 为索引值,需要 start:(end+1)
for start, end in htrack.dynamic_y2:
hboxes = np.concatenate((hboxes, htrack.boxes[start:end+1, :]), axis=0)
for start, end in gtrack.dynamic_y1:
gboxes = np.concatenate((gboxes, gtrack.boxes[start:end+1, :]), axis=0)
hfids, gfids = hboxes[:, 7], gboxes[:, 7]
fids = sorted(set(hfids).intersection(set(gfids)))
if len(fids)==0:
return None
# print(f"Goods ID: {gtrack.tid}, Hand ID: {htrack.tid}")
for f in fids:
h = np.where(hfids==f)[0][0]
g = np.where(gfids==f)[0][0]
x11, y11, x12, y12 = hboxes[h, 0:4]
x21, y21, x22, y22 = gboxes[g, 0:4]
x1, y1 = max((x11, x21)), max((y11, y21))
x2, y2 = min((x12, x22)), min((y12, y22))
union = (x2 - x1).clip(0) * (y2 - y1).clip(0)
area1 = (x12 - x11) * (y12 - y11)
area2 = (x22 - x21) * (y22 - y21)
iou = union / (area1 + area2 - union + 1e-6)
if iou >= 0.01:
gtrack.Hands.append((htrack.tid, f, iou))
return gtrack.Hands
def merge_tracks(self, Residual):
"""
对不同id但可能是同一商品的目标进行归并
和 dotrack_back.py中函数相同可以合并至基类
"""
mergedTracks = self.base_merge_tracks(Residual)
oldtracks, newtracks = [], []
for tracklist in mergedTracks:
if len(tracklist) > 1:
boxes = np.empty((0, 9), dtype=np.float32)
feats = np.empty((0, 256), dtype=np.float32)
for i, track in enumerate(tracklist):
if i==0: ntid, ncls=track.boxes[0, 4], track.boxes[0, 6]
iboxes = track.boxes.copy()
ifeats = track.features.copy()
# iboxes[:, 4], iboxes[:, 6] = ntid, ncls
boxes = np.concatenate((boxes, iboxes), axis=0)
feats = np.concatenate((feats, ifeats), axis=0)
oldtracks.append(track)
fid_indices = np.argsort(boxes[:, 7])
boxes_fid = boxes[fid_indices]
feats_fid = feats[fid_indices]
newtracks.append(frontTrack(boxes_fid, feats_fid))
elif len(tracklist) == 1:
oldtracks.append(tracklist[0])
newtracks.append(tracklist[0])
redu = self.sub_tracks(Residual, oldtracks)
merged = self.join_tracks(redu, newtracks)
return merged

View File

@ -0,0 +1,241 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:28:47 2024
@author: ym
"""
import cv2
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
from .dotracks import MoveState, Track
from pathlib import Path
curpath = Path(__file__).resolve().parents[0]
curpath = Path(curpath)
parpath = curpath.parent
class backTrack(Track):
# boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# 0, 1, 2, 3, 4, 5, 6, 7, 8
def __init__(self, boxes, features, imgshape=(1024, 1280)):
super().__init__(boxes, features, imgshape)
'''该函数依赖项: self.cornpoints
MarginState: list, seven elements, 表示轨迹中boxes出现在图像的
[左上,右上,左中,右中,左下,右下底部]
'''
self.isCornpoint, self.MarginState = self.isimgborder()
'''该函数依赖项: self.isCornpoint不能在父类中初始化'''
self.trajfeature()
'''静止点帧索引'''
# self.static_index = self.compute_static_fids()
'''运动点帧索引(运动帧两端的静止帧索引)'''
# self.moving_index = self.compute_dynamic_fids()
self.static_index, self.moving_index = self.compute_static_dynamic_fids()
'''该函数依赖项: self.cornpoints定义 4 个商品位置变量:
self.Cent_isIncart, self.LB_isIncart, self.RB_isIncart
self.posState = self.Cent_isIncart+self.LB_isIncart+self.RB_isIncart'''
self.PositionState(camerType="back")
'''self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates'''
self.compute_ious_feat()
def isimgborder(self, BoundPixel=10, BoundThresh=0.3):
x1, y1 = self.cornpoints[:,2], self.cornpoints[:,3],
x2, y2 = self.cornpoints[:,8], self.cornpoints[:,9]
condt1 = sum(abs(x1)<BoundPixel) / self.frnum > BoundThresh
condt2 = sum(abs(y1)<BoundPixel) / self.frnum > BoundThresh
condt3 = sum(abs(x2-self.imgshape[0])<BoundPixel) / self.frnum > BoundThresh
condt4 = sum(abs(y2-self.imgshape[1])<BoundPixel) / self.frnum > BoundThresh
condt = condt1 or condt2 or condt3 or condt4
isCornpoint = False
if condt:
isCornpoint = True
condtA = condt1 and condt2
condtB = condt3 and condt2
condtC = condt1 and not condt2 and not condt4
condtD = condt3 and not condt2 and not condt4
condtE = condt1 and condt4
condtF = condt3 and condt4
condtG = condt4 and not condt1 and not condt3
MarginState = [condtA, condtB, condtC, condtD, condtE, condtF, condtG]
return isCornpoint, MarginState
def PCA(self):
self.pca = PCA()
X = self.cornpoints[:, 0:2]
self.pca.fit(X)
def compute_ious_feat(self):
'''输出:
self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates
其中:
boxes流track中所有boxes形成的轨迹图可分为三部分incart, outcart, cartboarder
incart_iou, outcart_iou, cartboarder_iou各部分和 boxes流的 iou。
incart_iou = 0track在购物车外
outcart_iou = 0track在购物车内也可能是通过左下角、右下角置入购物车
maxbox_iou, minbox_ioutrack中最大、最小 box 和boxes流的iou二者差值越小越接近 1表明track的运动型越小。
incartrates: 各box和incart的iou时序由小变大反应的是置入过程由大变小反应的是取出过程
'''
incart = cv2.imread(str(parpath/"shopcart/cart_tempt/incart.png"), cv2.IMREAD_GRAYSCALE)
outcart = cv2.imread(str(parpath/"shopcart/cart_tempt/outcart.png"), cv2.IMREAD_GRAYSCALE)
cartboarder = cv2.imread(str(parpath/"shopcart/cart_tempt/cartboarder.png"), cv2.IMREAD_GRAYSCALE)
incartrates = []
temp = np.zeros(incart.shape, np.uint8)
maxarea, minarea = 0, self.imgshape[0]*self.imgshape[1]
for i in range(self.frnum):
# x, y, w, h = self.boxes[i, 0:4]
x = (self.boxes[i, 2] + self.boxes[i, 0]) / 2
w = (self.boxes[i, 2] - self.boxes[i, 0]) / 2
y = (self.boxes[i, 3] + self.boxes[i, 1]) / 2
h = (self.boxes[i, 3] - self.boxes[i, 1]) / 2
if w*h > maxarea: maxarea = w*h
if w*h < minarea: minarea = w*h
cv2.rectangle(temp, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), 255, cv2.FILLED)
temp1 = np.zeros(incart.shape, np.uint8)
cv2.rectangle(temp1, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), 255, cv2.FILLED)
temp2 = cv2.bitwise_and(incart, temp1)
inrate = cv2.countNonZero(temp1)/(w*h)
incartrates.append(inrate)
isincart = cv2.bitwise_and(incart, temp)
isoutcart = cv2.bitwise_and(outcart, temp)
iscartboarder = cv2.bitwise_and(cartboarder, temp)
num_temp = cv2.countNonZero(temp)
num_incart = cv2.countNonZero(isincart)
num_outcart = cv2.countNonZero(isoutcart)
num_cartboarder = cv2.countNonZero(iscartboarder)
incart_iou = num_incart/num_temp
outcart_iou = num_outcart/num_temp
cartboarder_iou = num_cartboarder/num_temp
maxbox_iou = maxarea/num_temp
minbox_iou = minarea/num_temp
self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates
def compute_static_dynamic_fids(self):
if self.MarginState[0] or self.MarginState[2]:
idx1 = 4
elif self.MarginState[1] or self.MarginState[3]:
idx1 = 3
elif self.MarginState[4]:
idx1 = 2
elif self.MarginState[5]:
idx1 = 1
elif self.MarginState[6]:
if self.trajlens[1] < self.trajlens[2]:
idx1 = 1
else:
idx1 = 2
else:
idx1 = self.trajlens.index(min(self.trajlens))
# idx1 = self.trajlens.index(min(self.trajlens))
trajmin = self.trajectory[idx1]
static, dynamic = self.pt_state_fids(trajmin)
static = np.array(static)
dynamic = np.array(dynamic)
if static.size:
indx = np.argsort(static[:, 0])
static = static[indx]
if dynamic.size:
indx = np.argsort(dynamic[:, 0])
dynamic = dynamic[indx]
return static, dynamic
def is_static(self):
'''静态情况 1: 目标关键点最小相对运动轨迹 < 0.2, 指标值偏大
TrajFeat = [trajlen_min, trajlen_max,
trajdist_min, trajdist_max,
trajlen_rate, trajdist_rate]
'''
# print(f"TrackID: {self.tid}")
boxes = self.boxes
'''静态情况 1: '''
condt1 = self.TrajFeat[5] < 0.2 or self.TrajFeat[3] < 120
'''静态情况 2: 目标初始状态为静止,适当放宽关键点最小相对运动轨迹 < 0.5'''
condt2 = self.static_index.size > 0 \
and self.static_index[0, 0] <= 2 \
and self.static_index[0, 1] >= 5 \
and self.TrajFeat[5] < 0.5 \
and self.TrajFeat[1] < 240 \
and self.isWholeInCart
# and self.posState >= 2
# and self.TrajFeat[0] < 240 \
'''静态情况 3: 目标初始状态和最终状态均为静止'''
condt3 = self.static_index.shape[0] >= 2 \
and self.static_index[0, 0] <= 2 \
and self.static_index[0, 1] >= 5 \
and self.static_index[-1, 1] >= self.frnum-3 \
and self.TrajFeat[1] < 240 \
and self.isWholeInCart
# and self.posState >= 2
# and self.TrajFeat[0] < 240 \
condt4 = self.static_index.shape[0] >= 2 \
and self.static_index[0, 0] <= 2 \
and self.static_index[0, 1] >= 6 \
and self.static_index[-1, 0] <= self.frnum-5 \
and self.static_index[-1, 1] >= self.frnum-2
condt = condt1 or condt2 or condt3 or condt4
return condt
def is_OutTrack(self):
if self.posState <= 1:
isout = True
else:
isout = False
return isout
def compute_distance(self):
pass
def move_start_fid(self):
pass
def move_end_fid(self):
pass

View File

@ -0,0 +1,194 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:33:01 2024
@author: ym
"""
import numpy as np
import cv2
# from sklearn.cluster import KMeans
from .dotracks import MoveState, Track
from pathlib import Path
curpath = Path(__file__).resolve().parents[0]
curpath = Path(curpath)
parpath = curpath.parent
class frontTrack(Track):
# boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# 0, 1, 2, 3, 4, 5, 6, 7, 8
def __init__(self, boxes, features, imgshape=(1024, 1280)):
super().__init__(boxes, features, imgshape)
self.CART_HIGH_THRESH1 = imgshape[1]/2.98
'''y1、y2静止状态区间值是 boxes 中对 axis=0 的索引,不是帧索引'''
det_y1 = np.diff(boxes[:, 1], axis=0)
det_y2 = np.diff(boxes[:, 3], axis=0)
self.static_y1, self.dynamic_y1 = self.pt_state_fids(det_y1)
self.static_y2, self.dynamic_y2 = self.pt_state_fids(det_y2)
self.isCornpoint = self.is_left_or_right_cornpoint()
self.isBotmpoint = self.is_bottom_cornpoint()
'''该函数依赖项: self.isCornpoint不能在父类中初始化'''
self.trajfeature()
self.PositionState(camerType="front")
'''手部状态分析'''
self.HAND_STATIC_THRESH = 100
self.CART_POSIT_0 = 430
self.CART_POSIT_1 = 620
def is_left_or_right_cornpoint(self):
''' 基于 all(boxes)
boxes左下角点和图像左下角点重叠 或
boxes右下角点和图像左下角点重叠
'''
x1, y1 = self.boxes[:, 0], self.boxes[:, 1]
x2, y2 = self.boxes[:, 2], self.boxes[:, 3]
# Left-Bottom cornpoint
condt1 = all(x1 < 5) and all(y2 > self.imgshape[1]-5)
# Right-Bottom cornpoint
condt2 = all(x2 > self.imgshape[0]-5) and all(y2 > self.imgshape[1]-5)
condt = condt1 or condt2
return condt
def is_edge_cornpoint(self):
'''基于 all(boxes)boxes是否和图像左右边缘重叠'''
x1, x2 = self.boxes[:, 0], self.boxes[:, 2]
condt = all(x1 < 3) or all(x2 > self.imgshape[0]-3)
return condt
def is_bottom_cornpoint(self):
'''基于 all(boxes)boxes是否和图像下边缘重叠'''
condt = all(self.boxes[:, 3] > self.imgshape[1]-20)
return condt
def is_static(self):
assert self.frnum > 1, "boxes number must greater than 1"
# print(f"The ID is: {self.tid}")
# 手部和小孩目标不考虑
if self.cls == 0 or self.cls == 9:
return False
# boxes 全部 y2=1280
if self.isBotmpoint:
return True
boxes = self.boxes
y0 = (boxes[:, 1]+boxes[:, 3])/2
## 纵轴矢量和
sum_y0 = y0[-1] - y0[0]
sum_y1 = boxes[-1, 1]-boxes[0, 1]
sum_y2 = boxes[-1, 3]-boxes[0, 3]
# 一些需要考虑的特殊情况
isbottom = max(boxes[:, 3]) > 1280-3
istop = min(boxes[:, 1]) < 3
isincart = min(y0) > self.CART_HIGH_THRESH1
uncert = abs(sum_y1)<100 and abs(sum_y2)<100
'''初始条件:商品中心点始终在购物车内、'''
condt0 = max((boxes[:, 1]+boxes[:, 3])/2) > self.CART_HIGH_THRESH1
'''条件1轨迹运动纵向和y1 或 y2描述商品轨迹长度存在情况
(1). 检测框可能与图像上下边缘重合,
(2). 上边或下边存在跳动
'''
if isbottom and istop:
condt1 = abs(sum_y0) < 300
elif isbottom: # y2在底部用y1表征运动
condt1 = sum_y1 > -120 and abs(sum_y0)<80 # 有底部点方向向上阈值小于100
elif istop: # y1在顶部用y2表征运动
condt1 = abs(sum_y2) < 100
else:
condt1 = (abs(sum_y1) < 30 or abs(sum_y2)<30)
'''条件2轨迹的开始和结束阶段均处于静止状态, 利用静止状态区间判断,用 y1
a. 商品在购物车内,
b. 检测框的起始阶段和结束阶段均为静止状态
c. 静止帧长度 > 3'''
condt2 = False
if len(self.static_y1)>=2:
condt_s0 = self.static_y1[0][0]==0 and self.static_y1[0][1] - self.static_y1[0][0] >= 3
condt_s1 = self.static_y1[-1][1]==self.frnum-1 and self.static_y1[-1][1] - self.static_y1[-1][0] >= 3
condt2 = condt_s0 and condt_s1 and isincart
condt = condt0 and (condt1 or condt2)
return condt
def is_upward(self):
'''判断商品是否取出,'''
print(f"The ID is: {self.tid}")
def is_free_move(self):
if self.frnum == 1:
return True
# print(f"The ID is: {self.tid}")
y0 = (self.boxes[:, 1] + self.boxes[:, 3]) / 2
det_y0 = np.diff(y0, axis=0)
sum_y0 = y0[-1] - y0[0]
'''情况1中心点向下 '''
## 初始条件:商品第一次检测到在购物车内
condt0 = y0[0] > self.CART_HIGH_THRESH1
condt_a = False
## 条件1商品初始为静止状态静止条件应严格一些
condt11, condt12 = False, False
if len(self.static_y1)>0:
condt11 = self.static_y1[0][0]==0 and self.static_y1[0][1] - self.static_y1[0][0] >= 5
if len(self.static_y2)>0:
condt12 = self.static_y2[0][0]==0 and self.static_y2[0][1] - self.static_y2[0][0] >= 5
# 条件2商品中心发生向下移动
condt2 = y0[-1] > y0[0]
# 综合判断a
condt_a = condt0 and (condt11 or condt12) and condt2
'''情况2中心点向上 '''
## 商品中心点向上移动但没有关联的Hand轨迹也不是左右边界点
condt_b = condt0 and len(self.Hands)==0 and y0[-1] < y0[0] and (not self.is_edge_cornpoint()) and min(y0)>self.CART_HIGH_THRESH1
'''情况3: 商品在购物车内,但运动方向无序'''
## 中心点在购物车内,纵向轨迹和小于轨迹差中绝对值最大的两个值的和,说明运动没有主方向
condt_c = False
if self.frnum > 3:
condt_c = all(y0>self.CART_HIGH_THRESH1) and \
(abs(sum_y0) < sum(np.sort(np.abs(det_y0))[::-1][:2])-1)
condt = (condt_a or condt_b or condt_c) and self.cls!=0
return condt

View File

@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 29 10:28:21 2024
未来需将这一部分和轨迹分析代码集成
@author: ym
"""
import numpy as np
import cv2
from scipy.spatial.distance import cdist
class TProp:
def __init__(self, boxes):
self.boxes = boxes
class TProp:
'''抽象基类,不能实例化对象'''
def __init__(self, boxes):
'''
boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
'''
# assert len(set(boxes[:, 4].astype(int))) == 1, "For a Track, track_id more than 1"
# assert len(set(boxes[:, 6].astype(int))) == 1, "For a Track, class number more than 1"
self.boxes = boxes
'''5个关键点中心点、左上点、右上点、左下点、右下点 )坐标'''
self.compute_cornpoints()
'''5个关键点轨迹特征可以在子类中实现降低顺序处理时的计算量
(中心点、左上点、右上点、左下点、右下点 )轨迹特征'''
self.compute_cornpts_feats()
self.distmax = max(self.trajdist)
def compute_cornpoints(self):
'''
cornpoints 共10项分别是个点的坐标值x, y
(center, top_left, top_right, bottom_left, bottom_right)
'''
boxes = self.boxes
cornpoints = np.zeros((self.frnum, 10))
cornpoints[:,0] = (boxes[:, 0] + boxes[:, 2]) / 2
cornpoints[:,1] = (boxes[:, 1] + boxes[:, 3]) / 2
cornpoints[:,2], cornpoints[:,3] = boxes[:, 0], boxes[:, 1]
cornpoints[:,4], cornpoints[:,5] = boxes[:, 2], boxes[:, 1]
cornpoints[:,6], cornpoints[:,7] = boxes[:, 0], boxes[:, 3]
cornpoints[:,8], cornpoints[:,9] = boxes[:, 2], boxes[:, 3]
self.cornpoints = cornpoints
def compute_cornpts_feats(self):
'''
'''
trajectory = []
trajlens = []
trajdist = []
trajrects = []
for k in range(5):
# diff_xy2 = np.power(np.diff(self.cornpoints[:, 2*k:2*(k+1)], axis = 0), 2)
# trajlen = np.sum(np.sqrt(np.sum(diff_xy2, axis = 1)))
X = self.cornpoints[:, 2*k:2*(k+1)]
traj = np.linalg.norm(np.diff(X, axis=0), axis=1)
trajectory.append(traj)
trajlen = np.sum(traj)
trajlens.append(trajlen)
ptdist = np.max(cdist(X, X))
trajdist.append(ptdist)
'''最小外接矩形:
rect[0]: 中心(x, y)
rect[1]: (w, h)
rect[0]: 旋转角度 (-90°, 0]
'''
rect = cv2.minAreaRect(X.astype(np.int64))
trajrects.append(rect)
self.trajectory = trajectory
self.trajlens = trajlens
self.trajdist = trajdist
self.trajrects = trajrects

View File

@ -0,0 +1,173 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 23 11:04:48 2024
@author: ym
"""
import numpy as np
import cv2
from scipy.spatial.distance import cdist
# from trackers.utils import matching
def readDict(boxes, feat_dicts):
feat = []
for i in range(boxes.shape[0]):
tid, fid, bid = int(boxes[i, 4]), int(boxes[i, 7]), int(boxes[i, 8])
feat.append(feat_dicts[fid][bid])
# img = feat_dicts[fid][f'{bid}_img']
# cv2.imwrite(f'./result/imgs/{tid}_{fid}_{bid}.png', img)
return np.asarray(feat, dtype=np.float32)
def track_equal_track(atrack, btrack, feat_dicts):
# boxes: [x, y, w, h, track_id, score, cls, frame_index, box_index]
aboxes = atrack.boxes
bboxes = btrack.boxes
''' 1. 判断轨迹在时序上是否有交集 '''
afids = aboxes[:, 7].astype(np.int_)
bfids = bboxes[:, 7].astype(np.int_)
# 帧索引交集
interfid = set(afids).intersection(set(bfids))
# 或者直接判断帧索引是否有交集,返回 Ture or False
# interfid = set(afids).isdisjoint(set(bfids))
''' 2. 轨迹空间iou'''
alabel = np.array([0] * afids.size, dtype=np.int_)
blabel = np.array([1] * bfids.size, dtype=np.int_)
label = np.concatenate((alabel, blabel), axis=0)
fids = np.concatenate((afids, bfids), axis=0)
indices = np.argsort(fids)
idx_pair = []
for i in range(len(indices)-1):
idx1, idx2 = indices[i], indices[i+1]
if label[idx1] != label[idx2] and fids[idx2] - fids[idx1] == 1:
if label[idx1] == 0:
a_idx = idx1
b_idx = idx2-alabel.size
else:
a_idx = idx2
b_idx = idx1-alabel.size
idx_pair.append((a_idx, b_idx))
ious = []
for a, b in idx_pair:
abox, bbox = aboxes[a, :], bboxes[b, :]
xa1, ya1 = abox[0] - abox[2]/2, abox[1] - abox[3]/2
xa2, ya2 = abox[0] + abox[2]/2, abox[1] + abox[3]/2
xb1, yb1 = bbox[0] - bbox[2]/2, bbox[1] - bbox[3]/2
xb2, yb2 = bbox[0] + bbox[2]/2, bbox[1] + bbox[3]/2
inter = (np.minimum(xb2, xa2) - np.maximum(xb1, xa1)).clip(0) * \
(np.minimum(yb2, ya2) - np.maximum(yb1, ya1)).clip(0)
# Union Area
box1_area = abox[2] * abox[3]
box2_area = bbox[2] * bbox[3]
union = box1_area + box2_area - inter + 1e-6
ious.append(inter/union)
''' 3. 轨迹特征相似度判断'''
afeat = readDict(aboxes, feat_dicts)
bfeat = readDict(bboxes, feat_dicts)
feat = np.concatenate((afeat, bfeat), axis=0)
emb_simil = 1-np.maximum(0.0, cdist(feat, feat, 'cosine'))
emb_ = 1-cdist(np.mean(afeat, axis=0)[None, :], np.mean(bfeat, axis=0)[None, :], 'cosine')
cont1 = False if len(interfid) else True
cont2 = all(iou>0.5 for iou in ious)
cont3 = emb_[0, 0]>0.75
cont = cont1 and cont2 and cont3
return cont
def track_equal_str(atrack, btrack):
if atrack == btrack:
return True
else:
return False
def merge_track(Residual):
out_list = []
alist = [t for t in Residual]
while alist:
atrack = alist[0]
cur_list = []
cur_list.append(atrack)
alist.pop(0)
blist = [b for b in alist]
alist = []
for btrack in blist:
if track_equal_str(atrack, btrack):
cur_list.append(btrack)
else:
alist.append(btrack)
out_list.append(cur_list)
return out_list
def main():
Residual = ['a', 'b', 'c', 'd', 'a', 'b', 'c', 'b', 'c', 'd']
out_list = merge_track(Residual)
print(Residual)
print(out_list)
if __name__ == "__main__":
main()
# =============================================================================
# for i, atrack in enumerate(input_list):
# cur_list = []
# cur_list.append(atrack)
# del input_list[i]
#
# for j, btrack in enumerate(input_list):
# if track_equal(atrack, btrack):
# cur_list.append(btrack)
# del input_list[j]
#
# out_list.append(cur_list)
# =============================================================================

View File

@ -0,0 +1,459 @@
# -*- coding: utf-8 -*-
"""
Created on Thu May 30 14:03:03 2024
现场测试性能分析
@author: ym
"""
import os
import cv2
import numpy as np
from pathlib import Path
import warnings
import sys
sys.path.append(r"D:\DetectTracking")
from tracking.utils.plotting import Annotator, colors, draw_tracking_boxes
from tracking.utils import Boxes, IterableSimpleNamespace, yaml_load
from tracking.trackers import BOTSORT, BYTETracker
from tracking.dotrack.dotracks_back import doBackTracks
from tracking.dotrack.dotracks_front import doFrontTracks
from tracking.utils.drawtracks import plot_frameID_y2, draw_all_trajectories
from tracking.utils.read_data import extract_data, read_deletedBarcode_file, read_tracking_output, read_returnGoods_file
from contrast.one2n_contrast import get_contrast_paths, one2n_return
from tracking.utils.annotator import TrackAnnotator
W, H = 1024, 1280
Mode = 'front' #'back'
ImgFormat = ['.jpg', '.jpeg', '.png', '.bmp']
'''调用tracking()函数,利用本地跟踪算法获取各目标轨迹,可以比较本地跟踪算法与现场跟踪算法的区别。'''
def init_tracker(tracker_yaml = None, bs=1):
"""
Initialize tracker for object tracking during prediction.
"""
TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
cfg = IterableSimpleNamespace(**yaml_load(tracker_yaml))
tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
return tracker
def tracking(bboxes, ffeats):
tracker_yaml = r"./trackers/cfg/botsort.yaml"
tracker = init_tracker(tracker_yaml)
TrackBoxes = np.empty((0, 9), dtype = np.float32)
TracksDict = {}
'''========================== 执行跟踪处理 ============================='''
# dets 与 feats 应保持严格对应
for dets, feats in zip(bboxes, ffeats):
det_tracking = Boxes(dets).cpu().numpy()
tracks = tracker.update(det_tracking, features=feats)
'''tracks: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
这里frame_index 也可以用视频的 帧ID 代替, box_index 保持不变
'''
if len(tracks):
TrackBoxes = np.concatenate([TrackBoxes, tracks], axis=0)
FeatDict = {}
for track in tracks:
tid = int(track[8])
FeatDict.update({tid: feats[tid, :]})
frameID = tracks[0, 7]
# print(f"frameID: {int(frameID)}")
assert len(tracks) == len(FeatDict), f"Please check the func: tracker.update() at frameID({int(frameID)})"
TracksDict[f"frame_{int(frameID)}"] = {"feats":FeatDict}
return TrackBoxes, TracksDict
def read_imgs(imgspath, CamerType):
'''
inputs:
imgspath序列图像地址
CamerType相机类型0后摄1前摄
outputs
imgs图像序列
功能:
根据CamerType类型读取imgspath文件夹中的图像并根据帧索引进行排序。
do_tracking()中调用该函数实现1读取imgs并绘制各目标轨迹框2获取subimgs
'''
imgs, frmIDs = [], []
for filename in os.listdir(imgspath):
file, ext = os.path.splitext(filename)
flist = file.split('_')
if len(flist)==4 and ext in ImgFormat:
camID, frmID = flist[0], int(flist[-1])
if camID==CamerType:
img = cv2.imread(os.path.join(imgspath, filename))
imgs.append(img)
frmIDs.append(frmID)
if len(frmIDs):
indice = np.argsort(np.array(frmIDs))
imgs = [imgs[i] for i in indice]
return imgs
def do_tracking(fpath, savedir, event_name='images'):
'''
args:
fpath: 算法各模块输出的data文件地址匹配
savedir: 对 fpath 各模块输出的复现;
分析具体视频时,需指定 fpath 和 savedir
outputs:
img_tracking目标跟踪轨迹、本地轨迹分析算法的轨迹对比图
abimg现场轨迹分析算法、轨迹选择输出的对比图
'''
# fpath = r'D:\contrast\dataset\1_to_n\709\20240709-102758_6971558612189\1_track.data'
# savedir = r'D:\contrast\dataset\result\20240709-102843_6958770005357_6971558612189\error_6971558612189'
imgpath, dfname = os.path.split(fpath)
CamerType = dfname.split('_')[0]
'''1.1 构造 0/1_tracking_output.data 文件地址,读取文件数据'''
tracking_output_path = os.path.join(imgpath, CamerType + '_tracking_output.data')
basename = os.path.basename(imgpath)
if not os.path.isfile(fpath):
print(f"{basename}: Can't find {dfname} file!")
return None, None
if not os.path.isfile(tracking_output_path):
print(f"{basename}: Can't find {CamerType}_tracking_output.data file!")
return None, None
bboxes, ffeats, trackerboxes, tracker_feat_dict, trackingboxes, tracking_feat_dict = extract_data(fpath)
tracking_output_boxes, _ = read_tracking_output(tracking_output_path)
'''1.2 利用本地跟踪算法生成各商品轨迹'''
# trackerboxes, tracker_feat_dict = tracking(bboxes, ffeats)
'''1.3 分别构造 2 个文件夹,(1) 存储画框后的图像; (2) 运动轨迹对应的 boxes子图'''
save_dir = os.path.join(savedir, event_name + '_images')
subimg_dir = os.path.join(savedir, event_name + '_subimgs')
if not os.path.exists(save_dir):
os.makedirs(save_dir)
if not os.path.exists(subimg_dir):
os.makedirs(subimg_dir)
'''2. 执行轨迹分析, 保存轨迹分析前后的对比图示'''
traj_graphic = event_name + '_' + CamerType
if CamerType == '1':
vts = doFrontTracks(trackerboxes, tracker_feat_dict)
vts.classify()
plt = plot_frameID_y2(vts)
# ftpath = os.path.join(savedir, f"{traj_graphic}_front_y2.png")
# plt.savefig(ftpath)
plt.close()
edgeline = cv2.imread("./shopcart/cart_tempt/board_ftmp_line.png")
img_tracking = draw_all_trajectories(vts, edgeline, savedir, CamerType, draw5p=True)
elif CamerType == '0':
vts = doBackTracks(trackerboxes, tracker_feat_dict)
vts.classify()
edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
img_tracking = draw_all_trajectories(vts, edgeline, savedir, CamerType, draw5p=True)
# imgpth = os.path.join(savedir, f"{traj_graphic}_.png")
# cv2.imwrite(str(imgpth), img)
else:
print("Please check data file!")
'''3 tracking() 算法输出后多轨迹选择问题分析'''
if CamerType == '1':
aline = cv2.imread("./shopcart/cart_tempt/board_ftmp_line.png")
elif CamerType == '0':
aline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
else:
print("Please check data file!")
bline = aline.copy()
annotator = TrackAnnotator(aline, line_width=2)
for track in trackingboxes:
annotator.plotting_track(track)
aline = annotator.result()
annotator = TrackAnnotator(bline, line_width=2)
if not isinstance(tracking_output_boxes, list):
tracking_output_boxes = [tracking_output_boxes]
for track in tracking_output_boxes:
annotator.plotting_track(track)
bline = annotator.result()
abimg = np.concatenate((aline, bline), axis = 1)
abH, abW = abimg.shape[:2]
cv2.line(abimg, (int(abW/2), 0), (int(abW/2), abH), (128, 255, 128), 2)
# algpath = os.path.join(savedir, f"{traj_graphic}_alg.png")
# cv2.imwrite(str(algpath), abimg)
'''4. 画框后的图像和子图保存若imgs数与tracker中fid数不匹配只保存原图不保存子图'''
'''4.0 读取 fpath 中对应的图像 imgs '''
imgs = read_imgs(imgpath, CamerType)
'''4.1 imgs数 < trackerboxes 的 max(fid),返回原图'''
if len(imgs) < np.max(trackerboxes[:,7]):
for i in range(len(imgs)):
img_savepath = os.path.join(save_dir, CamerType + "_" + f"{i}.png")
cv2.imwrite(img_savepath, imgs[i])
print(f"{basename}: len(imgs) = {len(imgs)} < Tracker max(fid) = {int(np.max(trackerboxes[:,7]))}, 无法匹配画框")
return img_tracking, abimg
'''4.2 在 imgs 上画框并保存'''
imgs_dw = draw_tracking_boxes(imgs, trackerboxes)
for fid, img in imgs_dw:
img_savepath = os.path.join(save_dir, CamerType + "_fid_" + f"{int(fid)}.png")
cv2.imwrite(img_savepath, img)
'''4.3.2 保存轨迹选择对应的子图'''
# for track in tracking_output_boxes:
for track in vts.Residual:
for *xyxy, tid, conf, cls, fid, bid in track.boxes:
img = imgs[int(fid-1)]
x1, y1, x2, y2 = int(xyxy[0]/2), int(xyxy[1]/2), int(xyxy[2]/2), int(xyxy[3]/2)
subimg = img[y1:y2, x1:x2]
subimg_path = os.path.join(subimg_dir, f'{CamerType}_tid{int(tid)}_{int(fid)}_{int(bid)}.png' )
cv2.imwrite(subimg_path, subimg)
for track in tracking_output_boxes:
for *xyxy, tid, conf, cls, fid, bid in track:
img = imgs[int(fid-1)]
x1, y1, x2, y2 = int(xyxy[0]/2), int(xyxy[1]/2), int(xyxy[2]/2), int(xyxy[3]/2)
subimg = img[y1:y2, x1:x2]
subimg_path = os.path.join(subimg_dir, f'x_{CamerType}_tid{int(tid)}_{int(fid)}_{int(bid)}.png' )
cv2.imwrite(subimg_path, subimg)
return img_tracking, abimg
def tracking_simulate(eventpath, savepath):
'''args:
eventpath: 事件文件夹
savepath: 存储文件夹
遍历eventpath
'''
# =============================================================================
# '''1. 获取事件名'''
# event_names = os.path.basename(eventpath).strip().split('_')
# if len(event_names)==2 and len(event_names[1])>=8:
# enent_name = event_names[1]
# elif len(event_names)==2 and len(event_names[1])==0:
# enent_name = event_names[0]
# else:
# return
# =============================================================================
enent_name = os.path.basename(eventpath)
## only for simplify the filename
idx = enent_name.find('2024')
if idx>=0:
enent_name = enent_name[idx:(idx+15)]
'''2. 依次读取 0/1_track.data 中数据,进行仿真'''
illu_tracking, illu_select = [], []
for filename in os.listdir(eventpath):
# filename = '1_track.data'
if filename.find("track.data") < 0: continue
fpath = os.path.join(eventpath, filename)
if not os.path.isfile(fpath): continue
img_tracking, img_select = do_tracking(fpath, savepath, enent_name)
if img_select is not None:
illu_select.append(img_select)
if img_tracking is not None:
illu_tracking.append(img_tracking)
'''3. 共幅8图上下子图显示的是前后摄每一行4个子图分别为
(1) tracker输出原始轨迹; (2)本地tracking输出; (3)现场算法轨迹选择前轨迹; (4)现场算法轨迹选择后的轨迹
'''
if len(illu_select)==2:
Img_s = np.concatenate((illu_select[0], illu_select[1]), axis = 0)
H, W = Img_s.shape[:2]
cv2.line(Img_s, (0, int(H/2)), (int(W), int(H/2)), (128, 255, 128), 2)
elif len(illu_select)==1:
Img_s = illu_select[0]
else:
Img_s = None
if len(illu_tracking)==2:
Img_t = np.concatenate((illu_tracking[0], illu_tracking[1]), axis = 0)
H, W = Img_t.shape[:2]
cv2.line(Img_t, (0, int(H/2)), (int(W), int(H/2)), (128, 255, 128), 2)
elif len(illu_tracking)==1:
Img_t = illu_tracking[0]
else:
Img_t = None
'''3.1 保存输出轨迹图若tracking、select的shape相同则合并输出否则单独输出'''
imgpath_tracking = os.path.join(savepath, enent_name + '_tracking.png')
imgpath_select = os.path.join(savepath, enent_name + '_select.png')
imgpath_ts = os.path.join(savepath, enent_name + '_tracking_select.png')
if Img_t is not None and Img_s is not None and np.all(Img_s.shape==Img_t.shape):
Img_ts = np.concatenate((Img_t, Img_s), axis = 1)
H, W = Img_ts.shape[:2]
cv2.line(Img_ts, (int(W/2), 0), (int(W/2), int(H)), (0, 0, 255), 4)
cv2.imwrite(imgpath_ts, Img_ts)
else:
if Img_s: cv2.imwrite(imgpath_select, Img_s) # 不会执行到该处
if Img_t: cv2.imwrite(imgpath_tracking, Img_t) # 不会执行到该处
Img_ts = None
'''3.2 单独另存保存完好的 8 轨迹图'''
if Img_ts is not None:
basepath, _ = os.path.split(savepath)
trajpath = os.path.join(basepath, 'trajs')
if not os.path.exists(trajpath):
os.makedirs(trajpath)
traj_path = os.path.join(trajpath, enent_name+'.png')
cv2.imwrite(traj_path, Img_ts)
return Img_ts
# warnings.simplefilter("error", category=np.VisibleDeprecationWarning)
def main_loop():
del_barcode_file = r'\\192.168.1.28\share\测试_202406\0723\0723_3\deletedBarcode.txt'
basepath = r'\\192.168.1.28\share\测试_202406\0723\0723_3' # 测试数据文件夹地址
# del_barcode_file = r'\\192.168.1.28\share\测试_202406\1030\images\returnGoods.txt'
# basepath = r'\\192.168.1.28\share\测试_202406\1030\images' # 测试数据文件夹地址
'''获取性能测试数据相关路径'''
SavePath = r'D:\contrast\dataset\resultx' # 结果保存地址
saveimgs = True
if os.path.basename(del_barcode_file).find('deletedBarcode'):
relative_paths = get_contrast_paths(del_barcode_file, basepath, SavePath, saveimgs)
elif os.path.basename(del_barcode_file).find('returnGoods'):
blist = read_returnGoods_file(del_barcode_file)
errpairs, corrpairs, err_similarity, correct_similarity = one2n_return(blist)
relative_paths = []
for getoutevent, inputevent, errevent in errpairs:
relative_paths.append(os.path.join(basepath, getoutevent))
relative_paths.append(os.path.join(basepath, inputevent))
relative_paths.append(os.path.join(basepath, errevent))
# prefix = ["getout_", "input_", "error_"]
'''开始循环执行每次测试过任务'''
k = 0
for tuple_paths in relative_paths:
'''1. 生成存储结果图像的文件夹'''
namedirs = []
for data_path in tuple_paths:
base_name = os.path.basename(data_path).strip().split('_')
if len(base_name[-1]):
name = base_name[-1]
else:
name = base_name[0]
namedirs.append(name)
sdir = "_".join(namedirs)
savepath = os.path.join(SavePath, sdir)
# if os.path.exists(savepath):
# continue
if not os.path.exists(savepath):
os.makedirs(savepath)
'''2. 循环执行操作事件:取出、放入、错误匹配'''
for eventpath in tuple_paths:
try:
tracking_simulate(eventpath, savepath)
except Exception as e:
print(f'Error! {eventpath}, {e}')
# k +=1
# if k==1:
# break
def main():
'''
eventPaths: data文件地址该 data 文件包括 Pipeline 各模块输出
SavePath: 包含二级目录,一级目录为轨迹图像;二级目录为与data文件对应的序列图像存储地址。
'''
# eventPaths = r'\\192.168.1.28\share\测试_202406\0723\0723_3'
eventPaths = r'\\192.168.1.28\share\测试视频数据以及日志\各模块测试记录\展厅测试\1120_展厅模型v801测试\扫A放A'
savePath = r'D:\exhibition\result'
k=0
for pathname in os.listdir(eventPaths):
pathname = "20241121-144901-fdba61c6-aefa-4b50-876d-5e05998befdc_6920459905012_6920459905012"
eventpath = os.path.join(eventPaths, pathname)
savepath = os.path.join(savePath, pathname)
if not os.path.exists(savepath):
os.makedirs(savepath)
tracking_simulate(eventpath, savepath)
# try:
# tracking_simulate(eventpath, savepath)
# except Exception as e:
# print(f'Error! {eventpath}, {e}')
k += 1
if k==1:
break
if __name__ == "__main__":
# main_loop()
main()
# try:
# main_loop()
# except Exception as e:
# print(f'Error: {e}')

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.0 KiB

View File

@ -0,0 +1,6 @@
5幅图
incart.png
outcart.png
incart_ftmp.png
outcart_ftmp.png
cartboarder.png

View File

@ -0,0 +1,151 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 19 18:17:55 2023
@author: ym
"""
import cv2
import os
import numpy as np
def tempt_add_adjc():
temp = cv2.imread("img.png")
path = r"D:\DeepLearning\yolov5\runs\trajectory"
patr = r"D:\DeepLearning\yolov5\tracking\result"
for filename in os.listdir(path):
imgpath = os.path.join(path, filename)
img = cv2.imread(imgpath)
img1 = cv2.add(img, temp)
img1path = os.path.join(patr, filename)
cv2.imwrite(img1path, img1)
def temp_add_boarder():
temp = cv2.imread("cartedge.png")
temp[640:, 0:20, :] = 255
temp[640:, -20:, :] = 255
temp[-20:, :, :] = 255
cv2.imwrite("cartboarder.png", temp)
def create_front_temp():
image = cv2.imread("./iCart4/b.png")
Height, Width = image.shape[:2]
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh, binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV)
board = cv2.bitwise_not(binary)
contours, _ = cv2.findContours(board, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
k = 0
for cnt in contours:
img = np.zeros((Height, Width), dtype=np.uint8)
cv2.drawContours(img, [cnt], -1, 255, 3)
k += 1
cv2.imwrite(f"./iCart4/back{k}.png", img)
imgshow = cv2.drawContours(image, contours, -1, (0,255,0), 3)
cv2.imwrite("./iCart4/board_back_line.png", imgshow)
# cv2.imwrite("./iCart4/4.png", board)
# cv2.imwrite("1.png", gray)
# cv2.imwrite("2.png", binary)
def create_back_temp():
'''
image1.png从中获取轮廓的初始图像
image2.png主要用于显示效果
Returnimg.png
'''
image = cv2.imread("image1.png")
Height, Width = image.shape[:2]
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray[:405, :] = 0
thresh, binary = cv2.threshold(gray, 254, 255, cv2.THRESH_BINARY)
cv2.imwrite("shopcart.png", binary)
imgshow = cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
contours, _ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
imgshow = cv2.drawContours(imgshow, contours, -1, (0,255,0), 1)
cv2.imwrite("imgshow.png", imgshow)
image2 = cv2.imread("image2.png")
image2 = cv2.drawContours(image2, contours, -1, (0,255,0), 3)
for cnt in contours:
_, start, _, num = cv2.boundingRect(cnt)
x1 = (cnt[:, 0, 0] != 0)
x2 = (cnt[:, 0, 0] != Width-1)
x3 = (cnt[:, 0, 1] != Height-1)
x = (x1 & x2) & x3
idx = np.where(x)
cntx = cnt[idx, :, :][0]
cnt1 = cntx[:,0,:].copy()
cntx[:, 0, 1] -= 60
cnt2 = cntx[:,0,:].copy()
cv2.drawContours(image2,[cntx], 0, (0,0,255), 2)
img = np.zeros(gray.shape, np.uint8)
for i in range(len(cnt1)):
x1, y1 = cnt1[i]
x2, y2 = cnt2[i]
cv2.rectangle(img, (x1-1, y1-1), (x1+1, y1+1), 255, 1)
cv2.rectangle(img, (x2-1, y2-1), (x2+1, y2+1), 255, 1)
cv2.imwrite("img.png", img)
if __name__ == "__main__":
# create_back_temp()
# temp_add_boarder()
# tempt_add_adjc()
create_front_temp()

View File

@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 13 09:39:42 2024
@author: ym
"""
import os
import time
import datetime
import numpy as np
import sys
sys.path.append(r"D:\DetectTracking")
from tracking.utils.read_data import extract_data, read_weight_timeConsuming
def main():
directory = r"\\192.168.1.28\share\测试_202406\0821\images"
TimeConsuming = []
DayHMS = []
for root, dirs, files in os.walk(directory):
if root.find('20240821') == -1: continue
for name in files:
if name.find('process.data') == -1: continue
datename = os.path.basename(root)[:15]
fpath = os.path.join(root, name)
WeightDict, SensorDict, ProcessTimeDict = read_weight_timeConsuming(fpath)
try:
t1 = ProcessTimeDict['algroDoStart'] # 算法处理的第一帧图像时间
t2 = ProcessTimeDict['breakinFirst'] # 第一次入侵时间
t3 = ProcessTimeDict['algroLastFrame'] # 算法处理的最后一帧图像时间
t4 = ProcessTimeDict['breakinLast'] # 最后一次入侵时间
t5 = ProcessTimeDict['weightStablityTime'] # 重力稳定时间
wv = ProcessTimeDict['weightValue'] # 重力值
t6 = ProcessTimeDict['YoloResnetTrackerEnd'] # Yolo、Resnet、tracker执行结束时间
t7 = ProcessTimeDict['trackingEnd'] # 轨迹分析结束时间
t8 = ProcessTimeDict['contrastEnd'] # 比对结束时间
t9 = ProcessTimeDict['algroStartToEnd'] # 算法总执行时间
t10 = ProcessTimeDict['weightstablityToEnd'] # 重力稳定至算法结束时间
t11 = ProcessTimeDict['frameEndToEnd'] # 最后一帧图像至算法结束时间
TimeConsuming.append((t1, t2, t3, t4, t5, wv, t6, t7, t8, t9, t10, t11))
DayHMS.append(datename)
except Exception as e:
print(f'Error! {datename}, {e}')
TimeConsuming = np.array(TimeConsuming, dtype = np.int64)
TimeTotal = np.concatenate((TimeConsuming,
TimeConsuming[:,4][:, None] - TimeConsuming[:,0][:, None],
TimeConsuming[:,4][:, None] - TimeConsuming[:,2][:, None]), axis=1)
tt = TimeTotal[:, 3]==0
TimeTotal0 = TimeTotal[tt]
DayHMS0 = [DayHMS[ti] for i, ti in enumerate(tt) if ti]
TimeTotalMinus = TimeTotal[TimeTotal[:, 5]<0]
TimeTotalAdd = TimeTotal[TimeTotal[:, 5]>=0]
TimeTotalAdd0 = TimeTotalAdd[TimeTotalAdd[:,3] == 0]
TimeTotalAdd1 = TimeTotalAdd[TimeTotalAdd[:,3] != 0]
TimeTotalMinus0 = TimeTotalMinus[TimeTotalMinus[:,3] == 0]
TimeTotalMinus1 = TimeTotalMinus[TimeTotalMinus[:,3] != 0]
print(f"Total number is {len(TimeConsuming)}")
if __name__ == "__main__":
main()

View File

@ -0,0 +1,94 @@
# Tracker
## Supported Trackers
- [x] ByteTracker
- [x] BoT-SORT
## Usage
### python interface:
You can use the Python interface to track objects using the YOLO model.
```python
from ultralytics import YOLO
model = YOLO("yolov8n.pt") # or a segmentation model .i.e yolov8n-seg.pt
model.track(
source="video/streams",
stream=True,
tracker="botsort.yaml", # or 'bytetrack.yaml'
show=True,
)
```
You can get the IDs of the tracked objects using the following code:
```python
from ultralytics import YOLO
model = YOLO("yolov8n.pt")
for result in model.track(source="video.mp4"):
print(
result.boxes.id.cpu().numpy().astype(int)
) # this will print the IDs of the tracked objects in the frame
```
If you want to use the tracker with a folder of images or when you loop on the video frames, you should use the `persist` parameter to tell the model that these frames are related to each other so the IDs will be fixed for the same objects. Otherwise, the IDs will be different in each frame because in each loop, the model creates a new object for tracking, but the `persist` parameter makes it use the same object for tracking.
```python
import cv2
from ultralytics import YOLO
cap = cv2.VideoCapture("video.mp4")
model = YOLO("yolov8n.pt")
while True:
ret, frame = cap.read()
if not ret:
break
results = model.track(frame, persist=True)
boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
ids = results[0].boxes.id.cpu().numpy().astype(int)
for box, id in zip(boxes, ids):
cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
cv2.putText(
frame,
f"Id {id}",
(box[0], box[1]),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 0, 255),
2,
)
cv2.imshow("frame", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
```
## Change tracker parameters
You can change the tracker parameters by editing the `tracker.yaml` file which is located in the ultralytics/cfg/trackers folder.
## Command Line Interface (CLI)
You can also use the command line interface to track objects using the YOLO model.
```bash
yolo detect track source=... tracker=...
yolo segment track source=... tracker=...
yolo pose track source=... tracker=...
```
By default, trackers will use the configuration in `ultralytics/cfg/trackers`. We also support using a modified tracker config file. Please refer to the tracker config files in `ultralytics/cfg/trackers`.
## Contribute to Our Trackers Section
Are you proficient in multi-object tracking and have successfully implemented or adapted a tracking algorithm with Ultralytics YOLO? We invite you to contribute to our Trackers section! Your real-world applications and solutions could be invaluable for users working on tracking tasks.
By contributing to this section, you help expand the scope of tracking solutions available within the Ultralytics YOLO framework, adding another layer of functionality and utility for the community.
To initiate your contribution, please refer to our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for comprehensive instructions on submitting a Pull Request (PR) 🛠️. We are excited to see what you bring to the table!
Together, let's enhance the tracking capabilities of the Ultralytics YOLO ecosystem 🙏!

View File

@ -0,0 +1,10 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from .bot_sort import BOTSORT
from .byte_tracker import BYTETracker
from .track import register_tracker
__all__ = 'register_tracker', 'BOTSORT', 'BYTETracker' # allow simpler import

View File

@ -0,0 +1,71 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from collections import OrderedDict
import numpy as np
class TrackState:
"""Enumeration of possible object tracking states."""
New = 0
Tracked = 1
Lost = 2
Removed = 3
class BaseTrack:
"""Base class for object tracking, handling basic track attributes and operations."""
_count = 0
track_id = 0
is_activated = False
state = TrackState.New
history = OrderedDict()
features = []
curr_feature = None
score = 0
start_frame = 0
frame_id = 0
time_since_update = 0
# Multi-camera
location = (np.inf, np.inf)
@property
def end_frame(self):
"""Return the last frame ID of the track."""
return self.frame_id
@staticmethod
def next_id():
"""Increment and return the global track ID counter."""
BaseTrack._count += 1
return BaseTrack._count
def activate(self, *args):
"""Activate the track with the provided arguments."""
raise NotImplementedError
def predict(self):
"""Predict the next state of the track."""
raise NotImplementedError
def update(self, *args, **kwargs):
"""Update the track with new observations."""
raise NotImplementedError
def mark_lost(self):
"""Mark the track as lost."""
self.state = TrackState.Lost
def mark_removed(self):
"""Mark the track as removed."""
self.state = TrackState.Removed
@staticmethod
def reset_id():
"""Reset the global track ID counter."""
BaseTrack._count = 0

View File

@ -0,0 +1,210 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from collections import deque
import numpy as np
from .basetrack import TrackState
from .byte_tracker import BYTETracker, STrack
from .utils import matching
# from .utils.gmc import GMC
from .utils.kalman_filter import KalmanFilterXYWH
# from .reid.reid_interface import ReIDInterface
# from .reid.config import config
from detecttracking.contrast.feat_extract.inference import FeatsInterface
# from contrast.feat_extract.config import config as conf
class BOTrack(STrack):
shared_kalman = KalmanFilterXYWH()
def __init__(self, tlwh, score, cls, feat=None, feat_history=50):
"""Initialize YOLOv8 object with temporal parameters, such as feature history, alpha and current features."""
super().__init__(tlwh, score, cls)
self.smooth_feat = None
self.curr_feat = None
if feat is not None:
self.update_features(feat)
self.features = deque([], maxlen=feat_history)
self.alpha = 0.9
def update_features(self, feat):
"""Update features vector and smooth it using exponential moving average."""
feat /= np.linalg.norm(feat)
self.curr_feat = feat
if self.smooth_feat is None:
self.smooth_feat = feat
else:
self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def predict(self):
"""Predicts the mean and covariance using Kalman filter."""
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[6] = 0
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
def re_activate(self, new_track, frame_id, new_id=False):
"""Reactivates a track with updated features and optionally assigns a new ID."""
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
super().re_activate(new_track, frame_id, new_id)
def update(self, new_track, frame_id):
"""Update the YOLOv8 instance with new track and frame ID."""
if new_track.curr_feat is not None:
self.update_features(new_track.curr_feat)
super().update(new_track, frame_id)
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[:2] -= ret[2:] / 2
return ret
@staticmethod
def multi_predict(stracks):
"""Predicts the mean and covariance of multiple object tracks using shared Kalman filter."""
if len(stracks) <= 0:
return
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
for i, st in enumerate(stracks):
if st.state != TrackState.Tracked:
multi_mean[i][6] = 0
multi_mean[i][7] = 0
multi_mean, multi_covariance = BOTrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
stracks[i].mean = mean
stracks[i].covariance = cov
def convert_coords(self, tlwh):
"""Converts Top-Left-Width-Height bounding box coordinates to X-Y-Width-Height format."""
return self.tlwh_to_xywh(tlwh)
@staticmethod
def tlwh_to_xywh(tlwh):
"""Convert bounding box to format `(center x, center y, width,
height)`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
return ret
class BOTSORT(BYTETracker):
def __init__(self, args, frame_rate=30, resnetModel=None):
"""Initialize YOLOv8 object with ReID module and GMC algorithm."""
super().__init__(args, frame_rate)
# ReID module
self.proximity_thresh = args.proximity_thresh
self.appearance_thresh = args.appearance_thresh
if args.with_reid:
# Haven't supported BoT-SORT(reid) yet
# self.encoder = ReIDInterface(config)
# self.encoder = FeatsInterface(conf)
self.encoder = FeatsInterface(resnetModel)
# self.gmc = GMC(method=args.gmc_method) # commented by WQG
def get_kalmanfilter(self):
"""Returns an instance of KalmanFilterXYWH for object tracking."""
return KalmanFilterXYWH()
def init_track(self, dets, scores, cls, image, features_keep):
"""Initialize track with detections, scores, and classes."""
if len(dets) == 0:
return []
if self.args.with_reid and self.encoder is not None:
if features_keep is None:
imgs, features_keep = self.encoder.inference(image, dets)
return [BOTrack(xyxy, s, c, f) for (xyxy, s, c, f) in zip(dets, scores, cls, features_keep)] # detections
else:
return [BOTrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] # detections
def get_dists(self, tracks, detections):
"""Get distances between tracks and detections using IoU and (optionally) ReID embeddings."""
dists = matching.iou_distance(tracks, detections)
# proximity_thresh 应该设较大的值表示只有两个boxes离得较远时不考虑reid特征
dists_mask = (dists > self.proximity_thresh)
# TODO: mot20
# if not self.args.mot20:
dists = matching.fuse_score(dists, detections)
if self.args.with_reid and self.encoder is not None:
emb_dists = matching.embedding_distance(tracks, detections) / 2.0
emb_dists[emb_dists > self.appearance_thresh] = 1.0
emb_dists[dists_mask] = 1.0
dists = np.minimum(dists, emb_dists)
return dists
def get_dists_1(self, tracks, detections):
"""Get distances between tracks and detections using IoU and (optionally) ReID embeddings."""
iou_dists = matching.iou_distance(tracks, detections)
iou_dists_mask = (iou_dists>0.9)
iou_dists = matching.fuse_score(iou_dists, detections)
weight = 0.4
if self.args.with_reid and self.encoder is not None:
emb_dists = matching.embedding_distance(tracks, detections)
'''============ iou_dists 和 emb_dists 融合有两种策略 ==========='''
'''1. reid 相似度阈值,低于该值的两 boxes 图像不可能是同一对象,需要确定一个合理的可信阈值
2. iou 的约束为若约束,故 iou_dists 应设置为较大的值
'''
emb_dists_mask = (emb_dists > 0.9)
iou_dists[emb_dists_mask] = 1
emb_dists[iou_dists_mask] = 1
dists = np.minimum(iou_dists, emb_dists)
'''2. embed 阈值'''
# dists = (1-weight)*iou_dists + weight*emb_dists
else:
dists = iou_dists.copy()
return dists
def multi_predict(self, tracks):
"""Predict and track multiple objects with YOLOv8 model."""
BOTrack.multi_predict(tracks)
def get_result(self):
'''written by WQG'''
activate_tracks = np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.idx]
for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
track_features = []
if self.args.with_reid and self.encoder is not None:
track_features = np.asarray([x.curr_feat for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
return (activate_tracks, track_features)

View File

@ -0,0 +1,491 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import numpy as np
from .basetrack import BaseTrack, TrackState
from .utils import matching
from .utils.kalman_filter import KalmanFilterXYAH
def dists_update(dists, strack_pool, detections):
'''written by WQG'''
if len(strack_pool) and len(detections):
# alabel = np.array([int(stack.cls) if int(stack.cls)==0 or int(stack.cls)==9 else -1 for stack in strack_pool])
# blabel = np.array([int(stack.cls) if int(stack.cls)==0 or int(stack.cls)==9 else -1 for stack in detections])
alabel = np.array([int(stack.cls) for stack in strack_pool])
blabel = np.array([int(stack.cls) for stack in detections])
amlabel = np.expand_dims(alabel, axis=1).repeat(len(detections),axis=1)
bmlabel = np.expand_dims(blabel, axis=0).repeat(len(strack_pool),axis=0)
mlabel = bmlabel == amlabel
iou_dist = matching.iou_distance(strack_pool, detections) > 0.1 #boxes iou>0.9时,可以不考虑类别
dist_label = (1 - mlabel) & iou_dist # 不同类,且不是严格重叠,需考虑类别距离
dist_label = 1 - mlabel
dists = np.where(dists > dist_label, dists, dist_label)
return dists
class STrack(BaseTrack):
shared_kalman = KalmanFilterXYAH()
def __init__(self, tlwh, score, cls):
"""wait activate."""
self._tlwh = np.asarray(self.tlbr_to_tlwh(tlwh[:-1]), dtype=np.float32)
self.kalman_filter = None
self.mean, self.covariance = None, None
self.is_activated = False
self.first_find = False ###
self.score = score
self.tracklet_len = 0
self.cls = cls
self.idx = tlwh[-1]
def predict(self):
"""Predicts mean and covariance using Kalman filter."""
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
@staticmethod
def multi_predict(stracks):
"""Perform multi-object predictive tracking using Kalman filter for given stracks."""
if len(stracks) <= 0:
return
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
for i, st in enumerate(stracks):
if st.state != TrackState.Tracked:
multi_mean[i][7] = 0
multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
stracks[i].mean = mean
stracks[i].covariance = cov
@staticmethod
def multi_gmc(stracks, H=np.eye(2, 3)):
"""Update state tracks positions and covariances using a homography matrix."""
if len(stracks) > 0:
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
R = H[:2, :2]
R8x8 = np.kron(np.eye(4, dtype=float), R)
t = H[:2, 2]
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
mean = R8x8.dot(mean)
mean[:2] += t
cov = R8x8.dot(cov).dot(R8x8.transpose())
stracks[i].mean = mean
stracks[i].covariance = cov
def activate(self, kalman_filter, frame_id):
"""Start a new tracklet."""
self.kalman_filter = kalman_filter
self.track_id = self.next_id()
self.mean, self.covariance = self.kalman_filter.initiate(self.convert_coords(self._tlwh))
self.tracklet_len = 0
self.state = TrackState.Tracked
if frame_id == 1:
self.is_activated = True
else:
self.first_find = True ### Add by WQG
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
"""Reactivates a previously lost track with a new detection."""
self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
self.convert_coords(new_track.tlwh))
self.tracklet_len = 0
self.state = TrackState.Tracked
self.is_activated = True
self.first_find = False
self.frame_id = frame_id
if new_id:
self.track_id = self.next_id()
self.score = new_track.score
self.cls = new_track.cls
self.idx = new_track.idx
self._tlwh = new_track._tlwh
def update(self, new_track, frame_id):
"""
Update a matched track
:type new_track: STrack
:type frame_id: int
:return:
"""
self.frame_id = frame_id
self.tracklet_len += 1
new_tlwh = new_track.tlwh
self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance,
self.convert_coords(new_tlwh))
self.state = TrackState.Tracked
self.is_activated = True
self.first_find = False
self.score = new_track.score
self.cls = new_track.cls
self.idx = new_track.idx
self._tlwh = new_track._tlwh
def convert_coords(self, tlwh):
"""Convert a bounding box's top-left-width-height format to its x-y-angle-height equivalent."""
return self.tlwh_to_xyah(tlwh)
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
@property
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
@staticmethod
def tlbr_to_tlwh(tlbr):
"""Converts top-left bottom-right format to top-left width height format."""
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
def tlwh_to_tlbr(tlwh):
"""Converts tlwh bounding box format to tlbr format."""
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
"""Return a string representation of the BYTETracker object with start and end frames and track ID."""
return f'OT_{self.track_id}_({self.start_frame}-{self.end_frame})'
class BYTETracker:
def __init__(self, args, frame_rate=30):
"""Initialize a YOLOv8 object to track objects with given arguments and frame rate."""
self.tracked_stracks = [] # type: list[STrack]
self.lost_stracks = [] # type: list[STrack]
self.removed_stracks = [] # type: list[STrack]
self.frame_id = 0
self.args = args
self.max_time_lost = int(frame_rate / 30.0 * args.track_buffer)
self.kalman_filter = self.get_kalmanfilter()
self.reset_id()
# Add by WQG
self.args.new_track_thresh = 0.5
def update(self, results, img=None, features=None):
"""Updates object tracker with new detections and returns tracked object bounding boxes."""
self.frame_id += 1
activated_stracks = []
refind_stracks = []
lost_stracks = []
removed_stracks = []
first_finded = []
scores = results.conf
cls = results.cls
# =============================================================================
# # get xyxy and add index
# bboxes = results.xyxy
# bboxes = np.concatenate([bboxes, np.arange(len(bboxes)).reshape(-1, 1)], axis=-1)
# =============================================================================
bboxes = results.xyxyb
remain_inds = scores > self.args.track_high_thresh
inds_low = scores > self.args.track_low_thresh
inds_high = scores < self.args.track_high_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
dets = bboxes[remain_inds]
scores_keep = scores[remain_inds]
scores_second = scores[inds_second]
cls_keep = cls[remain_inds]
cls_second = cls[inds_second]
detections = self.init_track(dets, scores_keep, cls_keep, img, features)
# Add newly detected tracklets to tracked_stracks
unconfirmed = []
tracked_stracks = [] # type: list[STrack]
for track in self.tracked_stracks:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_stracks.append(track)
# Step 2: First association, with high score detection boxes
strack_pool = self.joint_stracks(tracked_stracks, self.lost_stracks)
# Predict the current location with KF
self.multi_predict(strack_pool)
# ============================================================= 没必要gmcWQG
# if hasattr(self, 'gmc') and img is not None:
# warp = self.gmc.apply(img, dets)
# STrack.multi_gmc(strack_pool, warp)
# STrack.multi_gmc(unconfirmed, warp)
# =============================================================================
dists = self.get_dists_1(strack_pool, detections)
'''written by WQG for different class'''
dists = dists_update(dists, strack_pool, detections)
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh)
for itracked, idet in matches:
track = strack_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_stracks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
# Step 3: Second association, with low score detection boxes
# association the untrack to the low score detections
detections_second = self.init_track(dets_second, scores_second, cls_second, img, features)
r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
# TODO
dists = matching.iou_distance(r_tracked_stracks, detections_second)
'''written by WQG for different class'''
dists = dists_update(dists, r_tracked_stracks, detections_second)
matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_stracks[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_stracks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
for it in u_track:
track = r_tracked_stracks[it]
if track.state != TrackState.Lost:
track.mark_lost()
lost_stracks.append(track)
# Deal with unconfirmed tracks, usually tracks with only one beginning frame
detections = [detections[i] for i in u_detection]
dists = self.get_dists_1(unconfirmed, detections)
'''written by WQG for different class'''
dists = dists_update(dists, unconfirmed, detections)
matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_stracks.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
if self.frame_id - track.end_frame > 2: # Add by WQG
track.mark_removed()
removed_stracks.append(track)
# Step 4: Init new stracks
for inew in u_detection:
track = detections[inew]
if track.score < self.args.new_track_thresh:
continue
track.activate(self.kalman_filter, self.frame_id)
activated_stracks.append(track)
first_finded.append(track)
# Step 5: Update state
for track in self.lost_stracks:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_stracks.append(track)
self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
self.tracked_stracks = self.joint_stracks(self.tracked_stracks, activated_stracks)
self.tracked_stracks = self.joint_stracks(self.tracked_stracks, refind_stracks)
self.lost_stracks = self.sub_stracks(self.lost_stracks, self.tracked_stracks)
self.lost_stracks.extend(lost_stracks)
self.lost_stracks = self.sub_stracks(self.lost_stracks, self.removed_stracks)
self.tracked_stracks, self.lost_stracks = self.remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
self.removed_stracks.extend(removed_stracks)
if len(self.removed_stracks) > 1000:
self.removed_stracks = self.removed_stracks[-999:] # clip remove stracks to 1000 maximum
'''x.tlbr have update by function:
@property
def tlwh(self):
'''
##================ 原算法输出
# output = np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.frame_id, x.idx]
# for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
## ===== write by WQG
output1 = [x.tlwh_to_tlbr(x._tlwh).tolist() + [x.track_id, x.score, x.cls, x.frame_id, x.idx]
for x in self.tracked_stracks if x.is_activated]
output2 = [x.tlwh_to_tlbr(x._tlwh).tolist() + [x.track_id, x.score, x.cls, x.frame_id, x.idx]
for x in first_finded if x.first_find]
output = np.asarray(output1 + output2, dtype=np.float32)
return output
def get_result(self):
'''written by WQG'''
# =============================================================================
# activate_tracks = np.asarray([x.tlbr.tolist() + [x.track_id, x.score, x.cls, x.idx]
# for x in self.tracked_stracks if x.is_activated], dtype=np.float32)
#
# track_features = []
# =============================================================================
tracks = []
feats = []
for t in self.tracked_stracks:
if t.is_activated or t.first_find:
track = t.tlbr.tolist() + [t.track_id, t.score, t.cls, t.idx]
feat = t.curr_feature
tracks.append(track)
feats.append(feat)
tracks = np.asarray(tracks, dtype=np.float32)
return (tracks, feats)
def get_kalmanfilter(self):
"""Returns a Kalman filter object for tracking bounding boxes."""
return KalmanFilterXYAH()
def init_track(self, dets, scores, cls, img=None, feats=None):
"""Initialize object tracking with detections and scores using STrack algorithm."""
return [STrack(xyxy, s, c) for (xyxy, s, c) in zip(dets, scores, cls)] if len(dets) else [] # detections
def get_dists(self, tracks, detections):
"""Calculates the distance between tracks and detections using IOU and fuses scores."""
dists = matching.iou_distance(tracks, detections)
# TODO: mot20
# if not self.args.mot20:
dists = matching.fuse_score(dists, detections)
return dists
def get_dists_1(self, tracks, detections):
"""Calculates the distance between tracks and detections using IOU and fuses scores."""
pass
def multi_predict(self, tracks):
"""Returns the predicted tracks using the YOLOv8 network."""
STrack.multi_predict(tracks)
def reset_id(self):
"""Resets the ID counter of STrack."""
STrack.reset_id()
@staticmethod
def joint_stracks(tlista, tlistb):
"""Combine two lists of stracks into a single one."""
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
@staticmethod
def sub_stracks(tlista, tlistb):
"""DEPRECATED CODE in https://github.com/ultralytics/ultralytics/pull/1890/
stracks = {t.track_id: t for t in tlista}
for t in tlistb:
tid = t.track_id
if stracks.get(tid, 0):
del stracks[tid]
return list(stracks.values())
"""
track_ids_b = {t.track_id for t in tlistb}
return [t for t in tlista if t.track_id not in track_ids_b]
@staticmethod
def remove_duplicate_stracks(stracksa, stracksb):
"""Remove duplicate stracks with non-maximum IOU distance."""
pdist = matching.iou_distance(stracksa, stracksb)
#### ===================================== written by WQG
mlabel = []
if len(stracksa) and len(stracksb):
alabel = np.array([int(stack.cls) for stack in stracksa])
blabel = np.array([int(stack.cls) for stack in stracksb])
amlabel = np.expand_dims(alabel, axis=1).repeat(len(stracksb),axis=1)
bmlabel = np.expand_dims(blabel, axis=0).repeat(len(stracksa),axis=0)
mlabel = bmlabel == amlabel
if len(mlabel):
condt = (pdist<0.15) & mlabel # 需满足iou足够小且类别相同才予以排除
else:
condt = pdist<0.15
pairs = np.where(condt)
dupa, dupb = [], []
for p, q in zip(*pairs):
timep = stracksa[p].frame_id - stracksa[p].start_frame
timeq = stracksb[q].frame_id - stracksb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(stracksa) if i not in dupa]
resb = [t for i, t in enumerate(stracksb) if i not in dupb]
return resa, resb

View File

@ -0,0 +1,18 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT
tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
track_buffer: 30 # buffer to calculate the time when to remove tracks
match_thresh: 0.8 # threshold for matching tracks
# min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now)
# mot20: False # for tracker evaluation(not used for now)
# BoT-SORT settings
gmc_method: sparseOptFlow # method of global motion compensation
# ReID model related thresh (not supported yet)
proximity_thresh: 0.5
appearance_thresh: 0.25
with_reid: True

View File

@ -0,0 +1,11 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack
tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
track_buffer: 30 # buffer to calculate the time when to remove tracks
match_thresh: 0.8 # threshold for matching tracks
# min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now)
# mot20: False # for tracker evaluation(not used for now)

View File

@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 19 16:15:35 2024
@author: ym
"""

Binary file not shown.

View File

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 19 14:01:46 2024
@author: ym
"""
import torch
import os
# import torchvision.transforms as T
class Config:
# network settings
backbone = 'resnet18' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
batch_size = 8
embedding_size = 256
img_size = 224
ckpt_path = r"ckpts\resnet18_1220\best.pth"
ckpt_path = r"ckpts\best_resnet18_1887_0311.pth"
current_path = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(current_path, ckpt_path)
# model_path = "./trackers/reid/ckpts/resnet18_1220/best.pth"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# =============================================================================
# metric = 'arcface' # [cosface, arcface]
# drop_ratio = 0.5
#
# # training settings
# checkpoints = "checkpoints/Mobilev3Large_1225" # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
# restore = False
#
# test_model = "./checkpoints/resnet18_1220/best.pth"
#
#
#
#
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# pin_memory = True # if memory is large, set it True to speed up a bit
# num_workers = 4 # dataloader
# =============================================================================
config = Config()

View File

@ -0,0 +1,83 @@
import torch.nn as nn
import torchvision
from torch.nn import init
class Flatten(nn.Module):
def forward(self, x):
return x.view(x.shape[0], -1)
class ChannelAttention(nn.Module):
def __int__(self,channel,reduction, num_layers):
super(ChannelAttention,self).__init__()
self.avgpool = nn.AdaptiveAvgPool2d(1)
gate_channels = [channel]
gate_channels += [len(channel)//reduction]*num_layers
gate_channels += [channel]
self.ca = nn.Sequential()
self.ca.add_module('flatten', Flatten())
for i in range(len(gate_channels)-2):
self.ca.add_module('',nn.Linear(gate_channels[i], gate_channels[i+1]))
self.ca.add_module('',nn.BatchNorm1d(gate_channels[i+1]))
self.ca.add_module('',nn.ReLU())
self.ca.add_module('',nn.Linear(gate_channels[-2], gate_channels[-1]))
def forward(self, x):
res = self.avgpool(x)
res = self.ca(res)
res = res.unsqueeze(-1).unsqueeze(-1).expand_as(x)
return res
class SpatialAttention(nn.Module):
def __int__(self, channel,reduction=16,num_lay=3,dilation=2):
super(SpatialAttention).__init__()
self.sa = nn.Sequential()
self.sa.add_module('', nn.Conv2d(kernel_size=1, in_channels=channel, out_channels=(channel//reduction)*3))
self.sa.add_module('',nn.BatchNorm2d(num_features=(channel//reduction)))
self.sa.add_module('',nn.ReLU())
for i in range(num_lay):
self.sa.add_module('', nn.Conv2d(kernel_size=3,
in_channels=(channel//reduction),
out_channels=(channel//reduction),
padding=1,
dilation= 2))
self.sa.add_module('',nn.BatchNorm2d(channel//reduction))
self.sa.add_module('',nn.ReLU())
self.sa.add_module('',nn.Conv2d(channel//reduction, 1, kernel_size=1))
def forward(self,x):
res = self.sa(x)
res = res.expand_as(x)
return res
class BAMblock(nn.Module):
def __init__(self,channel=512, reduction=16, dia_val=2):
super(BAMblock, self).__init__()
self.ca = ChannelAttention(channel, reduction)
self.sa = SpatialAttention(channel,reduction,dia_val)
self.sigmoid = nn.Sigmoid()
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal(m.weight, mode='fan_out')
if m.bais is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self,x):
b, c, _, _ = x.size()
sa_out=self.sa(x)
ca_out=self.ca(x)
weight=self.sigmoid(sa_out+ca_out)
out=(1+weight)*x
return out
if __name__ =="__main__":
print(512//14)

View File

@ -0,0 +1,68 @@
import torch
import torch.nn as nn
import torch.nn.init as init
class channelAttention(nn.Module):
def __init__(self, channel, reduction=16):
super(channelAttention, self).__init__()
self.Maxpooling = nn.AdaptiveMaxPool2d(1)
self.Avepooling = nn.AdaptiveAvgPool2d(1)
self.ca = nn.Sequential()
self.ca.add_module('conv1',nn.Conv2d(channel, channel//reduction, 1, bias=False))
self.ca.add_module('Relu', nn.ReLU())
self.ca.add_module('conv2',nn.Conv2d(channel//reduction, channel, 1, bias=False))
self.sigmod = nn.Sigmoid()
def forward(self, x):
M_out = self.Maxpooling(x)
A_out = self.Avepooling(x)
M_out = self.ca(M_out)
A_out = self.ca(A_out)
out = self.sigmod(M_out+A_out)
return out
class SpatialAttention(nn.Module):
def __init__(self, kernel_size=7):
super().__init__()
self.conv = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=kernel_size, padding=kernel_size // 2)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
max_result, _ = torch.max(x, dim=1, keepdim=True)
avg_result = torch.mean(x, dim=1, keepdim=True)
result = torch.cat([max_result, avg_result], dim=1)
output = self.conv(result)
output = self.sigmoid(output)
return output
class CBAM(nn.Module):
def __init__(self, channel=512, reduction=16, kernel_size=7):
super().__init__()
self.ca = channelAttention(channel, reduction)
self.sa = SpatialAttention(kernel_size)
def init_weights(self):
for m in self.modules():#权重初始化
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
# b,c_,_ = x.size()
# residual = x
out = x*self.ca(x)
out = out*self.sa(out)
return out
if __name__ == '__main__':
input=torch.randn(50,512,7,7)
kernel_size=input.shape[2]
cbam = CBAM(channel=512,reduction=16,kernel_size=kernel_size)
output=cbam(input)
print(output.shape)

View File

@ -0,0 +1,33 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class GeM(nn.Module):
def __init__(self, p=3, eps=1e-6):
super(GeM, self).__init__()
self.p = nn.Parameter(torch.ones(1) * p)
self.eps = eps
def forward(self, x):
return self.gem(x, p=self.p, eps=self.eps, stride = 2)
def gem(self, x, p=3, eps=1e-6, stride = 2):
return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1)), stride=2).pow(1. / p)
def __repr__(self):
return self.__class__.__name__ + \
'(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
', ' + 'eps=' + str(self.eps) + ')'
class TripletLoss(nn.Module):
def __init__(self, margin):
super(TripletLoss, self).__init__()
self.margin = margin
def forward(self, anchor, positive, negative, size_average = True):
distance_positive = (anchor-positive).pow(2).sum(1)
distance_negative = (anchor-negative).pow(2).sum(1)
losses = F.relu(distance_negative-distance_positive+self.margin)
return losses.mean() if size_average else losses.sum()
if __name__ == '__main__':
print('')

View File

@ -0,0 +1,9 @@
from .fmobilenet import FaceMobileNet
from .resnet_face import ResIRSE
from .mobilevit import mobilevit_s
from .metric import ArcFace, CosFace
from .loss import FocalLoss
from .resbam import resnet
from .resnet_pre import resnet18, resnet34, resnet50
from .mobilenet_v2 import mobilenet_v2
from .mobilenet_v3 import MobileNetV3_Small, MobileNetV3_Large

View File

@ -0,0 +1,124 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class Flatten(nn.Module):
def forward(self, x):
return x.view(x.shape[0], -1)
class ConvBn(nn.Module):
def __init__(self, in_c, out_c, kernel=(1, 1), stride=1, padding=0, groups=1):
super().__init__()
self.net = nn.Sequential(
nn.Conv2d(in_c, out_c, kernel, stride, padding, groups=groups, bias=False),
nn.BatchNorm2d(out_c)
)
def forward(self, x):
return self.net(x)
class ConvBnPrelu(nn.Module):
def __init__(self, in_c, out_c, kernel=(1, 1), stride=1, padding=0, groups=1):
super().__init__()
self.net = nn.Sequential(
ConvBn(in_c, out_c, kernel, stride, padding, groups),
nn.PReLU(out_c)
)
def forward(self, x):
return self.net(x)
class DepthWise(nn.Module):
def __init__(self, in_c, out_c, kernel=(3, 3), stride=2, padding=1, groups=1):
super().__init__()
self.net = nn.Sequential(
ConvBnPrelu(in_c, groups, kernel=(1, 1), stride=1, padding=0),
ConvBnPrelu(groups, groups, kernel=kernel, stride=stride, padding=padding, groups=groups),
ConvBn(groups, out_c, kernel=(1, 1), stride=1, padding=0),
)
def forward(self, x):
return self.net(x)
class DepthWiseRes(nn.Module):
"""DepthWise with Residual"""
def __init__(self, in_c, out_c, kernel=(3, 3), stride=2, padding=1, groups=1):
super().__init__()
self.net = DepthWise(in_c, out_c, kernel, stride, padding, groups)
def forward(self, x):
return self.net(x) + x
class MultiDepthWiseRes(nn.Module):
def __init__(self, num_block, channels, kernel=(3, 3), stride=1, padding=1, groups=1):
super().__init__()
self.net = nn.Sequential(*[
DepthWiseRes(channels, channels, kernel, stride, padding, groups)
for _ in range(num_block)
])
def forward(self, x):
return self.net(x)
class FaceMobileNet(nn.Module):
def __init__(self, embedding_size):
super().__init__()
self.conv1 = ConvBnPrelu(1, 64, kernel=(3, 3), stride=2, padding=1)
self.conv2 = ConvBn(64, 64, kernel=(3, 3), stride=1, padding=1, groups=64)
self.conv3 = DepthWise(64, 64, kernel=(3, 3), stride=2, padding=1, groups=128)
self.conv4 = MultiDepthWiseRes(num_block=4, channels=64, kernel=3, stride=1, padding=1, groups=128)
self.conv5 = DepthWise(64, 128, kernel=(3, 3), stride=2, padding=1, groups=256)
self.conv6 = MultiDepthWiseRes(num_block=6, channels=128, kernel=(3, 3), stride=1, padding=1, groups=256)
self.conv7 = DepthWise(128, 128, kernel=(3, 3), stride=2, padding=1, groups=512)
self.conv8 = MultiDepthWiseRes(num_block=2, channels=128, kernel=(3, 3), stride=1, padding=1, groups=256)
self.conv9 = ConvBnPrelu(128, 512, kernel=(1, 1))
self.conv10 = ConvBn(512, 512, groups=512, kernel=(7, 7))
self.flatten = Flatten()
self.linear = nn.Linear(2048, embedding_size, bias=False)
self.bn = nn.BatchNorm1d(embedding_size)
def forward(self, x):
#print('x',x.shape)
out = self.conv1(x)
out = self.conv2(out)
out = self.conv3(out)
out = self.conv4(out)
out = self.conv5(out)
out = self.conv6(out)
out = self.conv7(out)
out = self.conv8(out)
out = self.conv9(out)
out = self.conv10(out)
out = self.flatten(out)
out = self.linear(out)
out = self.bn(out)
return out
if __name__ == "__main__":
from PIL import Image
import numpy as np
x = Image.open("../samples/009.jpg").convert('L')
x = x.resize((128, 128))
x = np.asarray(x, dtype=np.float32)
x = x[None, None, ...]
x = torch.from_numpy(x)
net = FaceMobileNet(512)
net.eval()
with torch.no_grad():
out = net(x)
print(out.shape)

View File

@ -0,0 +1,18 @@
import torch
import torch.nn as nn
class FocalLoss(nn.Module):
def __init__(self, gamma=2):
super().__init__()
self.gamma = gamma
self.ce = torch.nn.CrossEntropyLoss()
def forward(self, input, target):
#print(f'theta {input.shape, input[0]}, target {target.shape, target}')
logp = self.ce(input, target)
p = torch.exp(-logp)
loss = (1 - p) ** self.gamma * logp
return loss.mean()

View File

@ -0,0 +1,83 @@
# Definition of ArcFace loss and CosFace loss
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
class ArcFace(nn.Module):
def __init__(self, embedding_size, class_num, s=30.0, m=0.50):
"""ArcFace formula:
cos(m + theta) = cos(m)cos(theta) - sin(m)sin(theta)
Note that:
0 <= m + theta <= Pi
So if (m + theta) >= Pi, then theta >= Pi - m. In [0, Pi]
we have:
cos(theta) < cos(Pi - m)
So we can use cos(Pi - m) as threshold to check whether
(m + theta) go out of [0, Pi]
Args:
embedding_size: usually 128, 256, 512 ...
class_num: num of people when training
s: scale, see normface https://arxiv.org/abs/1704.06369
m: margin, see SphereFace, CosFace, and ArcFace paper
"""
super().__init__()
self.in_features = embedding_size
self.out_features = class_num
self.s = s
self.m = m
self.weight = nn.Parameter(torch.FloatTensor(class_num, embedding_size))
nn.init.xavier_uniform_(self.weight)
self.cos_m = math.cos(m)
self.sin_m = math.sin(m)
self.th = math.cos(math.pi - m)
self.mm = math.sin(math.pi - m) * m
def forward(self, input, label):
#print(f"embding {self.in_features}, class_num {self.out_features}, input {len(input)}, label {len(label)}")
cosine = F.linear(F.normalize(input), F.normalize(self.weight))
# print('F.normalize(input)',input.shape)
# print('F.normalize(self.weight)',F.normalize(self.weight).shape)
sine = ((1.0 - cosine.pow(2)).clamp(0, 1)).sqrt()
phi = cosine * self.cos_m - sine * self.sin_m
phi = torch.where(cosine > self.th, phi, cosine - self.mm) # drop to CosFace
#print(f'consine {cosine.shape, cosine}, sine {sine.shape, sine}, phi {phi.shape, phi}')
# update y_i by phi in cosine
output = cosine * 1.0 # make backward works
batch_size = len(output)
output[range(batch_size), label] = phi[range(batch_size), label]
# print(f'output {(output * self.s).shape}')
# print(f'phi[range(batch_size), label] {phi[range(batch_size), label]}')
return output * self.s
class CosFace(nn.Module):
def __init__(self, in_features, out_features, s=30.0, m=0.40):
"""
Args:
embedding_size: usually 128, 256, 512 ...
class_num: num of people when training
s: scale, see normface https://arxiv.org/abs/1704.06369
m: margin, see SphereFace, CosFace, and ArcFace paper
"""
super().__init__()
self.in_features = in_features
self.out_features = out_features
self.s = s
self.m = m
self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
nn.init.xavier_uniform_(self.weight)
def forward(self, input, label):
cosine = F.linear(F.normalize(input), F.normalize(self.weight))
phi = cosine - self.m
output = cosine * 1.0 # make backward works
batch_size = len(output)
output[range(batch_size), label] = phi[range(batch_size), label]
return output * self.s

View File

@ -0,0 +1,200 @@
from torch import nn
from .utils import load_state_dict_from_url
from ..config import config as conf
__all__ = ['MobileNetV2', 'mobilenet_v2']
model_urls = {
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNReLU(nn.Sequential):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=None):
padding = (kernel_size - 1) // 2
if norm_layer is None:
norm_layer = nn.BatchNorm2d
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
norm_layer(out_planes),
nn.ReLU6(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio, norm_layer=None):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
if norm_layer is None:
norm_layer = nn.BatchNorm2d
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
norm_layer(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self,
num_classes=conf.embedding_size,
width_mult=1.0,
inverted_residual_setting=None,
round_nearest=8,
block=None,
norm_layer=None):
"""
MobileNet V2 main class
Args:
num_classes (int): Number of classes
width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
inverted_residual_setting: Network structure
round_nearest (int): Round the number of channels in each layer to be a multiple of this number
Set to 1 to turn off rounding
block: Module specifying inverted residual building block for mobilenet
norm_layer: Module specifying the normalization layer to use
"""
super(MobileNetV2, self).__init__()
if block is None:
block = InvertedResidual
if norm_layer is None:
norm_layer = nn.BatchNorm2d
input_channel = 32
last_channel = 1280
if inverted_residual_setting is None:
inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# only check the first element, assuming user knows t,c,n,s are required
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
raise ValueError("inverted_residual_setting should be non-empty "
"or a 4-element list, got {}".format(inverted_residual_setting))
# building first layer
input_channel = _make_divisible(input_channel * width_mult, round_nearest)
self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)]
# building inverted residual blocks
for t, c, n, s in inverted_residual_setting:
output_channel = _make_divisible(c * width_mult, round_nearest)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))
input_channel = output_channel
# building last several layers
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer))
# make it nn.Sequential
self.features = nn.Sequential(*features)
# building classifier
self.classifier = nn.Sequential(
nn.Dropout(0.2),
nn.Linear(self.last_channel, num_classes),
)
# weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.zeros_(m.bias)
def _forward_impl(self, x):
# This exists since TorchScript doesn't support inheritance, so the superclass method
# (this one) needs to have a name other than `forward` that can be accessed in a subclass
x = self.features(x)
# Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1)
x = self.classifier(x)
return x
def forward(self, x):
return self._forward_impl(x)
def mobilenet_v2(pretrained=True, progress=True, **kwargs):
"""
Constructs a MobileNetV2 architecture from
`"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
model = MobileNetV2(**kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'],
progress=progress)
src_state_dict = state_dict
target_state_dict = model.state_dict()
skip_keys = []
# skip mismatch size tensors in case of pretraining
for k in src_state_dict.keys():
if k not in target_state_dict:
continue
if src_state_dict[k].size() != target_state_dict[k].size():
skip_keys.append(k)
for k in skip_keys:
del src_state_dict[k]
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
#.load_state_dict(state_dict)
return model

View File

@ -0,0 +1,200 @@
'''MobileNetV3 in PyTorch.
See the paper "Inverted Residuals and Linear Bottlenecks:
Mobile Networks for Classification, Detection and Segmentation" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init
from ..config import config as conf
class hswish(nn.Module):
def forward(self, x):
out = x * F.relu6(x + 3, inplace=True) / 6
return out
class hsigmoid(nn.Module):
def forward(self, x):
out = F.relu6(x + 3, inplace=True) / 6
return out
class SeModule(nn.Module):
def __init__(self, in_size, reduction=4):
super(SeModule, self).__init__()
self.se = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_size, in_size // reduction, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(in_size // reduction),
nn.ReLU(inplace=True),
nn.Conv2d(in_size // reduction, in_size, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(in_size),
hsigmoid()
)
def forward(self, x):
return x * self.se(x)
class Block(nn.Module):
'''expand + depthwise + pointwise'''
def __init__(self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride):
super(Block, self).__init__()
self.stride = stride
self.se = semodule
self.conv1 = nn.Conv2d(in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(expand_size)
self.nolinear1 = nolinear
self.conv2 = nn.Conv2d(expand_size, expand_size, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=expand_size, bias=False)
self.bn2 = nn.BatchNorm2d(expand_size)
self.nolinear2 = nolinear
self.conv3 = nn.Conv2d(expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False)
self.bn3 = nn.BatchNorm2d(out_size)
self.shortcut = nn.Sequential()
if stride == 1 and in_size != out_size:
self.shortcut = nn.Sequential(
nn.Conv2d(in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(out_size),
)
def forward(self, x):
out = self.nolinear1(self.bn1(self.conv1(x)))
out = self.nolinear2(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
if self.se != None:
out = self.se(out)
out = out + self.shortcut(x) if self.stride==1 else out
return out
class MobileNetV3_Large(nn.Module):
def __init__(self, num_classes=conf.embedding_size):
super(MobileNetV3_Large, self).__init__()
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.hs1 = hswish()
self.bneck = nn.Sequential(
Block(3, 16, 16, 16, nn.ReLU(inplace=True), None, 1),
Block(3, 16, 64, 24, nn.ReLU(inplace=True), None, 2),
Block(3, 24, 72, 24, nn.ReLU(inplace=True), None, 1),
Block(5, 24, 72, 40, nn.ReLU(inplace=True), SeModule(40), 2),
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
Block(3, 40, 240, 80, hswish(), None, 2),
Block(3, 80, 200, 80, hswish(), None, 1),
Block(3, 80, 184, 80, hswish(), None, 1),
Block(3, 80, 184, 80, hswish(), None, 1),
Block(3, 80, 480, 112, hswish(), SeModule(112), 1),
Block(3, 112, 672, 112, hswish(), SeModule(112), 1),
Block(5, 112, 672, 160, hswish(), SeModule(160), 1),
Block(5, 160, 672, 160, hswish(), SeModule(160), 2),
Block(5, 160, 960, 160, hswish(), SeModule(160), 1),
)
self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(960)
self.hs2 = hswish()
self.linear3 = nn.Linear(960, 1280)
self.bn3 = nn.BatchNorm1d(1280)
self.hs3 = hswish()
self.linear4 = nn.Linear(1280, num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
out = self.hs1(self.bn1(self.conv1(x)))
out = self.bneck(out)
out = self.hs2(self.bn2(self.conv2(out)))
out = F.avg_pool2d(out, conf.img_size // 32)
out = out.view(out.size(0), -1)
out = self.hs3(self.bn3(self.linear3(out)))
out = self.linear4(out)
return out
class MobileNetV3_Small(nn.Module):
def __init__(self, num_classes=conf.embedding_size):
super(MobileNetV3_Small, self).__init__()
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.hs1 = hswish()
self.bneck = nn.Sequential(
Block(3, 16, 16, 16, nn.ReLU(inplace=True), SeModule(16), 2),
Block(3, 16, 72, 24, nn.ReLU(inplace=True), None, 2),
Block(3, 24, 88, 24, nn.ReLU(inplace=True), None, 1),
Block(5, 24, 96, 40, hswish(), SeModule(40), 2),
Block(5, 40, 240, 40, hswish(), SeModule(40), 1),
Block(5, 40, 240, 40, hswish(), SeModule(40), 1),
Block(5, 40, 120, 48, hswish(), SeModule(48), 1),
Block(5, 48, 144, 48, hswish(), SeModule(48), 1),
Block(5, 48, 288, 96, hswish(), SeModule(96), 2),
Block(5, 96, 576, 96, hswish(), SeModule(96), 1),
Block(5, 96, 576, 96, hswish(), SeModule(96), 1),
)
self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(576)
self.hs2 = hswish()
self.linear3 = nn.Linear(576, 1280)
self.bn3 = nn.BatchNorm1d(1280)
self.hs3 = hswish()
self.linear4 = nn.Linear(1280, num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
out = self.hs1(self.bn1(self.conv1(x)))
out = self.bneck(out)
out = self.hs2(self.bn2(self.conv2(out)))
out = F.avg_pool2d(out, conf.img_size // 32)
out = out.view(out.size(0), -1)
out = self.hs3(self.bn3(self.linear3(out)))
out = self.linear4(out)
return out
def test():
net = MobileNetV3_Small()
x = torch.randn(2,3,224,224)
y = net(x)
print(y.size())
# test()

View File

@ -0,0 +1,265 @@
import torch
import torch.nn as nn
from einops import rearrange
from ..config import config as conf
def conv_1x1_bn(inp, oup):
return nn.Sequential(
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
nn.SiLU()
)
def conv_nxn_bn(inp, oup, kernal_size=3, stride=1):
return nn.Sequential(
nn.Conv2d(inp, oup, kernal_size, stride, 1, bias=False),
nn.BatchNorm2d(oup),
nn.SiLU()
)
class PreNorm(nn.Module):
def __init__(self, dim, fn):
super().__init__()
self.norm = nn.LayerNorm(dim)
self.fn = fn
def forward(self, x, **kwargs):
return self.fn(self.norm(x), **kwargs)
class FeedForward(nn.Module):
def __init__(self, dim, hidden_dim, dropout=0.):
super().__init__()
self.net = nn.Sequential(
nn.Linear(dim, hidden_dim),
nn.SiLU(),
nn.Dropout(dropout),
nn.Linear(hidden_dim, dim),
nn.Dropout(dropout)
)
def forward(self, x):
return self.net(x)
class Attention(nn.Module):
def __init__(self, dim, heads=8, dim_head=64, dropout=0.):
super().__init__()
inner_dim = dim_head * heads
project_out = not (heads == 1 and dim_head == dim)
self.heads = heads
self.scale = dim_head ** -0.5
self.attend = nn.Softmax(dim=-1)
self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
self.to_out = nn.Sequential(
nn.Linear(inner_dim, dim),
nn.Dropout(dropout)
) if project_out else nn.Identity()
def forward(self, x):
qkv = self.to_qkv(x).chunk(3, dim=-1)
q, k, v = map(lambda t: rearrange(t, 'b p n (h d) -> b p h n d', h=self.heads), qkv)
dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
attn = self.attend(dots)
out = torch.matmul(attn, v)
out = rearrange(out, 'b p h n d -> b p n (h d)')
return self.to_out(out)
class Transformer(nn.Module):
def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
super().__init__()
self.layers = nn.ModuleList([])
for _ in range(depth):
self.layers.append(nn.ModuleList([
PreNorm(dim, Attention(dim, heads, dim_head, dropout)),
PreNorm(dim, FeedForward(dim, mlp_dim, dropout))
]))
def forward(self, x):
for attn, ff in self.layers:
x = attn(x) + x
x = ff(x) + x
return x
class MV2Block(nn.Module):
def __init__(self, inp, oup, stride=1, expansion=4):
super().__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(inp * expansion)
self.use_res_connect = self.stride == 1 and inp == oup
if expansion == 1:
self.conv = nn.Sequential(
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.SiLU(),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
else:
self.conv = nn.Sequential(
# pw
nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.SiLU(),
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.SiLU(),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileViTBlock(nn.Module):
def __init__(self, dim, depth, channel, kernel_size, patch_size, mlp_dim, dropout=0.):
super().__init__()
self.ph, self.pw = patch_size
self.conv1 = conv_nxn_bn(channel, channel, kernel_size)
self.conv2 = conv_1x1_bn(channel, dim)
self.transformer = Transformer(dim, depth, 4, 8, mlp_dim, dropout)
self.conv3 = conv_1x1_bn(dim, channel)
self.conv4 = conv_nxn_bn(2 * channel, channel, kernel_size)
def forward(self, x):
y = x.clone()
# Local representations
x = self.conv1(x)
x = self.conv2(x)
# Global representations
_, _, h, w = x.shape
x = rearrange(x, 'b d (h ph) (w pw) -> b (ph pw) (h w) d', ph=self.ph, pw=self.pw)
x = self.transformer(x)
x = rearrange(x, 'b (ph pw) (h w) d -> b d (h ph) (w pw)', h=h // self.ph, w=w // self.pw, ph=self.ph,
pw=self.pw)
# Fusion
x = self.conv3(x)
x = torch.cat((x, y), 1)
x = self.conv4(x)
return x
class MobileViT(nn.Module):
def __init__(self, image_size, dims, channels, num_classes, expansion=4, kernel_size=3, patch_size=(2, 2)):
super().__init__()
ih, iw = image_size
ph, pw = patch_size
assert ih % ph == 0 and iw % pw == 0
L = [2, 4, 3]
self.conv1 = conv_nxn_bn(3, channels[0], stride=2)
self.mv2 = nn.ModuleList([])
self.mv2.append(MV2Block(channels[0], channels[1], 1, expansion))
self.mv2.append(MV2Block(channels[1], channels[2], 2, expansion))
self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion))
self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion)) # Repeat
self.mv2.append(MV2Block(channels[3], channels[4], 2, expansion))
self.mv2.append(MV2Block(channels[5], channels[6], 2, expansion))
self.mv2.append(MV2Block(channels[7], channels[8], 2, expansion))
self.mvit = nn.ModuleList([])
self.mvit.append(MobileViTBlock(dims[0], L[0], channels[5], kernel_size, patch_size, int(dims[0] * 2)))
self.mvit.append(MobileViTBlock(dims[1], L[1], channels[7], kernel_size, patch_size, int(dims[1] * 4)))
self.mvit.append(MobileViTBlock(dims[2], L[2], channels[9], kernel_size, patch_size, int(dims[2] * 4)))
self.conv2 = conv_1x1_bn(channels[-2], channels[-1])
self.pool = nn.AvgPool2d(ih // 32, 1)
self.fc = nn.Linear(channels[-1], num_classes, bias=False)
def forward(self, x):
#print('x',x.shape)
x = self.conv1(x)
x = self.mv2[0](x)
x = self.mv2[1](x)
x = self.mv2[2](x)
x = self.mv2[3](x) # Repeat
x = self.mv2[4](x)
x = self.mvit[0](x)
x = self.mv2[5](x)
x = self.mvit[1](x)
x = self.mv2[6](x)
x = self.mvit[2](x)
x = self.conv2(x)
#print('pool_before',x.shape)
x = self.pool(x).view(-1, x.shape[1])
#print('self_pool',self.pool)
#print('pool_after',x.shape)
x = self.fc(x)
return x
def mobilevit_xxs():
dims = [64, 80, 96]
channels = [16, 16, 24, 24, 48, 48, 64, 64, 80, 80, 320]
return MobileViT((256, 256), dims, channels, num_classes=1000, expansion=2)
def mobilevit_xs():
dims = [96, 120, 144]
channels = [16, 32, 48, 48, 64, 64, 80, 80, 96, 96, 384]
return MobileViT((256, 256), dims, channels, num_classes=1000)
def mobilevit_s():
dims = [144, 192, 240]
channels = [16, 32, 64, 64, 96, 96, 128, 128, 160, 160, 640]
return MobileViT((conf.img_size, conf.img_size), dims, channels, num_classes=conf.embedding_size)
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
if __name__ == '__main__':
img = torch.randn(5, 3, 256, 256)
vit = mobilevit_xxs()
out = vit(img)
print(out.shape)
print(count_parameters(vit))
vit = mobilevit_xs()
out = vit(img)
print(out.shape)
print(count_parameters(vit))
vit = mobilevit_s()
out = vit(img)
print(out.shape)
print(count_parameters(vit))

View File

@ -0,0 +1,134 @@
from .CBAM import CBAM
import torch
import torch.nn as nn
from .Tool import GeM as gem
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inchannel, outchannel,stride =1,dowsample=None):
# super(Bottleneck, self).__init__()
super().__init__()
self.conv1 = nn.Conv2d(in_channels=inchannel,out_channels=outchannel, kernel_size=1, stride=1, bias=False)
self.bn1 = nn.BatchNorm2d(outchannel)
self.conv2 = nn.Conv2d(in_channels=outchannel, out_channels=outchannel,kernel_size=3,bias=False, stride=stride,padding=1)
self.bn2 = nn.BatchNorm2d(outchannel)
self.conv3 =nn.Conv2d(in_channels=outchannel, out_channels=outchannel*self.expansion,stride=1,bias=False,kernel_size=1)
self.bn3 = nn.BatchNorm2d(outchannel*self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = dowsample
def forward(self, x):
self.identity = x
# print('>>>>>>>>',type(x))
if self.downsample is not None:
# print('>>>>downsample>>>>', type(self.downsample))
self.identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
# print('>>>>out>>>identity',out.size(),self.identity.size())
out = out+self.identity
out = self.relu(out)
return out
class resnet(nn.Module):
def __init__(self,block=Bottleneck, block_num=[3,4,6,3], num_class=1000):
super().__init__()
self.in_channel = 64
self.conv1 = nn.Conv2d(in_channels=3,
out_channels=self.in_channel,
stride=2,
kernel_size=7,
padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(self.in_channel)
self.relu = nn.ReLU(inplace=True)
self.cbam = CBAM(self.in_channel)
self.cbam1 = CBAM(2048)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, block_num[0],stride=1)
self.layer2 = self._make_layer(block, 128, block_num[1],stride=2)
self.layer3 = self._make_layer(block, 256, block_num[2],stride=2)
self.layer4 = self._make_layer(block, 512, block_num[3],stride=2)
self.avgpool = nn.AdaptiveAvgPool2d((1,1))
self.gem = gem()
self.fc = nn.Linear(512*block.expansion, num_class)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal(m.weight,mode = 'fan_out',
nonlinearity='relu')
if isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1.0)
nn.init.constant_(m.bias, 1.0)
def _make_layer(self,block ,channel, block_num, stride=1):
downsample = None
if stride !=1 or self.in_channel != channel*block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.in_channel, channel*block.expansion,kernel_size=1,stride=stride,bias=False),
nn.BatchNorm2d(channel*block.expansion))
layer = []
layer.append(block(self.in_channel, channel, stride, downsample))
self.in_channel = channel*block.expansion
for _ in range(1, block_num):
layer.append(block(self.in_channel, channel))
return nn.Sequential(*layer)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.cbam(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.cbam1(x)
# x = self.avgpool(x)
x = self.gem(x)
x = torch.flatten(x, 1)
x = self.fc(x)
return x
class TripletNet(nn.Module):
def __init__(self, num_class, flag=True):
super(TripletNet, self).__init__()
self.initnet = rescbam(num_class)
self.flag = flag
def forward(self, x1, x2=None, x3=None):
if self.flag:
output1 = self.initnet(x1)
output2 = self.initnet(x2)
output3 = self.initnet(x3)
return output1, output2, output3
else:
output = self.initnet(x1)
return output
def rescbam(num_class):
return resnet(block=Bottleneck, block_num=[3,4,6,3],num_class=num_class)
if __name__ =='__main__':
input1 = torch.randn(4,3,640,640)
input2 = torch.randn(4,3,640,640)
input3 = torch.randn(4,3,640,640)
#rescbam测试
# Resnet50 = rescbam(512)
# output = Resnet50.forward(input1)
# print(Resnet50)
#trnet测试
trnet = TripletNet(512)
output = trnet(input1, input2, input3)
print(output)

View File

@ -0,0 +1,182 @@
"""resnet in pytorch
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
Deep Residual Learning for Image Recognition
https://arxiv.org/abs/1512.03385v1
"""
import torch
import torch.nn as nn
from config import config as conf
class BasicBlock(nn.Module):
"""Basic Block for resnet 18 and resnet 34
"""
#BasicBlock and BottleNeck block
#have different output size
#we use class attribute expansion
#to distinct
expansion = 1
def __init__(self, in_channels, out_channels, stride=1):
super().__init__()
#residual function
self.residual_function = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
)
#shortcut
self.shortcut = nn.Sequential()
#the shortcut output dimension is not the same with residual function
#use 1*1 convolution to match the dimension
if stride != 1 or in_channels != BasicBlock.expansion * out_channels:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
)
def forward(self, x):
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
class BottleNeck(nn.Module):
"""Residual block for resnet over 50 layers
"""
expansion = 4
def __init__(self, in_channels, out_channels, stride=1):
super().__init__()
self.residual_function = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels, stride=stride, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels * BottleNeck.expansion),
)
self.shortcut = nn.Sequential()
if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * BottleNeck.expansion, stride=stride, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels * BottleNeck.expansion)
)
def forward(self, x):
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
class ResNet(nn.Module):
def __init__(self, block, num_block, num_classes=conf.embedding_size):
super().__init__()
self.in_channels = 64
# self.conv1 = nn.Sequential(
# nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False),
# nn.BatchNorm2d(64),
# nn.ReLU(inplace=True))
self.conv1 = nn.Sequential(
nn.Conv2d(3, 64,stride=2,kernel_size=7,padding=3,bias=False),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
#we use a different inputsize than the original paper
#so conv2_x's stride is 1
self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
self.conv5_x = self._make_layer(block, 512, num_block[3], 2)
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal(m.weight,mode = 'fan_out',
nonlinearity='relu')
if isinstance(m, (nn.BatchNorm2d)):
nn.init.constant_(m.weight, 1.0)
nn.init.constant_(m.bias, 1.0)
def _make_layer(self, block, out_channels, num_blocks, stride):
"""make resnet layers(by layer i didnt mean this 'layer' was the
same as a neuron netowork layer, ex. conv layer), one layer may
contain more than one residual block
Args:
block: block type, basic block or bottle neck block
out_channels: output depth channel number of this layer
num_blocks: how many blocks per layer
stride: the stride of the first block of this layer
Return:
return a resnet layer
"""
# we have num_block blocks per layer, the first block
# could be 1 or 2, other blocks would always be 1
strides = [stride] + [1] * (num_blocks - 1)
layers = []
for stride in strides:
layers.append(block(self.in_channels, out_channels, stride))
self.in_channels = out_channels * block.expansion
return nn.Sequential(*layers)
def forward(self, x):
output = self.conv1(x)
output = self.conv2_x(output)
output = self.conv3_x(output)
output = self.conv4_x(output)
output = self.conv5_x(output)
print('pollBefore',output.shape)
output = self.avg_pool(output)
print('poolAfter',output.shape)
output = output.view(output.size(0), -1)
print('fcBefore',output.shape)
output = self.fc(output)
return output
def resnet18():
""" return a ResNet 18 object
"""
return ResNet(BasicBlock, [2, 2, 2, 2])
def resnet34():
""" return a ResNet 34 object
"""
return ResNet(BasicBlock, [3, 4, 6, 3])
def resnet50():
""" return a ResNet 50 object
"""
return ResNet(BottleNeck, [3, 4, 6, 3])
def resnet101():
""" return a ResNet 101 object
"""
return ResNet(BottleNeck, [3, 4, 23, 3])
def resnet152():
""" return a ResNet 152 object
"""
return ResNet(BottleNeck, [3, 8, 36, 3])

View File

@ -0,0 +1,120 @@
""" Resnet_IR_SE in ArcFace """
import torch
import torch.nn as nn
import torch.nn.functional as F
class Flatten(nn.Module):
def forward(self, x):
return x.reshape(x.shape[0], -1)
class SEConv(nn.Module):
"""Use Convolution instead of FullyConnection in SE"""
def __init__(self, channels, reduction):
super().__init__()
self.net = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(channels, channels // reduction, kernel_size=1, bias=False),
nn.ReLU(inplace=True),
nn.Conv2d(channels // reduction, channels, kernel_size=1, bias=False),
nn.Sigmoid(),
)
def forward(self, x):
return self.net(x) * x
class SE(nn.Module):
def __init__(self, channels, reduction):
super().__init__()
self.net = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Linear(channels, channels // reduction),
nn.ReLU(inplace=True),
nn.Linear(channels // reduction, channels),
nn.Sigmoid(),
)
def forward(self, x):
return self.net(x) * x
class IRSE(nn.Module):
def __init__(self, channels, depth, stride):
super().__init__()
if channels == depth:
self.shortcut = nn.MaxPool2d(kernel_size=1, stride=stride)
else:
self.shortcut = nn.Sequential(
nn.Conv2d(channels, depth, (1, 1), stride, bias=False),
nn.BatchNorm2d(depth),
)
self.residual = nn.Sequential(
nn.BatchNorm2d(channels),
nn.Conv2d(channels, depth, (3, 3), 1, 1, bias=False),
nn.PReLU(depth),
nn.Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
nn.BatchNorm2d(depth),
SEConv(depth, 16),
)
def forward(self, x):
return self.shortcut(x) + self.residual(x)
class ResIRSE(nn.Module):
"""Resnet50-IRSE backbone"""
def __init__(self, ih,embedding_size, drop_ratio):
super().__init__()
ih_last = ih // 16
self.input_layer = nn.Sequential(
nn.Conv2d(3, 64, (3, 3), 1, 1, bias=False),
nn.BatchNorm2d(64),
nn.PReLU(64),
)
self.output_layer = nn.Sequential(
nn.BatchNorm2d(512),
nn.Dropout(drop_ratio),
Flatten(),
nn.Linear(512 * ih_last * ih_last, embedding_size),
nn.BatchNorm1d(embedding_size),
)
# ["channels", "depth", "stride"],
self.res50_arch = [
[64, 64, 2], [64, 64, 1], [64, 64, 1],
[64, 128, 2], [128, 128, 1], [128, 128, 1], [128, 128, 1],
[128, 256, 2], [256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1],
[256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1],
[256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1],
[256, 512, 2], [512, 512, 1], [512, 512, 1],
]
self.body = nn.Sequential(*[ IRSE(a,b,c) for (a,b,c) in self.res50_arch ])
def forward(self, x):
x = self.input_layer(x)
x = self.body(x)
x = self.output_layer(x)
return x
if __name__ == "__main__":
from PIL import Image
import numpy as np
x = Image.open("../samples/009.jpg").convert('L')
x = x.resize((128, 128))
x = np.asarray(x, dtype=np.float32)
x = x[None, None, ...]
x = torch.from_numpy(x)
net = ResIRSE(512, 0.6)
net.eval()
with torch.no_grad():
out = net(x)
print(out.shape)

View File

@ -0,0 +1,384 @@
import torch
import torch.nn as nn
# from config import config as conf
from ..config import config as conf
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url
#from .utils import load_state_dict_from_url
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
'wide_resnet50_2', 'wide_resnet101_2']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
# Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
# while original implementation places the stride at the first 1x1 convolution(self.conv1)
# according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
# This variant is also known as ResNet V1.5 and improves accuracy according to
# https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=conf.embedding_size, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def _forward_impl(self, x):
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
#print('poolBefore', x.shape)
x = self.avgpool(x)
#print('poolAfter', x.shape)
x = torch.flatten(x, 1)
#print('fcBefore',x.shape)
x = self.fc(x)
# print('fcAfter',x.shape)
return x
def forward(self, x):
return self._forward_impl(x)
# def _resnet(arch, block, layers, pretrained, progress, **kwargs):
# model = ResNet(block, layers, **kwargs)
# if pretrained:
# state_dict = load_state_dict_from_url(model_urls[arch],
# progress=progress)
# model.load_state_dict(state_dict, strict=False)
# return model
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
model = ResNet(block, layers, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls[arch],
progress=progress)
#print('state_dict',state_dict)
src_state_dict = state_dict
target_state_dict = model.state_dict()
skip_keys = []
# skip mismatch size tensors in case of pretraining
for k in src_state_dict.keys():
if k not in target_state_dict:
continue
if src_state_dict[k].size() != target_state_dict[k].size():
skip_keys.append(k)
for k in skip_keys:
del src_state_dict[k]
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
return model
def resnet18(pretrained=True, progress=True, **kwargs):
r"""ResNet-18 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
**kwargs)
def resnet34(pretrained=False, progress=True, **kwargs):
r"""ResNet-34 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet50(pretrained=False, progress=True, **kwargs):
r"""ResNet-50 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet101(pretrained=False, progress=True, **kwargs):
r"""ResNet-101 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
**kwargs)
def resnet152(pretrained=False, progress=True, **kwargs):
r"""ResNet-152 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
**kwargs)
def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-50 32x4d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 4
return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-101 32x8d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 8
return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)
def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-50-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-101-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)

View File

@ -0,0 +1,4 @@
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url

View File

@ -0,0 +1,144 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 18 17:21:01 2024
@author: ym
"""
import numpy as np
import torch
import cv2
import torch.nn as nn
import torchvision.transforms as T
from .model import mobilevit_s, resnet18, resnet34, resnet50, mobilenet_v2, MobileNetV3_Small
from .config import config as conf
class ReIDInterface:
def __init__(self, config):
self.device = conf.device
if conf.backbone == 'resnet18':
# model = ResIRSE(img_size, embedding_size, conf.drop_ratio).to(device)
model = resnet18().to(self.device)
elif conf.backbone == 'resnet34':
model = resnet34().to(self.device)
elif conf.backbone == 'resnet50':
model = resnet50().to(self.device)
elif conf.backbone == 'mobilevit_s':
model = mobilevit_s().to(self.device)
elif conf.backbone == 'mobilenetv3':
model = MobileNetV3_Small().to(self.device)
else:
model = mobilenet_v2().to(self.device)
self.batch_size = conf.batch_size
self.embedding_size = conf.embedding_size
self.img_size = conf.img_size
self.model_path = conf.model_path
# 原输入为PIL
self.transform = T.Compose([
T.ToTensor(),
T.Resize((self.img_size, self.img_size)),
T.ConvertImageDtype(torch.float32),
T.Normalize(mean=[0.5], std=[0.5]),
])
# self.model = nn.DataParallel(model).to(self.device)
self.model = model
self.model.load_state_dict(torch.load(self.model_path, map_location=self.device))
self.model.eval()
def inference(self, images, detections):
if isinstance(images, np.ndarray):
features = self.inference_image(images, detections)
return features
batch_patches = []
patches = []
for i, img in enumerate(images):
img = img.copy()
patch = self.transform(img)
if str(self.device) != "cpu":
patch = patch.to(device=self.device).half()
else:
patch = patch.to(device=self.device)
patches.append(patch)
if (i + 1) % self.batch_size == 0:
patches = torch.stack(patches, dim=0)
batch_patches.append(patches)
patches = []
if len(patches):
patches = torch.stack(patches, dim=0)
batch_patches.append(patches)
features = np.zeros((0, self.embedding_size))
for patches in batch_patches:
pred=self.model(patches)
pred[torch.isinf(pred)] = 1.0
feat = pred.cpu().data.numpy()
features = np.vstack((features, feat))
return features
def inference_image(self, image, detections):
H, W, _ = np.shape(image)
batch_patches = []
patches = []
for d in range(np.size(detections, 0)):
tlbr = detections[d, :4].astype(np.int_)
tlbr[0] = max(0, tlbr[0])
tlbr[1] = max(0, tlbr[1])
tlbr[2] = min(W - 1, tlbr[2])
tlbr[3] = min(H - 1, tlbr[3])
img = image[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2], :]
img = img[:, :, ::-1].copy() # the model expects RGB inputs
patch = self.transform(img)
# patch = patch.to(device=self.device).half()
if str(self.device) != "cpu":
patch = patch.to(device=self.device).half()
else:
patch = patch.to(device=self.device)
patches.append(patch)
if (d + 1) % self.batch_size == 0:
patches = torch.stack(patches, dim=0)
batch_patches.append(patches)
patches = []
if len(patches):
patches = torch.stack(patches, dim=0)
batch_patches.append(patches)
features = np.zeros((0, self.embedding_size))
for patches in batch_patches:
pred = self.model(patches)
pred[torch.isinf(pred)] = 1.0
feat = pred.cpu().data.numpy()
features = np.vstack((features, feat))
return features

View File

@ -0,0 +1,462 @@
import torch
import torch.nn as nn
from tools.config import config as conf
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url
# from .utils import load_state_dict_from_url
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
'wide_resnet50_2', 'wide_resnet101_2']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class SpatialAttention(nn.Module):
def __init__(self, kernel_size=7):
super(SpatialAttention, self).__init__()
assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
padding = 3 if kernel_size == 7 else 1
self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
x = torch.cat([avg_out, max_out], dim=1)
x = self.conv1(x)
return self.sigmoid(x)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None, cam=False, bam=False):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
self.cam = cam
self.bam = bam
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
if self.cam:
if planes == 64:
self.globalAvgPool = nn.AvgPool2d(56, stride=1)
elif planes == 128:
self.globalAvgPool = nn.AvgPool2d(28, stride=1)
elif planes == 256:
self.globalAvgPool = nn.AvgPool2d(14, stride=1)
elif planes == 512:
self.globalAvgPool = nn.AvgPool2d(7, stride=1)
self.fc1 = nn.Linear(in_features=planes, out_features=round(planes / 16))
self.fc2 = nn.Linear(in_features=round(planes / 16), out_features=planes)
self.sigmod = nn.Sigmoid()
if self.bam:
self.bam = SpatialAttention()
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
if self.cam:
ori_out = self.globalAvgPool(out)
out = out.view(out.size(0), -1)
out = self.fc1(out)
out = self.relu(out)
out = self.fc2(out)
out = self.sigmod(out)
out = out.view(out.size(0), out.size(-1), 1, 1)
out = out * ori_out
if self.bam:
out = out*self.bam(out)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
# Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
# while original implementation places the stride at the first 1x1 convolution(self.conv1)
# according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
# This variant is also known as ResNet V1.5 and improves accuracy according to
# https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None, cam=False, bam=False):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
self.cam = cam
self.bam = bam
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
if self.cam:
if planes == 64:
self.globalAvgPool = nn.AvgPool2d(56, stride=1)
elif planes == 128:
self.globalAvgPool = nn.AvgPool2d(28, stride=1)
elif planes == 256:
self.globalAvgPool = nn.AvgPool2d(14, stride=1)
elif planes == 512:
self.globalAvgPool = nn.AvgPool2d(7, stride=1)
self.fc1 = nn.Linear(planes * self.expansion, round(planes / 4))
self.fc2 = nn.Linear(round(planes / 4), planes * self.expansion)
self.sigmod = nn.Sigmoid()
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
if self.cam:
ori_out = self.globalAvgPool(out)
out = out.view(out.size(0), -1)
out = self.fc1(out)
out = self.relu(out)
out = self.fc2(out)
out = self.sigmod(out)
out = out.view(out.size(0), out.size(-1), 1, 1)
out = out * ori_out
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=conf.embedding_size, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None, scale=0.75):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, int(64*scale), layers[0])
self.layer2 = self._make_layer(block, int(128*scale), layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, int(256*scale), layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, int(512*scale), layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(int(512 * block.expansion*scale), num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def _forward_impl(self, x):
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
# print('poolBefore', x.shape)
x = self.avgpool(x)
# print('poolAfter', x.shape)
x = torch.flatten(x, 1)
# print('fcBefore',x.shape)
x = self.fc(x)
# print('fcAfter',x.shape)
return x
def forward(self, x):
return self._forward_impl(x)
# def _resnet(arch, block, layers, pretrained, progress, **kwargs):
# model = ResNet(block, layers, **kwargs)
# if pretrained:
# state_dict = load_state_dict_from_url(model_urls[arch],
# progress=progress)
# model.load_state_dict(state_dict, strict=False)
# return model
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
model = ResNet(block, layers, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls[arch],
progress=progress)
src_state_dict = state_dict
target_state_dict = model.state_dict()
skip_keys = []
# skip mismatch size tensors in case of pretraining
for k in src_state_dict.keys():
if k not in target_state_dict:
continue
if src_state_dict[k].size() != target_state_dict[k].size():
skip_keys.append(k)
for k in skip_keys:
del src_state_dict[k]
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
return model
def resnet14(pretrained=True, progress=True, **kwargs):
r"""ResNet-14 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 1, 1, 2], pretrained, progress,
**kwargs)
def resnet18(pretrained=True, progress=True, **kwargs):
r"""ResNet-18 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
**kwargs)
def resnet34(pretrained=False, progress=True, **kwargs):
r"""ResNet-34 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet50(pretrained=False, progress=True, **kwargs):
r"""ResNet-50 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet101(pretrained=False, progress=True, **kwargs):
r"""ResNet-101 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
**kwargs)
def resnet152(pretrained=False, progress=True, **kwargs):
r"""ResNet-152 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
**kwargs)
def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-50 32x4d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 4
return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-101 32x8d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 8
return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)
def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-50-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-101-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)

View File

@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 19 16:10:39 2024
@author: ym
"""
import torch
from model.resnet_pre import resnet18
def main():
model_path = "best.pth"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = resnet18().to(device)
model.load_state_dict(torch.load(model_path, map_location=device))
if __name__ == "__main__":
main()

View File

@ -0,0 +1,66 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
from functools import partial
import torch
from ultralytics.utils import IterableSimpleNamespace, yaml_load
from ultralytics.utils.checks import check_yaml
from .bot_sort import BOTSORT
from .byte_tracker import BYTETracker
TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
def on_predict_start(predictor, persist=False):
"""
Initialize trackers for object tracking during prediction.
Args:
predictor (object): The predictor object to initialize trackers for.
persist (bool, optional): Whether to persist the trackers if they already exist. Defaults to False.
Raises:
AssertionError: If the tracker_type is not 'bytetrack' or 'botsort'.
"""
if hasattr(predictor, 'trackers') and persist:
return
tracker = check_yaml(predictor.args.tracker)
cfg = IterableSimpleNamespace(**yaml_load(tracker))
assert cfg.tracker_type in ['bytetrack', 'botsort'], \
f"Only support 'bytetrack' and 'botsort' for now, but got '{cfg.tracker_type}'"
trackers = []
for _ in range(predictor.dataset.bs):
tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
trackers.append(tracker)
predictor.trackers = trackers
def on_predict_postprocess_end(predictor):
"""Postprocess detected boxes and update with object tracking."""
bs = predictor.dataset.bs
im0s = predictor.batch[1]
for i in range(bs):
det = predictor.results[i].boxes.cpu().numpy()
if len(det) == 0:
continue
tracks = predictor.trackers[i].update(det, im0s[i])
if len(tracks) == 0:
continue
idx = tracks[:, -1].astype(int)
predictor.results[i] = predictor.results[i][idx]
predictor.results[i].update(boxes=torch.as_tensor(tracks[:, :-1]))
def register_tracker(model, persist):
"""
Register tracking callbacks to the model for object tracking during prediction.
Args:
model (object): The model object to register tracking callbacks for.
persist (bool): Whether to persist the trackers if they already exist.
"""
model.add_callback('on_predict_start', partial(on_predict_start, persist=persist))
model.add_callback('on_predict_postprocess_end', on_predict_postprocess_end)

View File

@ -0,0 +1,3 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

View File

@ -0,0 +1,279 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import copy
import cv2
import numpy as np
from ultralytics.utils import LOGGER
class GMC:
def __init__(self, method='sparseOptFlow', downscale=2):
"""Initialize a video tracker with specified parameters."""
super().__init__()
self.method = method
self.downscale = max(1, int(downscale))
if self.method == 'orb':
self.detector = cv2.FastFeatureDetector_create(20)
self.extractor = cv2.ORB_create()
self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
elif self.method == 'sift':
self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
self.matcher = cv2.BFMatcher(cv2.NORM_L2)
elif self.method == 'ecc':
number_of_iterations = 5000
termination_eps = 1e-6
self.warp_mode = cv2.MOTION_EUCLIDEAN
self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)
elif self.method == 'sparseOptFlow':
self.feature_params = dict(maxCorners=1000,
qualityLevel=0.01,
minDistance=1,
blockSize=3,
useHarrisDetector=False,
k=0.04)
elif self.method in ['none', 'None', None]:
self.method = None
else:
raise ValueError(f'Error: Unknown GMC method:{method}')
self.prevFrame = None
self.prevKeyPoints = None
self.prevDescriptors = None
self.initializedFirstFrame = False
def apply(self, raw_frame, detections=None):
"""Apply object detection on a raw frame using specified method."""
if self.method in ['orb', 'sift']:
return self.applyFeatures(raw_frame, detections)
elif self.method == 'ecc':
return self.applyEcc(raw_frame, detections)
elif self.method == 'sparseOptFlow':
return self.applySparseOptFlow(raw_frame, detections)
else:
return np.eye(2, 3)
def applyEcc(self, raw_frame, detections=None):
"""Initialize."""
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3, dtype=np.float32)
# Downscale image (TODO: consider using pyramids)
if self.downscale > 1.0:
frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
width = width // self.downscale
height = height // self.downscale
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
# Initialization done
self.initializedFirstFrame = True
return H
# Run the ECC algorithm. The results are stored in warp_matrix.
# (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
try:
(cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
except Exception as e:
LOGGER.warning(f'WARNING: find transform failed. Set warp as identity {e}')
return H
def applyFeatures(self, raw_frame, detections=None):
"""Initialize."""
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3)
# Downscale image (TODO: consider using pyramids)
if self.downscale > 1.0:
# frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
width = width // self.downscale
height = height // self.downscale
# Find the keypoints
mask = np.zeros_like(frame)
# mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
mask[int(0.02 * height):int(0.98 * height), int(0.02 * width):int(0.98 * width)] = 255
if detections is not None:
for det in detections:
tlbr = (det[:4] / self.downscale).astype(np.int_)
mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0
keypoints = self.detector.detect(frame, mask)
# Compute the descriptors
keypoints, descriptors = self.extractor.compute(frame, keypoints)
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
# Initialization done
self.initializedFirstFrame = True
return H
# Match descriptors.
knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)
# Filtered matches based on smallest spatial distance
matches = []
spatialDistances = []
maxSpatialDistance = 0.25 * np.array([width, height])
# Handle empty matches case
if len(knnMatches) == 0:
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
return H
for m, n in knnMatches:
if m.distance < 0.9 * n.distance:
prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
currKeyPointLocation = keypoints[m.trainIdx].pt
spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
prevKeyPointLocation[1] - currKeyPointLocation[1])
if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \
(np.abs(spatialDistance[1]) < maxSpatialDistance[1]):
spatialDistances.append(spatialDistance)
matches.append(m)
meanSpatialDistances = np.mean(spatialDistances, 0)
stdSpatialDistances = np.std(spatialDistances, 0)
inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances
goodMatches = []
prevPoints = []
currPoints = []
for i in range(len(matches)):
if inliers[i, 0] and inliers[i, 1]:
goodMatches.append(matches[i])
prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
currPoints.append(keypoints[matches[i].trainIdx].pt)
prevPoints = np.array(prevPoints)
currPoints = np.array(currPoints)
# Draw the keypoint matches on the output image
# if False:
# import matplotlib.pyplot as plt
# matches_img = np.hstack((self.prevFrame, frame))
# matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
# W = np.size(self.prevFrame, 1)
# for m in goodMatches:
# prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
# curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
# curr_pt[0] += W
# color = np.random.randint(0, 255, 3)
# color = (int(color[0]), int(color[1]), int(color[2]))
#
# matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
# matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)
# matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)
#
# plt.figure()
# plt.imshow(matches_img)
# plt.show()
# Find rigid matrix
if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(prevPoints, 0)):
H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
# Handle downscale
if self.downscale > 1.0:
H[0, 2] *= self.downscale
H[1, 2] *= self.downscale
else:
LOGGER.warning('WARNING: not enough matching points')
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
return H
def applySparseOptFlow(self, raw_frame, detections=None):
"""Initialize."""
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3)
# Downscale image
if self.downscale > 1.0:
# frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
# Find the keypoints
keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
# Initialization done
self.initializedFirstFrame = True
return H
# Find correspondences
matchedKeypoints, status, err = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)
# Leave good correspondences only
prevPoints = []
currPoints = []
for i in range(len(status)):
if status[i]:
prevPoints.append(self.prevKeyPoints[i])
currPoints.append(matchedKeypoints[i])
prevPoints = np.array(prevPoints)
currPoints = np.array(currPoints)
# Find rigid matrix
if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(prevPoints, 0)):
H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
# Handle downscale
if self.downscale > 1.0:
H[0, 2] *= self.downscale
H[1, 2] *= self.downscale
else:
LOGGER.warning('WARNING: not enough matching points')
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
return H

View File

@ -0,0 +1,368 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import numpy as np
import scipy.linalg
class KalmanFilterXYAH:
"""
For bytetrack. A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space (x, y, a, h, vx, vy, va, vh) contains the bounding box center position (x, y),
aspect ratio a, height h, and their respective velocities.
Object motion follows a constant velocity model. The bounding box location (x, y, a, h) is taken as direct
observation of the state space (linear observation model).
"""
def __init__(self):
"""Initialize Kalman filter model matrices with motion and observation uncertainty weights."""
ndim, dt = 4, 1.
# Create Kalman filter model matrices.
self._motion_mat = np.eye(2 * ndim, 2 * ndim)
for i in range(ndim):
self._motion_mat[i, ndim + i] = dt
self._update_mat = np.eye(ndim, 2 * ndim)
# Motion and observation uncertainty are chosen relative to the current state estimate. These weights control
# the amount of uncertainty in the model. This is a bit hacky.
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initiate(self, measurement):
"""
Create track from unassociated measurement.
Parameters
----------
measurement : ndarray
Bounding box coordinates (x, y, a, h) with center position (x, y),
aspect ratio a, and height h.
Returns
-------
(ndarray, ndarray)
Returns the mean vector (8 dimensional) and covariance matrix (8x8
dimensional) of the new track. Unobserved velocities are initialized
to 0 mean.
"""
mean_pos = measurement
mean_vel = np.zeros_like(mean_pos)
mean = np.r_[mean_pos, mean_vel]
std = [
2 * self._std_weight_position * measurement[3], 2 * self._std_weight_position * measurement[3], 1e-2,
2 * self._std_weight_position * measurement[3], 10 * self._std_weight_velocity * measurement[3],
10 * self._std_weight_velocity * measurement[3], 1e-5, 10 * self._std_weight_velocity * measurement[3]]
covariance = np.diag(np.square(std))
return mean, covariance
def predict(self, mean, covariance):
"""
Run Kalman filter prediction step.
Parameters
----------
mean : ndarray
The 8 dimensional mean vector of the object state at the previous time step.
covariance : ndarray
The 8x8 dimensional covariance matrix of the object state at the previous time step.
Returns
-------
(ndarray, ndarray)
Returns the mean vector and covariance matrix of the predicted state. Unobserved velocities are
initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-2,
self._std_weight_position * mean[3]]
std_vel = [
self._std_weight_velocity * mean[3], self._std_weight_velocity * mean[3], 1e-5,
self._std_weight_velocity * mean[3]]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
# mean = np.dot(self._motion_mat, mean)
mean = np.dot(mean, self._motion_mat.T)
covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
return mean, covariance
def project(self, mean, covariance):
"""
Project state distribution to measurement space.
Parameters
----------
mean : ndarray
The state's mean vector (8 dimensional array).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
Returns
-------
(ndarray, ndarray)
Returns the projected mean and covariance matrix of the given state estimate.
"""
std = [
self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-1,
self._std_weight_position * mean[3]]
innovation_cov = np.diag(np.square(std))
mean = np.dot(self._update_mat, mean)
covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T))
return mean, covariance + innovation_cov
def multi_predict(self, mean, covariance):
"""
Run Kalman filter prediction step (Vectorized version).
Parameters
----------
mean : ndarray
The Nx8 dimensional mean matrix of the object states at the previous time step.
covariance : ndarray
The Nx8x8 dimensional covariance matrix of the object states at the previous time step.
Returns
-------
(ndarray, ndarray)
Returns the mean vector and covariance matrix of the predicted state. Unobserved velocities are
initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[:, 3], self._std_weight_position * mean[:, 3],
1e-2 * np.ones_like(mean[:, 3]), self._std_weight_position * mean[:, 3]]
std_vel = [
self._std_weight_velocity * mean[:, 3], self._std_weight_velocity * mean[:, 3],
1e-5 * np.ones_like(mean[:, 3]), self._std_weight_velocity * mean[:, 3]]
sqr = np.square(np.r_[std_pos, std_vel]).T
motion_cov = [np.diag(sqr[i]) for i in range(len(mean))]
motion_cov = np.asarray(motion_cov)
mean = np.dot(mean, self._motion_mat.T)
left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
covariance = np.dot(left, self._motion_mat.T) + motion_cov
return mean, covariance
def update(self, mean, covariance, measurement):
"""
Run Kalman filter correction step.
Parameters
----------
mean : ndarray
The predicted state's mean vector (8 dimensional).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
measurement : ndarray
The 4 dimensional measurement vector (x, y, a, h), where (x, y) is the center position, a the aspect
ratio, and h the height of the bounding box.
Returns
-------
(ndarray, ndarray)
Returns the measurement-corrected state distribution.
"""
projected_mean, projected_cov = self.project(mean, covariance)
chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False)
kalman_gain = scipy.linalg.cho_solve((chol_factor, lower),
np.dot(covariance, self._update_mat.T).T,
check_finite=False).T
innovation = measurement - projected_mean
new_mean = mean + np.dot(innovation, kalman_gain.T)
new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T))
return new_mean, new_covariance
def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'):
"""
Compute gating distance between state distribution and measurements. A suitable distance threshold can be
obtained from `chi2inv95`. If `only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Parameters
----------
mean : ndarray
Mean vector over the state distribution (8 dimensional).
covariance : ndarray
Covariance of the state distribution (8x8 dimensional).
measurements : ndarray
An Nx4 dimensional matrix of N measurements, each in format (x, y, a, h) where (x, y) is the bounding box
center position, a the aspect ratio, and h the height.
only_position : Optional[bool]
If True, distance computation is done with respect to the bounding box center position only.
Returns
-------
ndarray
Returns an array of length N, where the i-th element contains the squared Mahalanobis distance between
(mean, covariance) and `measurements[i]`.
"""
mean, covariance = self.project(mean, covariance)
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
d = measurements - mean
if metric == 'gaussian':
return np.sum(d * d, axis=1)
elif metric == 'maha':
cholesky_factor = np.linalg.cholesky(covariance)
z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True)
return np.sum(z * z, axis=0) # square maha
else:
raise ValueError('invalid distance metric')
class KalmanFilterXYWH(KalmanFilterXYAH):
"""
For BoT-SORT. A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space (x, y, w, h, vx, vy, vw, vh) contains the bounding box center position (x, y),
width w, height h, and their respective velocities.
Object motion follows a constant velocity model. The bounding box location (x, y, w, h) is taken as direct
observation of the state space (linear observation model).
"""
def initiate(self, measurement):
"""
Create track from unassociated measurement.
Parameters
----------
measurement : ndarray
Bounding box coordinates (x, y, w, h) with center position (x, y), width w, and height h.
Returns
-------
(ndarray, ndarray)
Returns the mean vector (8 dimensional) and covariance matrix (8x8 dimensional) of the new track.
Unobserved velocities are initialized to 0 mean.
"""
mean_pos = measurement
mean_vel = np.zeros_like(mean_pos)
mean = np.r_[mean_pos, mean_vel]
std = [
2 * self._std_weight_position * measurement[2], 2 * self._std_weight_position * measurement[3],
2 * self._std_weight_position * measurement[2], 2 * self._std_weight_position * measurement[3],
10 * self._std_weight_velocity * measurement[2], 10 * self._std_weight_velocity * measurement[3],
10 * self._std_weight_velocity * measurement[2], 10 * self._std_weight_velocity * measurement[3]]
covariance = np.diag(np.square(std))
return mean, covariance
def predict(self, mean, covariance):
"""
Run Kalman filter prediction step.
Parameters
----------
mean : ndarray
The 8 dimensional mean vector of the object state at the previous time step.
covariance : ndarray
The 8x8 dimensional covariance matrix of the object state at the previous time step.
Returns
-------
(ndarray, ndarray)
Returns the mean vector and covariance matrix of the predicted state. Unobserved velocities are
initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[2], self._std_weight_position * mean[3],
self._std_weight_position * mean[2], self._std_weight_position * mean[3]]
std_vel = [
self._std_weight_velocity * mean[2], self._std_weight_velocity * mean[3],
self._std_weight_velocity * mean[2], self._std_weight_velocity * mean[3]]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
mean = np.dot(mean, self._motion_mat.T)
covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
return mean, covariance
def project(self, mean, covariance):
"""
Project state distribution to measurement space.
Parameters
----------
mean : ndarray
The state's mean vector (8 dimensional array).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
Returns
-------
(ndarray, ndarray)
Returns the projected mean and covariance matrix of the given state estimate.
"""
std = [
self._std_weight_position * mean[2], self._std_weight_position * mean[3],
self._std_weight_position * mean[2], self._std_weight_position * mean[3]]
innovation_cov = np.diag(np.square(std))
mean = np.dot(self._update_mat, mean)
covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T))
return mean, covariance + innovation_cov
def multi_predict(self, mean, covariance):
"""
Run Kalman filter prediction step (Vectorized version).
Parameters
----------
mean : ndarray
The Nx8 dimensional mean matrix of the object states at the previous time step.
covariance : ndarray
The Nx8x8 dimensional covariance matrix of the object states at the previous time step.
Returns
-------
(ndarray, ndarray)
Returns the mean vector and covariance matrix of the predicted state. Unobserved velocities are
initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[:, 2], self._std_weight_position * mean[:, 3],
self._std_weight_position * mean[:, 2], self._std_weight_position * mean[:, 3]]
std_vel = [
self._std_weight_velocity * mean[:, 2], self._std_weight_velocity * mean[:, 3],
self._std_weight_velocity * mean[:, 2], self._std_weight_velocity * mean[:, 3]]
sqr = np.square(np.r_[std_pos, std_vel]).T
motion_cov = [np.diag(sqr[i]) for i in range(len(mean))]
motion_cov = np.asarray(motion_cov)
mean = np.dot(mean, self._motion_mat.T)
left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
covariance = np.dot(left, self._motion_mat.T) + motion_cov
return mean, covariance
def update(self, mean, covariance, measurement):
"""
Run Kalman filter correction step.
Parameters
----------
mean : ndarray
The predicted state's mean vector (8 dimensional).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
measurement : ndarray
The 4 dimensional measurement vector (x, y, w, h), where (x, y) is the center position, w the width,
and h the height of the bounding box.
Returns
-------
(ndarray, ndarray)
Returns the measurement-corrected state distribution.
"""
return super().update(mean, covariance, measurement)

View File

@ -0,0 +1,215 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import numpy as np
import math
import torch
import scipy
from scipy.spatial.distance import cdist
# from ultralytics.utils.metrics import bbox_ioa
try:
import lap # for linear_assignment
assert lap.__version__ # verify package is not directory
except (ImportError, AssertionError, AttributeError):
from ultralytics.utils.checks import check_requirements
check_requirements('lapx>=0.5.2') # update to lap package from https://github.com/rathaROG/lapx
import lap
def bbox_iou(box1, box2, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
'''由根目录下 utils.metrics.metrics.bbox_iou 更改而来'''
# Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4)
# Get the coordinates of bounding boxes
# x1, y1, x2, y2 = box1
# box1 = torch.tensor(box1)
# box2 = torch.tensor(box2)
b1_x1, b1_y1, b1_x2, b1_y2 = box1.T
b2_x1, b2_y1, b2_x2, b2_y2 = box2.T
w1, h1 = b1_x2 - b1_x1, (b1_y2 - b1_y1).clip(eps)
w2, h2 = b2_x2 - b2_x1, (b2_y2 - b2_y1).clip(eps)
# Intersection area
# inter = (b1_x2.minimum(b2_x2) - b1_x1.maximum(b2_x1)).clamp(0) * \
# (b1_y2.minimum(b2_y2) - b1_y1.maximum(b2_y1)).clamp(0)
inter = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \
(np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0)
# Union Area
box1_area = w1 * h1
box2_area = w2 * h2
union = box1_area[:, None] + box2_area - inter + eps
# IoU
iou = inter / union
if CIoU or DIoU or GIoU:
cw = np.maximum(b1_x2[:, None], b2_x2) - np.minimum(b1_x1[:, None], b2_x1) # convex (smallest enclosing box) width
ch = np.maximum(b1_y2[:, None], b2_y2) - np.minimum(b1_y1[:, None], b2_y1) # convex height
if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared
'''center dist ** 2'''
rho2 = ((b1_x1[:, None] + b1_x2[:, None] - b2_x1 - b2_x2) ** 2 + \
(b1_y1[:, None] + b1_y2[:, None] - b2_y1 - b2_y2) ** 2) / 4
if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
v = (4 / math.pi ** 2) * (np.arctan(w1 / h1)[:, None] - np.arctan(w2 / h2))**2
with torch.no_grad():
alpha = v / (v - iou + (1 + eps))
return iou - (rho2 / c2 + v * alpha) # CIoU
return iou - rho2 / c2 # DIoU
c_area = cw * ch + eps # convex area
return iou - (c_area - union) / c_area # GIoU https://arxiv.org/pdf/1902.09630.pdf
return iou # IoU
def bbox_ioa(box1, box2, iou=False, eps=1e-7):
"""
Calculate the intersection over box2 area given box1 and box2. Boxes are in x1y1x2y2 format.
Args:
box1 (np.array): A numpy array of shape (n, 4) representing n bounding boxes.
box2 (np.array): A numpy array of shape (m, 4) representing m bounding boxes.
iou (bool): Calculate the standard iou if True else return inter_area/box2_area.
eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.
Returns:
(np.array): A numpy array of shape (n, m) representing the intersection over box2 area.
"""
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1.T
b2_x1, b2_y1, b2_x2, b2_y2 = box2.T
# Intersection area
inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \
(np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0)
# box2 area
area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
if iou:
box1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
area = area + box1_area[:, None] - inter_area
# Intersection over box2 area
return inter_area / (area + eps)
# def linear_assignment(cost_matrix, thresh, use_lap=True):
def linear_assignment(cost_matrix: np.ndarray, thresh: float, use_lap: bool = True) -> tuple:
"""
Perform linear assignment using scipy or lap.lapjv.
Args:
cost_matrix (np.ndarray): The matrix containing cost values for assignments.
thresh (float): Threshold for considering an assignment valid.
use_lap (bool, optional): Whether to use lap.lapjv. Defaults to True.
Returns:
(tuple): Tuple containing matched indices, unmatched indices from 'a', and unmatched indices from 'b'.
"""
if cost_matrix.size == 0:
return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
if use_lap:
# https://github.com/gatagat/lap
_, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
matches = [[ix, mx] for ix, mx in enumerate(x) if mx >= 0]
unmatched_a = np.where(x < 0)[0]
unmatched_b = np.where(y < 0)[0]
else:
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html
x, y = scipy.optimize.linear_sum_assignment(cost_matrix) # row x, col y
matches = np.asarray([[x[i], y[i]] for i in range(len(x)) if cost_matrix[x[i], y[i]] <= thresh])
if len(matches) == 0:
unmatched_a = list(np.arange(cost_matrix.shape[0]))
unmatched_b = list(np.arange(cost_matrix.shape[1]))
else:
unmatched_a = list(set(np.arange(cost_matrix.shape[0])) - set(matches[:, 0]))
unmatched_b = list(set(np.arange(cost_matrix.shape[1])) - set(matches[:, 1]))
return matches, unmatched_a, unmatched_b
# def iou_distance(atracks, btracks):
def iou_distance(atracks: list, btracks: list) -> np.ndarray:
"""
Compute cost based on Intersection over Union (IoU) between tracks.
Args:
atracks (list[STrack] | list[np.ndarray]): List of tracks 'a' or bounding boxes.
btracks (list[STrack] | list[np.ndarray]): List of tracks 'b' or bounding boxes.
Returns:
(np.ndarray): Cost matrix computed based on IoU.
"""
if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) \
or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
atlbrs = atracks
btlbrs = btracks
else:
atlbrs = [track.tlbr for track in atracks]
btlbrs = [track.tlbr for track in btracks]
ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float32)
if len(atlbrs) and len(btlbrs):
box1 = np.ascontiguousarray(atlbrs, dtype=np.float32)
box2 = np.ascontiguousarray(btlbrs, dtype=np.float32)
ious = bbox_ioa(box1, box2, iou=True)
ious_g = bbox_iou(box1, box2, GIoU=True).clip(-1.0, 1.0)
ious_d = bbox_iou(box1, box2, DIoU=True).clip(-1.0, 1.0)
ious_c = bbox_iou(box1, box2, CIoU=True).clip(-1.0, 1.0)
return 1 - ious # cost matrix
def embedding_distance(tracks, detections, metric='cosine'):
"""
Compute distance between tracks and detections based on embeddings.
Args:
tracks (list[STrack]): List of tracks.
detections (list[BaseTrack]): List of detections.
metric (str, optional): Metric for distance computation. Defaults to 'cosine'.
Returns:
(np.ndarray): Cost matrix computed based on embeddings.
"""
cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32)
if cost_matrix.size == 0:
return cost_matrix
det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float32)
# for i, track in enumerate(tracks):
# cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float32)
cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Normalized features
return cost_matrix
def fuse_score(cost_matrix, detections):
"""
Fuses cost matrix with detection scores to produce a single similarity matrix.
Args:
cost_matrix (np.ndarray): The matrix containing cost values for assignments.
detections (list[BaseTrack]): List of detections with scores.
Returns:
(np.ndarray): Fused similarity matrix.
"""
if cost_matrix.size == 0:
return cost_matrix
iou_sim = 1 - cost_matrix
det_scores = np.array([det.score for det in detections])
det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
fuse_sim = iou_sim * det_scores
return 1 - fuse_sim # fuse_cost

View File

@ -0,0 +1,156 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 20 17:33:00 2023
@author: ym
"""
import cv2
import os
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
import pandas as pd
from scipy.spatial.distance import cdist
from pathlib import Path
# ================= using for import ultralytics
import sys
sys.path.append(r"D:\DetectTracking")
from utils.gen import Profile
from dotrack.dotracks_back import doBackTracks
from dotrack.dotracks_front import doFrontTracks
from utils.drawtracks import draw5points, drawTrack, drawtracefeat, plot_frameID_y2, drawFeatures, draw_all_trajectories
# from datetime import datetime
# from utils.proBoxes import boxes_add_fid
# from utils.plotting import boxing_img #, Annotator, colors,
# from utils import Boxes, IterableSimpleNamespace, yaml_load
# from trackers import BOTSORT, BYTETracker
# from utils.mergetrack import track_equal_track
# from utils.basetrack import MoveState, ShoppingCart, doTracks
def detect_start_end(bboxes, features_dict, filename):
boxes = np.empty(shape=(0, 9), dtype = np.float)
if filename.find("back") >= 0:
vts = doBackTracks(bboxes, features_dict)
vtx = [t for t in vts if t.cls != 0]
for track in vtx:
if track.moving_index.size:
boxes = np.concatenate((boxes, track.moving_index), axis=0)
elif filename.find("front") >= 0:
vts = doFrontTracks(bboxes, features_dict)
vtx = [t for t in vts if t.cls != 0]
for track in vtx:
for start, end in track.dynamic_y2:
boxes = np.concatenate((boxes, track.boxes[start:end+1, :]), axis=0)
for start, end in track.dynamic_y1:
boxes = np.concatenate((boxes, track.boxes[start:end+1, :]), axis=0)
start = np.min(boxes[:, 0])
end = np.max(boxes[:, 1])
if start > 5:
start = start - 5
else:
start = 0
return start, end
def save_subimgs(vts, file, TracksDict):
imgdir = Path(f'./result/imgs/{file}')
if not imgdir.exists():
imgdir.mkdir(parents=True, exist_ok=True)
for i, track in enumerate(vts.Residual):
boxes = track.boxes
for ii in range(len(boxes)):
tid, fid, bid = int(boxes[ii, 4]), int(boxes[ii, 7]), int(boxes[ii, 8])
img = TracksDict[f"frame_{fid}"]["imgs"][bid]
# feat = TracksDict[f"frame_{fid}"]["feats"][bid]
# box = TracksDict[f"frame_{fid}"]["boxes"][bid]
cv2.imwrite(str(imgdir) + f"/{tid}_{fid}_{bid}.png", img)
def have_tracked():
trackdict = r'./data/trackdicts'
alltracks = []
k = 0
gt = Profile()
for filename in os.listdir(trackdict):
filename = '153112511_0_seek_105.pkl'
file, ext = os.path.splitext(filename)
filepath = os.path.join(trackdict, filename)
TracksDict = np.load(filepath, allow_pickle=True)
bboxes = TracksDict['TrackBoxes']
with gt:
if filename.find("front") >= 0:
vts = doFrontTracks(bboxes, TracksDict)
Intrude = vts.isintrude()
vts.classify()
save_subimgs(vts, file, TracksDict)
plt = plot_frameID_y2(vts)
savedir = save_dir.joinpath(f'{file}_y2.png')
plt.savefig(savedir)
plt.close()
edgeline = cv2.imread("./shopcart/cart_tempt/board_ftmp_line.png")
img_tracking = draw_all_trajectories(vts, edgeline, save_dir, file, draw5p=True)
else:
vts = doBackTracks(bboxes, TracksDict)
Intrude = vts.isintrude()
vts.classify()
alltracks.append(vts)
save_subimgs(vts, file, TracksDict)
edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
img_tracking = draw_all_trajectories(vts, edgeline, save_dir, file)
trackpath = save_dir.joinpath(f'{file}.png')
cv2.imwrite(str(trackpath), img_tracking)
print(file+f" need time: {gt.dt:.2f}s")
k += 1
if k==1:
break
if len(alltracks):
drawFeatures(alltracks, save_dir)
if __name__ == "__main__":
# now = datetime.now()
# time_string = now.strftime("%Y%m%d%H%M%S")[:8]
save_dir = Path('./result/tracks')
if not save_dir.exists():
save_dir.mkdir(parents=True, exist_ok=True)
have_tracked()

View File

@ -0,0 +1,280 @@
"""
TRACLUS: A Trajectory Clustering Algorithm (A Partition and Group Framework)
Implemented for Python 3
This is an implementation of the TRACLUS algorithm as described in the paper:
"Trajectory Clustering: A Partition-and-Group Framework"
by Lee, Han, & Whang (2007) [http://hanj.cs.illinois.edu/pdf/sigmod07_jglee.pdf]
Implementation Author: Adriel Isaiah V. Amoguis (De La Salle University)
Implementation Date: 2023-03-19
This implementation was done as part of the algorithms required for the implementation author's
undergraduate thesis. The implementation is not guaranteed to be bug-free and may not be optimized
for certain use-cases. The implementation author is not responsible for any damages caused by the
use of this implementation. Use at your own risk. End-users are encouraged to examine the code
in the case of any issues. If you find any bugs, please report them to the implementation author
via the repository's issues page on GitHub.
"""
import argparse
import numpy as np
from sklearn.cluster import OPTICS
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import euclidean as d_euclidean
import pickle
import os
import warnings
# UTILITY FUNCTIONS
def load_trajectories(filepath):
"""
Load the trajectories from a pickle file.
"""
if not os.path.exists(filepath):
raise FileNotFoundError("File not found at {}".format(filepath))
with open(filepath, 'rb') as f:
trajectories = pickle.load(f)
return trajectories
def get_point_projection_on_line(point, line):
"""
Get the projection of a point on a line.
"""
# Get the slope of the line using the start and end points
line_slope = (line[-1, 1] - line[0, 1]) / (line[-1, 0] - line[0, 0]) if line[-1, 0] != line[0, 0] else np.inf
# In case the slope is infinite, we can directly get the projection
if np.isinf(line_slope):
return np.array([line[0,0], point[1]])
# Convert the slope to a rotation matrix
R = slope_to_rotation_matrix(line_slope)
# Rotate the line and point
rot_line = np.matmul(line, R.T)
rot_point = np.matmul(point, R.T)
# Get the projection
proj = np.array([rot_point[0], rot_line[0,1]])
# Undo the rotation for the projection
R_inverse = np.linalg.inv(R)
proj = np.matmul(proj, R_inverse.T)
return proj
################# EQUATIONS #################
# Euclidean Distance : Accepts two points of type np.ndarray([x,y])
# DEPRECATED IN FAVOR OF THE SCIPY IMPLEMENTATION OF THE EUCLIDEAN DISTANCE
# d_euclidean = lambda p1, p2: np.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)
# Perpendicular Distance
def d_perpendicular(l1, l2):
"""
Calculate the perpendicular distance between two lines.
"""
# Find the shorter line and assign that as l_shorter
l_shorter = l_longer = None
l1_len, l2_len = d_euclidean(l1[0], l1[-1]), d_euclidean(l2[0], l2[-1])
if l1_len < l2_len:
l_shorter = l1
l_longer = l2
else:
l_shorter = l2
l_longer = l1
ps = get_point_projection_on_line(l_shorter[0], l_longer)
pe = get_point_projection_on_line(l_shorter[-1], l_longer)
lehmer_1 = d_euclidean(l_shorter[0], ps)
lehmer_2 = d_euclidean(l_shorter[-1], pe)
if lehmer_1 == 0 and lehmer_2 == 0:
return 0
return (lehmer_1**2 + lehmer_2**2) / (lehmer_1 + lehmer_2)#, ps, pe, l_shorter[0], l_shorter[-1]
# Parallel Distance
def d_parallel(l1, l2):
"""
Calculate the parallel distance between two lines.
"""
# Find the shorter line and assign that as l_shorter
l_shorter = l_longer = None
l1_len, l2_len = d_euclidean(l1[0], l1[-1]), d_euclidean(l2[0], l2[-1])
if l1_len < l2_len:
l_shorter = l1
l_longer = l2
else:
l_shorter = l2
l_longer = l1
ps = get_point_projection_on_line(l_shorter[0], l_longer)
pe = get_point_projection_on_line(l_shorter[-1], l_longer)
parallel_1 = min(d_euclidean(l_longer[0], ps), d_euclidean(l_longer[-1], ps))
parallel_2 = min(d_euclidean(l_longer[0], pe), d_euclidean(l_longer[-1], pe))
return min(parallel_1, parallel_2)
# Angular Distance
def d_angular(l1, l2, directional=True):
"""
Calculate the angular distance between two lines.
"""
# Find the shorter line and assign that as l_shorter
l_shorter = l_longer = None
l1_len, l2_len = d_euclidean(l1[0], l1[-1]), d_euclidean(l2[0], l2[-1])
if l1_len < l2_len:
l_shorter = l1
l_longer = l2
else:
l_shorter = l2
l_longer = l1
# Get the minimum intersecting angle between both lines
shorter_slope = (l_shorter[-1,1] - l_shorter[0,1]) / (l_shorter[-1,0] - l_shorter[0,0]) if l_shorter[-1,0] - l_shorter[0,0] != 0 else np.inf
longer_slope = (l_longer[-1,1] - l_longer[0,1]) / (l_longer[-1,0] - l_longer[0,0]) if l_longer[-1,0] - l_longer[0,0] != 0 else np.inf
# The case of a vertical line
theta = None
if np.isinf(shorter_slope):
# Get the angle of the longer line with the x-axis and subtract it from 90 degrees
tan_theta0 = longer_slope
tan_theta1 = tan_theta0 * -1
theta0 = np.abs(np.arctan(tan_theta0))
theta1 = np.abs(np.arctan(tan_theta1))
theta = min(theta0, theta1)
elif np.isinf(longer_slope):
# Get the angle of the shorter line with the x-axis and subtract it from 90 degrees
tan_theta0 = shorter_slope
tan_theta1 = tan_theta0 * -1
theta0 = np.abs(np.arctan(tan_theta0))
theta1 = np.abs(np.arctan(tan_theta1))
theta = min(theta0, theta1)
else:
tan_theta0 = (shorter_slope - longer_slope) / (1 + shorter_slope * longer_slope)
tan_theta1 = tan_theta0 * -1
theta0 = np.abs(np.arctan(tan_theta0))
theta1 = np.abs(np.arctan(tan_theta1))
theta = min(theta0, theta1)
if directional:
return np.sin(theta) * d_euclidean(l_longer[0], l_longer[-1])
if 0 <= theta < (90 * np.pi / 180):
return np.sin(theta) * d_euclidean(l_longer[0], l_longer[-1])
elif (90 * np.pi / 180) <= theta <= np.pi:
return np.sin(theta)
else:
raise ValueError("Theta is not in the range of 0 to 180 degrees.")
# Total Trajectory Distance
def distance(l1, l2, directional=True, w_perpendicular=1, w_parallel=1, w_angular=1):
"""
Get the total trajectory distance using all three distance formulas.
"""
perpendicular_distance = d_perpendicular(l1, l2)
parallel_distance = d_parallel(l1, l2)
angular_distance = d_angular(l1, l2, directional=directional)
return (w_perpendicular * perpendicular_distance) + (w_parallel * parallel_distance) + (w_angular * angular_distance)
# Minimum Description Length
def minimum_desription_length(start_idx, curr_idx, trajectory, w_angular=1, w_perpendicular=1, par=True, directional=True):
"""
Calculate the minimum description length.
"""
LH = LDH = 0
for i in range(start_idx, curr_idx-1):
ed = d_euclidean(trajectory[i], trajectory[i+1])
# print("ed:", ed)
LH += max(0, np.log2(ed, where=ed>0))
if par:
for j in range(start_idx, i-1):
# print()
# print(np.array([trajectory[start_idx], trajectory[i]]))
# print(np.array([trajectory[j], trajectory[j+1]]))
LDH += w_perpendicular * d_perpendicular(np.array([trajectory[start_idx], trajectory[i]]), np.array([trajectory[j], trajectory[j+1]]))
LDH += w_angular * d_angular(np.array([trajectory[start_idx], trajectory[i]]), np.array([trajectory[j], trajectory[j+1]]), directional=directional)
# print("LDH:", LDH)
if par:
return LDH + LH
return LH
# Slope to rotation matrix
def slope_to_rotation_matrix(slope):
"""
Convert slope to rotation matrix.
"""
return np.array([[1, slope], [-slope, 1]])
#############################################
def partition(trajectory, directional=True, progress_bar=False, edis=30, w_perpendicular=1, w_angular=1):
"""
Partition a trajectory into segments.
"""
# Ensure that the trajectory is a numpy array of shape (n, 2)
if not isinstance(trajectory, np.ndarray):
raise TypeError("Trajectory must be a numpy array")
elif trajectory.shape[1] != 2:
raise ValueError("Trajectory must be a numpy array of shape (n, 2)")
# Initialize the characteristic points, add the first point as a characteristic point
cp_indices = []
cp_indices.append(0)
traj_len = trajectory.shape[0]
start_idx = 0
length = 1
while start_idx + length < traj_len:
if progress_bar:
print(f'\r{round(((start_idx + length) / traj_len) * 100, 2)}%', end='')
# print(f'Current Index: {start_idx + length}, Trajectory Length: {traj_len}')
curr_idx = start_idx + length
# print(start_idx, curr_idx)
# print(f"Current Index: {curr_idx}, Current point: {trajectory[curr_idx]}")
cost_par = minimum_desription_length(start_idx, curr_idx, trajectory, w_angular=w_angular, w_perpendicular=w_perpendicular, directional=directional)
cost_nopar = minimum_desription_length(start_idx, curr_idx, trajectory, par=False, directional=directional)
# cost_par += 0 if 1 - cos == 0 else w_feats / (1 - cos)
# print(f'Cost with partition: {cost_par}, Cost without partition: {cost_nopar}')
if cost_par > cost_nopar and d_euclidean(trajectory[start_idx], trajectory[curr_idx]) > edis:
# print('edp:', d_euclidean(trajectory[start_idx], trajectory[curr_idx]))
# print(f"Added characteristic point: {trajectory[curr_idx-1]} with index {curr_idx-1}")
cp_indices.append(curr_idx-1)
start_idx = curr_idx-1
length = 1
else:
length += 1
# Add last point to characteristic points
cp_indices.append(len(trajectory) - 1)
# print(cp_indices)
return np.array([trajectory[i] for i in cp_indices]), cp_indices
# Create the script version that takes in a file path for inputs
if __name__ == "__main__":
# Parse the arguments
parser = argparse.ArgumentParser(description="Trajectory Clustering Algorithm")
parser.add_argument("input_file", help="The input file path (pickle format)")
parser.add_argument("-p", "--progress_bar", help="Show the progress bar", action="store_true")
args = parser.parse_args()
# Load the trajectories
trajectories = load_trajectories(args.input_file)
# Run the partition algorithm
partitions, indices = tr.partition(points, progress_bar=args.progress_bar, w_perpendicular=100, w_angular=10)

View File

@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 31 17:06:34 2023
@author: ym
"""
from .proBoxes import Boxes, boxes_add_fid
from .iterYaml import IterableSimpleNamespace, yaml_load
__all__ = "IterableSimpleNamespace", "yaml_load", "Boxes", "boxes_add_fid"

View File

@ -0,0 +1,96 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 20 14:21:13 2023
@author: ym
"""
import cv2
# import sys
# sys.path.append(r"D:\DeepLearning\yolov5")
# from ultralytics.utils.plotting import Annotator, colors
from .plotting import Annotator, colors
class TrackAnnotator(Annotator):
def plotting_track(self, track, names='abc'):
"""
track[x, y, w, h, track_id, score, cls, frame_index]
boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
id跟踪id从 1 开始计数,
frame_index: 帧索引,从 1 开始计数
cls类别编号从 0 开始计数,用作 names 的 key 值
"""
if track.size==0: return
id, cls = track[0, 4], track[0, 6]
if id >=0 and cls==0:
color = colors(int(cls), True)
elif id >=0 and cls!=0:
color = colors(int(id), True) # 不存在 id = 0不会和上面产生冲突
else:
color = colors(19, True) # 19为调色板的最后一个元素
nb = track.shape[0]
for i in range(nb):
if i == 0:
# label = f'{int(track[i, 4])}:({int(track[i, 7])})'
label = f'ID_{int(track[i, 4])}'
elif i == nb-1:
label = ''
# label = f'{int(track[i, 4])}:({int(track[i, 7])})&{int(nb)}'
else:
label = ''
self.circle_label(track[i, :], label, color=color)
def circle_label(self, track, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
"""
绘制选定 track 的轨迹
"""
if track.size==0: return
x, y = int((track[0]+track[2])/2), int((track[1]+track[3])/2)
cv2.circle(self.im, (x, y), 6, color, 2)
# txt_color = (0,0,0)
if label:
tf = max(self.lw - 1, 1) # font thickness
w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height
outside = x + w <= self.im.shape[1]-3
# p2 = x + w, y - h - 3 if outside else y + h + 3
# cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled
cv2.putText(self.im,
label, (x-10 if outside else x-w+2, y-20),
0,
# self.lw / 3,
self.lw/2,
txt_color,
thickness=tf,
lineType=cv2.LINE_AA)

View File

@ -0,0 +1,387 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 15 15:26:38 2024
@author: ym
"""
import numpy as np
import cv2
import os
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from tracking.utils.annotator import TrackAnnotator
from tracking.utils.plotting import colors
def plot_frameID_y2(vts):
# boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# 0, 1, 2, 3, 4, 5, 6, 7, 8
markers = ['o', 'v', '^', '<', '>', 's', 'p', 'P','*', '+', 'x', 'X', 'd', 'D', 'H']
colors = ['b', 'g', 'c', 'm', 'y', ]
bboxes = vts.bboxes
maxfid = max(vts.bboxes[:, 7])
CART_HIGH_THRESH1 = 430
TRACK_STATIC_THRESH = 8
fig = plt.figure(figsize=(16, 12))
gs = fig.add_gridspec(2, 1, left=0.1, right=0.9, bottom=0.1, top=0.9,
wspace=0.05, hspace=0.15)
# ax1, ax2 = axs
ax1 = fig.add_subplot(gs[0,0])
ax2 = fig.add_subplot(gs[1,0])
ax1.plot((0, maxfid+5), (1280-CART_HIGH_THRESH1, 1280-CART_HIGH_THRESH1), 'b--', linewidth=2 )
ax2.plot((0, maxfid+5), (1280-CART_HIGH_THRESH1, 1280-CART_HIGH_THRESH1), 'b--', linewidth=2 )
hands = [t for t in vts.Hands if not t.isHandStatic]
tracks = vts.join_tracks(vts.Residual, hands)
for i, track in enumerate(vts.tracks):
boxes = track.boxes
cls, tid = track.cls, track.tid
y2, fids = boxes[:, 3], boxes[:, 7]
if cls==0:
ax1.scatter(fids, 1280-y2, marker='4', s=50, color=colors[tid%len(colors)], label = f"ID_{tid}")
else:
ax1.scatter(fids, 1280-y2, marker=markers[tid%len(markers)], color=colors[tid%len(colors)],
s=50, label = f"ID_{tid}")
# hist, bins = np.histogram(1280-y2, bins='auto')
ax1.set_ylim([-50, 1350])
for i, track in enumerate(tracks):
boxes = track.boxes
cls, tid = track.cls, track.tid
y2, fids = boxes[:, 3], boxes[:, 7]
if cls==0:
ax2.scatter(fids, 1280-y2, marker='4', s=50, color=colors[tid%len(colors)], label = f"ID_{tid}")
else:
ax2.scatter(fids, 1280-y2, marker=markers[tid%len(markers)], color=colors[tid%len(colors)],
s=50, label = f"ID_{tid}")
# hist, bins = np.histogram(1280-y2, bins='auto')
ax2.set_ylim([-50, 1350])
ax1.grid(True), ax1.set_xlim(0, maxfid+5), ax1.set_title('y2')
ax1.legend()
ax2.grid(True), ax2.set_xlim(0, maxfid+5), ax2.set_title('y2')
ax2.legend()
# plt.show()
return plt
def draw_all_trajectories(vts, edgeline, save_dir, file, draw5p=False):
'''显示四种类型结果'''
# file, ext = os.path.splitext(filename)
# edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
# edgeline2 = edgeline1.copy()
# edgeline = np.concatenate((edgeline1, edgeline2), exis = 1)
if not isinstance(save_dir, Path): save_dir = Path(save_dir)
''' all tracks 中心轨迹'''
img1, img2 = edgeline.copy(), edgeline.copy()
img1 = drawTrack(vts.tracks, img1)
img2 = drawTrack(vts.Residual, img2)
imgcat = np.concatenate((img1, img2), axis = 1)
H, W = imgcat.shape[:2]
cv2.line(imgcat, (int(W/2), 0), (int(W/2), H), (128, 255, 128), 2)
# imgpth = save_dir.joinpath(f"{file}_show.png")
# cv2.imwrite(str(imgpth), img)
if not draw5p:
return imgcat
''' tracks 5点轨迹'''
trackpth = save_dir / Path("trajectory")
if not trackpth.exists():
trackpth.mkdir(parents=True, exist_ok=True)
for track in vts.tracks:
# if track.cls != 0:
img = edgeline.copy()
img = draw5points(track, img)
pth = trackpth.joinpath(f"{file}_{track.tid}_.png")
cv2.imwrite(str(pth), img)
# for track in vts.Residual:
# # if track.cls != 0:
# img = edgeline.copy()
# img = draw5points(track, img)
# pth = trackpth.joinpath(f"{file}_{track.tid}_.png")
# cv2.imwrite(str(pth), img)
return imgcat
# =============================================================================
# '''3. moving tracks 中心轨迹'''
# filename2 = f"{file}_show_r.png"
# img = edgeline.copy()
# img = drawTrack(vts.Residual, img)
# pth = save_dir.joinpath(filename2)
# cv2.imwrite(pth, img)
# =============================================================================
'''5. tracks 时序trajmin、trajmax、arearate、incartrate'''
# plt = drawtracefeat(vts)
# pth = save_dir.joinpath(f"{file}_x.png")
# plt.savefig(pth)
# plt.close('all')
def drawFeatures(allvts, save_dir):
# [trajlen_min, trajdist_max, trajlen_rate, trajist_rate]]
feats = [track.TrajFeat for vts in allvts for track in vts.tracks]
feats = np.array(feats)
fig, ax = plt.subplots()
ax.scatter(feats[:,3], feats[:, 1], s=10)
# ax.set_xlim(0, 2)
# ax.set_ylim(0, 100)
ax.grid(True)
plt.show()
pth = save_dir.joinpath("scatter.png")
plt.savefig(pth)
plt.close('all')
def drawtracefeat(vts):
'''
需要对曲线进行特征提取和分类
boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
'''
# matplotlib.use('Agg')
fid = vts.frameid
fid1, fid2 = min(fid), max(fid)
fig, axs = plt.subplots(2, 2,figsize=(18, 8))
kernel = [0.15, 0.7, 0.15]
for i, track in enumerate(vts.tracks):
boxes = track.boxes
tid = int(track.tid)
cls = int(track.cls)
posState = track.posState
if track.frnum>=5:
x1 = boxes[1:, 7]
y1 = track.trajmin
x11 = [i for i in range(int(min(x1)), int(max(x1)+1))]
y11 = np.interp(x11, x1, y1)
y11[1:-1] = np.convolve(y11, kernel, 'valid')
x3 = boxes[1:, 7]
y3 = track.trajmax
x33 = [i for i in range(int(min(x3)), int(max(x3)+1))]
y33 = np.interp(x33, x3, y3)
y33[1:-1] = np.convolve(y33, kernel, 'valid')
x2 = boxes[:, 7]
# y2 = track.Area/max(track.Area) - min(track.Area/max(track.Area))
y2 = track.Area/max(track.Area)
x22 = [i for i in range(int(min(x2)), int(max(x2)+1))]
y22 = np.interp(x22, x2, y2)
y22[1:-1] = np.convolve(y22, kernel, 'valid')
x4 = boxes[:, 7]
y4 = track.incartrates
x44 = [i for i in range(int(min(x4)), int(max(x4)+1))]
y44 = np.interp(x44, x4, y4)
y44[1:-1] = np.convolve(y44, kernel, 'valid')
elif track.frnum>=2:
x11 = boxes[1:, 7]
y11 = track.trajmin
x33 = boxes[1:, 7]
y33 = track.trajmax
x22 = boxes[:, 7]
# y22 = track.Area/max(track.Area) - min(track.Area/max(track.Area))
y22 = track.Area/max(track.Area)
x44 = boxes[:, 7]
y44 = track.incartrates
else:
continue
# cls!=0, max(y)>20
if cls!=0 and cls!=9 and posState>=2 and max(y11)>10 and max(y33)>10 and max(y22>0.1):
axs[0, 0].plot(x11, y11, label=f"ID_{tid}")
axs[0, 0].legend()
# axs[0].set_ylim(0, 100)
axs[0, 1].plot(x22, y22, label=f"ID_{tid}")
axs[0, 1].legend()
axs[1, 0].plot(x33, y33, label=f"ID_{tid}")
axs[1, 0].legend()
axs[1, 1].plot(x44, y44, label=f"ID_{tid}")
axs[1, 1].legend()
axs[0, 0].grid(True), axs[0, 0].set_xlim(fid1, fid2+10), axs[0, 0].set_title('trajmin')
axs[0, 1].grid(True), axs[0, 1].set_xlim(fid1, fid2+10), axs[0, 1].set_title('arearate')
axs[1, 0].grid(True), axs[1, 0].set_xlim(fid1, fid2+10), axs[1, 0].set_title('trajmax')
axs[1, 1].grid(True), axs[1, 1].set_xlim(fid1, fid2+10), axs[1, 1].set_ylim(-0.1, 1.1)
axs[1, 1].set_title('incartrate')
# pth = save_dir.joinpath(f"{file}_show_x.png")
# plt.savefig(pth)
# plt.savefig(f"./result/cls11_80212_time/{file}_show_x.png")
# plt.show()
return plt
def draw5points(track, img):
"""
显示中心点、4角点的轨迹以及轨迹 features
"""
colorx = np.array([[255, 255, 255], [255, 255, 255], [255, 255, 255], [255, 255, 255], [255, 255, 255],
[0, 0, 255], [0, 255, 0], [255, 51, 255], [102, 178, 255], [51, 153, 255],[255, 153, 153],
[255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102], [51, 255, 51],
[255, 102, 255], [153, 204, 255], [255, 0, 0], [255, 255, 255]], dtype=np.uint8)
color = ((0, 0, 255), (255, 128, 0))
# img = cv2.imread("./shopcart/cart_tempt/edgeline.png")
boxes = track.boxes
cornpoints = track.cornpoints
trajlens = [int(t) for t in track.trajlens]
trajdist = [int(t) for t in track.trajdist]
if len(track.trajmin):
trajstd = np.std(track.trajmin)
else:
trajstd = 0
trajlen_min, trajlen_max, trajdist_min, trajdist_max, trajlen_rate, trajdist_rate = track.TrajFeat
for i in range(boxes.shape[0]):
cv2.circle(img, (int(cornpoints[i, 0]), int(cornpoints[i, 1])), 6, (255, 255, 255), 2)
cv2.circle(img, (int(cornpoints[i, 2]), int(cornpoints[i, 3])), 6, (255, 0, 255), 2)
cv2.circle(img, (int(cornpoints[i, 4]), int(cornpoints[i, 5])), 6, (0, 255, 0), 2)
cv2.circle(img, (int(cornpoints[i, 6]), int(cornpoints[i, 7])), 6, (64, 128, 255), 2)
cv2.circle(img, (int(cornpoints[i, 8]), int(cornpoints[i, 9])), 6, (255, 128, 64), 2)
label_0 = f"ID: {track.tid}, Class: {track.cls}"
label_1 = f"trajlens: {trajlens}, trajlen_min: {int(trajlen_min)}"
label_2 = f"trajdist: {trajdist}: trajdist_max: {int(trajdist_max)}"
label_3 = "trajlen_min/trajlen_max: {:.2f}/{:.2f} = {:.2f}".format(trajlen_min, trajlen_max, trajlen_rate)
label_4 = "trajdist_min/mwh : {:.2f}/{:.2f} = {:.2f}".format(trajdist_min, track.mwh, trajdist_rate)
label_5 = "std(trajmin) : {:.2f}".format(trajstd)
label_6 = "PCA(variance_ratio) : "
label_7 = "Rect W&H&Ratio : "
label_8 = ""
# label_8 = "IOU of incart/maxbox/minbox: {:.2f}, {:.2f}, {:.2f}".format(
# track.feature_ious[0], track.feature_ious[3], track.feature_ious[4])
'''=============== 最小轨迹长度索引 ===================='''
trajlens = [int(t) for t in track.trajrects_wh]
if track.isCornpoint:
idx = 0
else:
idx = trajlens.index(min(trajlens))
'''=============== PCA ===================='''
if trajlens[idx] > 12:
X = cornpoints[:, 2*idx:2*(idx+1)]
pca = PCA()
pca.fit(X)
label_6 = "PCA(variance_ratio): {:.2f}".format(pca.explained_variance_ratio_[0])
# if sum(np.isnan(pca.explained_variance_ratio_)) == 0:
for i, (comp, var) in enumerate(zip(pca.components_, pca.explained_variance_ratio_)):
pt1 = (pca.mean_ - comp*var*200).astype(np.int64)
pt2 = (pca.mean_ + comp*var*200).astype(np.int64)
cv2.line(img, pt1, pt2, color=color[i], thickness=2)
'''=============== RECT ===================='''
rect = track.trajrects[idx]
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(img, [box], 0, (0, 255, 0), 2)
label_7 = "Rect W&H&Ratio: {}, {}, {:.2f}".format(int(rect[1][0]), int(rect[1][1]), min(rect[1])/(max(rect[1])+0.001))
'''=============== 显示文字 ===================='''
# label = [label_0, label_1, label_2, label_3, label_4, label_5, label_6, label_7, label_8]
# w, h = cv2.getTextSize('abc', 0, fontScale=2, thickness=1)[0]
# for i in range(len(label)):
# cv2.putText(img, label[i], (20, int((i+1)*1.1*h)), 0, 1,
# [int(x) for x in colorx[i]], 2, lineType=cv2.LINE_AA)
# pth = save_dir.joinpath(f"{file}_{track.tid}.png")
# cv2.imwrite(pth, img)
'''撰写专利需要,生成黑白图像'''
# imgbt = cv2.bitwise_not(img)
# for i in range(box.shape[0]):
# cv2.circle(imgbt, (int(cornpoints[i, 0]), int(cornpoints[i, 1])), 14, (0, 0, 0), 2)
# cv2.drawMarker(imgbt, (int(cornpoints[i, 2]), int(cornpoints[i, 3])), color= (0, 0, 0), markerType=3, markerSize = 30, thickness=2)
# cv2.drawMarker(imgbt, (int(cornpoints[i, 4]), int(cornpoints[i, 5])), color= (0, 0, 0), markerType=4, markerSize = 30, thickness=2)
# cv2.drawMarker(imgbt, (int(cornpoints[i, 6]), int(cornpoints[i, 7])), color= (0, 0, 0), markerType=5, markerSize = 30, thickness=2)
# cv2.drawMarker(imgbt, (int(cornpoints[i, 8]), int(cornpoints[i, 9])), color= (0, 0, 0), markerType=6, markerSize = 30, thickness=2)
# cv2.imwrite(pth + f"/zhuanli/{file}_{track.tid}.png", imgbt)
return img
def drawTrack(tracks, img):
# img = cv2.imread("./shopcart/cart_tempt/edgeline.png")
annotator = TrackAnnotator(img, line_width=2)
for track in tracks:
if isinstance(track, np.ndarray):
annotator.plotting_track(track)
else:
annotator.plotting_track(track.boxes)
img = annotator.result()
# pth = save_dir.joinpath(f"{filename}")
# cv2.imwrite(pth, img)
return img
if __name__ == "__main__":
y = np.array([5.0, 20, 40, 41, 42, 55, 56])

View File

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 16 10:36:38 2024
@author: ym
"""
import contextlib
import time
class Profile(contextlib.ContextDecorator):
# YOLOv5 Profile class. Usage: @Profile() decorator or 'with Profile():' context manager
def __init__(self, t=0.0):
self.t = t
# self.cuda = torch.cuda.is_available()
def __enter__(self):
self.start = self.time()
return self
def __exit__(self, type, value, traceback):
self.dt = self.time() - self.start # delta-time
self.t += self.dt # accumulate dt
def time(self):
# if self.cuda:
# torch.cuda.synchronize()
return time.time()

View File

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 31 17:07:09 2023
@author: ym
"""
from pathlib import Path
from types import SimpleNamespace
import re
import yaml
class IterableSimpleNamespace(SimpleNamespace):
"""
Ultralytics IterableSimpleNamespace is an extension class of SimpleNamespace that adds iterable functionality and
enables usage with dict() and for loops.
"""
def __iter__(self):
"""Return an iterator of key-value pairs from the namespace's attributes."""
return iter(vars(self).items())
def __str__(self):
"""Return a human-readable string representation of the object."""
return '\n'.join(f'{k}={v}' for k, v in vars(self).items())
def __getattr__(self, attr):
"""Custom attribute access error message with helpful information."""
name = self.__class__.__name__
raise AttributeError(f"""
'{name}' object has no attribute '{attr}'. This may be caused by a modified or out of date ultralytics
'default.yaml' file.\nPlease update your code with 'pip install -U ultralytics' and if necessary replace
DEFAULT_CFG_PATH with the latest version from
https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/default.yaml
""")
def get(self, key, default=None):
"""Return the value of the specified key if it exists; otherwise, return the default value."""
return getattr(self, key, default)
def yaml_load(file='data.yaml', append_filename=False):
"""
Load YAML data from a file.
Args:
file (str, optional): File name. Default is 'data.yaml'.
append_filename (bool): Add the YAML filename to the YAML dictionary. Default is False.
Returns:
(dict): YAML data and file name.
"""
assert Path(file).suffix in ('.yaml', '.yml'), f'Attempting to load non-YAML file {file} with yaml_load()'
with open(file, errors='ignore', encoding='utf-8') as f:
s = f.read() # string
# Remove special characters
if not s.isprintable():
s = re.sub(r'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]+', '', s)
# Add YAML filename to dict and return
data = yaml.safe_load(s) or {} # always return a dict (yaml.safe_load() may return None for empty files)
if append_filename:
data['yaml_file'] = str(file)
return data

View File

@ -0,0 +1,213 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 23 11:04:48 2024
@author: ym
"""
import numpy as np
import cv2
from scipy.spatial.distance import cdist
# from trackers.utils import matching
# TracksDict
def readDict(boxes, TracksDict):
feats = []
for i in range(boxes.shape[0]):
tid, fid, bid = int(boxes[i, 4]), int(boxes[i, 7]), int(boxes[i, 8])
trackdict = TracksDict[f"frame_{fid}"]
if "feats" in trackdict:
feat = trackdict["feats"][bid]
feats.append(feat)
if "boxes" in trackdict:
box = trackdict["boxes"][bid]
assert (box[:4].astype(int) == boxes[i, :4].astype(int)).all(), f"Please check: frame_{fid}"
if "imgs" in trackdict:
img = trackdict["imgs"][bid]
cv2.imwrite(f'./data/imgs/{tid}_{fid}_{bid}.png', img)
return np.asarray(feats, dtype=np.float32)
def track_equal_track(atrack, btrack):
# boxes: [x, y, w, h, track_id, score, cls, frame_index, box_index]
# 0 1 2 3 4 5 6 7 8
aboxes = atrack.boxes
bboxes = btrack.boxes
afeat = atrack.features
bfeat = btrack.features
# afeat = readDict(aboxes, TracksDict)
# bfeat = readDict(bboxes, TracksDict)
''' 1. 判断轨迹在时序上是否有交集 '''
afids = aboxes[:, 7].astype(np.int_)
bfids = bboxes[:, 7].astype(np.int_)
# 帧索引交集
interfid = set(afids).intersection(set(bfids))
# 或者直接判断帧索引是否有交集,返回 Ture or False
# interfid = set(afids).isdisjoint(set(bfids))
if len(interfid):
return False
''' 2. 轨迹特征相似度判断'''
feat = np.concatenate((afeat, bfeat), axis=0)
emb_simil = 1 - np.maximum(0.0, cdist(feat, feat, 'cosine'))
emb_ = 1 - np.maximum(0.0, cdist(np.mean(afeat, axis=0)[None, :], np.mean(bfeat, axis=0)[None, :], 'cosine'))/2
if emb_[0, 0]<0.66:
return False
''' 3. 轨迹空间iou'''
alabel = np.array([0] * afids.size, dtype=np.int_)
blabel = np.array([1] * bfids.size, dtype=np.int_)
label = np.concatenate((alabel, blabel), axis=0)
fids = np.concatenate((afids, bfids), axis=0)
indices = np.argsort(fids)
idx_pair = []
for i in range(len(indices)-1):
idx1, idx2 = indices[i], indices[i+1]
if label[idx1] != label[idx2] and fids[idx2] - fids[idx1] <= 3:
if label[idx1] == 0:
a_idx = idx1
b_idx = idx2-alabel.size
else:
a_idx = idx2
b_idx = idx1-alabel.size
idx_pair.append((a_idx, b_idx))
ious = []
embs = []
for a, b in idx_pair:
abox, bbox = aboxes[a, :], bboxes[b, :]
af, bf = afeat[a, :], bfeat[b, :]
emb_ab = 1 - np.maximum(0.0, cdist(af[None, :], bf[None, :], 'cosine'))
xa1, ya1 = abox[0] - abox[2]/2, abox[1] - abox[3]/2
xa2, ya2 = abox[0] + abox[2]/2, abox[1] + abox[3]/2
xb1, yb1 = bbox[0] - bbox[2]/2, bbox[1] - bbox[3]/2
xb2, yb2 = bbox[0] + bbox[2]/2, bbox[1] + bbox[3]/2
inter = (np.minimum(xb2, xa2) - np.maximum(xb1, xa1)).clip(0) * \
(np.minimum(yb2, ya2) - np.maximum(yb1, ya1)).clip(0)
# Union Area
box1_area = abox[2] * abox[3]
box2_area = bbox[2] * bbox[3]
union = box1_area + box2_area - inter + 1e-6
ious.append(inter/union)
embs.append(emb_ab[0, 0])
''' 4. 和同一手部关联,如何将该代码和 iou 部分相融合,需进一步完善'''
# ahands = np.array(atrack.Hands)
# bhands = np.array(btrack.Hands)
# ahids = ahands[:, 0]
# bhids = bhands[:, 0]
# interhid = set(ahids).intersection(set(bhids))
# for hid in interhid:
# aidx = ahands[:, 0] == hid
# bidx = bhands[:, 0] == hid
# ahfids = ahids[aidx, 1]
# bhfids = bhids[bidx, 1]
cont = False if len(interfid) else True # fid 无交集
cont1 = all(emb > 0.5 for emb in embs)
cont2 = all(iou > 0.5 for iou in ious)
# cont = cont and cont2 and cont3
cont = cont and cont1 and cont2
return cont
def track_equal_str(atrack, btrack):
if atrack == btrack:
return True
else:
return False
def merge_track(Residual):
out_list = []
alist = [t for t in Residual]
while alist:
atrack = alist[0]
cur_list = []
cur_list.append(atrack)
alist.pop(0)
blist = [b for b in alist]
alist = []
for btrack in blist:
if track_equal_str(atrack, btrack):
cur_list.append(btrack)
else:
alist.append(btrack)
out_list.append(cur_list)
return out_list
def main():
Residual = ['a', 'b', 'c', 'd', 'a', 'b', 'c', 'b', 'c', 'd']
out_list = merge_track(Residual)
print(Residual)
print(out_list)
if __name__ == "__main__":
main()
# =============================================================================
# for i, atrack in enumerate(input_list):
# cur_list = []
# cur_list.append(atrack)
# del input_list[i]
#
# for j, btrack in enumerate(input_list):
# if track_equal(atrack, btrack):
# cur_list.append(btrack)
# del input_list[j]
#
# out_list.append(cur_list)
# =============================================================================

View File

@ -0,0 +1,364 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
import math
import warnings
from pathlib import Path
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from PIL import __version__ as pil_version
# from utils.general import increment_path
# from ultralytics.utils import LOGGER, TryExcept, ops, plt_settings, threaded
# from .checks import check_font, check_version, is_ascii
# from .files import increment_path
class Colors:
"""
Ultralytics default color palette https://ultralytics.com/.
This class provides methods to work with the Ultralytics color palette, including converting hex color codes to
RGB values.
Attributes:
palette (list of tuple): List of RGB color values.
n (int): The number of colors in the palette.
pose_palette (np.array): A specific color palette array with dtype np.uint8.
"""
def __init__(self):
"""Initialize colors as hex = matplotlib.colors.TABLEAU_COLORS.values()."""
hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
'2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
self.n = len(self.palette)
self.pose_palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255],
[153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255],
[255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102],
[51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255]],
dtype=np.uint8)
def __call__(self, i, bgr=False):
"""Converts hex color codes to RGB values."""
c = self.palette[int(i) % self.n]
return (c[2], c[1], c[0]) if bgr else c
@staticmethod
def hex2rgb(h):
"""Converts hex color codes to RGB values (i.e. default PIL order)."""
return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
colors = Colors() # create instance for 'from utils.plots import colors'
class Annotator:
"""
Ultralytics Annotator for train/val mosaics and JPGs and predictions annotations.
Attributes:
im (Image.Image or numpy array): The image to annotate.
pil (bool): Whether to use PIL or cv2 for drawing annotations.
font (ImageFont.truetype or ImageFont.load_default): Font used for text annotations.
lw (float): Line width for drawing.
skeleton (List[List[int]]): Skeleton structure for keypoints.
limb_color (List[int]): Color palette for limbs.
kpt_color (List[int]): Color palette for keypoints.
"""
def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
"""Initialize the Annotator class with image and line width along with color palette for keypoints and limbs."""
assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
self.im = im
self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width
# Pose
self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9],
[8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
self.limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
"""Add one xyxy box to image with label."""
if isinstance(box, torch.Tensor):
box = box.tolist()
p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
if label:
tf = max(self.lw - 1, 1) # font thickness
w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height
outside = p1[1] - h >= 3
p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled
cv2.putText(self.im,
label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
0,
self.lw / 3,
txt_color,
thickness=tf,
lineType=cv2.LINE_AA)
def masks(self, masks, colors, im_gpu, alpha=0.5, retina_masks=False):
"""
Plot masks on image.
Args:
masks (tensor): Predicted masks on cuda, shape: [n, h, w]
colors (List[List[Int]]): Colors for predicted masks, [[r, g, b] * n]
im_gpu (tensor): Image is in cuda, shape: [3, h, w], range: [0, 1]
alpha (float): Mask transparency: 0.0 fully transparent, 1.0 opaque
retina_masks (bool): Whether to use high resolution masks or not. Defaults to False.
"""
if self.pil:
# Convert to numpy first
self.im = np.asarray(self.im).copy()
if len(masks) == 0:
self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
if im_gpu.device != masks.device:
im_gpu = im_gpu.to(masks.device)
colors = torch.tensor(colors, device=masks.device, dtype=torch.float32) / 255.0 # shape(n,3)
colors = colors[:, None, None] # shape(n,1,1,3)
masks = masks.unsqueeze(3) # shape(n,h,w,1)
masks_color = masks * (colors * alpha) # shape(n,h,w,3)
inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1)
mcs = masks_color.max(dim=0).values # shape(n,h,w,3)
im_gpu = im_gpu.flip(dims=[0]) # flip channel
im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3)
im_gpu = im_gpu * inv_alph_masks[-1] + mcs
im_mask = (im_gpu * 255)
im_mask_np = im_mask.byte().cpu().numpy()
self.im[:] = im_mask_np if retina_masks else scale_image(im_mask_np, self.im.shape)
if self.pil:
# Convert im back to PIL and update draw
self.fromarray(self.im)
def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True):
"""
Plot keypoints on the image.
Args:
kpts (tensor): Predicted keypoints with shape [17, 3]. Each keypoint has (x, y, confidence).
shape (tuple): Image shape as a tuple (h, w), where h is the height and w is the width.
radius (int, optional): Radius of the drawn keypoints. Default is 5.
kpt_line (bool, optional): If True, the function will draw lines connecting keypoints
for human pose. Default is True.
Note: `kpt_line=True` currently only supports human pose plotting.
"""
if self.pil:
# Convert to numpy first
self.im = np.asarray(self.im).copy()
nkpt, ndim = kpts.shape
is_pose = nkpt == 17 and ndim == 3
kpt_line &= is_pose # `kpt_line=True` for now only supports human pose plotting
for i, k in enumerate(kpts):
color_k = [int(x) for x in self.kpt_color[i]] if is_pose else colors(i)
x_coord, y_coord = k[0], k[1]
if x_coord % shape[1] != 0 and y_coord % shape[0] != 0:
if len(k) == 3:
conf = k[2]
if conf < 0.5:
continue
cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, color_k, -1, lineType=cv2.LINE_AA)
if kpt_line:
ndim = kpts.shape[-1]
for i, sk in enumerate(self.skeleton):
pos1 = (int(kpts[(sk[0] - 1), 0]), int(kpts[(sk[0] - 1), 1]))
pos2 = (int(kpts[(sk[1] - 1), 0]), int(kpts[(sk[1] - 1), 1]))
if ndim == 3:
conf1 = kpts[(sk[0] - 1), 2]
conf2 = kpts[(sk[1] - 1), 2]
if conf1 < 0.5 or conf2 < 0.5:
continue
if pos1[0] % shape[1] == 0 or pos1[1] % shape[0] == 0 or pos1[0] < 0 or pos1[1] < 0:
continue
if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0:
continue
cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[i]], thickness=2, lineType=cv2.LINE_AA)
if self.pil:
# Convert im back to PIL and update draw
self.fromarray(self.im)
def rectangle(self, xy, fill=None, outline=None, width=1):
"""Add rectangle to image (PIL-only)."""
self.draw.rectangle(xy, fill, outline, width)
def text(self, xy, text, txt_color=(255, 255, 255), anchor='top', box_style=False):
"""Adds text to an image using PIL or cv2."""
if anchor == 'bottom': # start y from font bottom
w, h = self.font.getsize(text) # text width, height
xy[1] += 1 - h
if self.pil:
if box_style:
w, h = self.font.getsize(text)
self.draw.rectangle((xy[0], xy[1], xy[0] + w + 1, xy[1] + h + 1), fill=txt_color)
# Using `txt_color` for background and draw fg with white color
txt_color = (255, 255, 255)
if '\n' in text:
lines = text.split('\n')
_, h = self.font.getsize(text)
for line in lines:
self.draw.text(xy, line, fill=txt_color, font=self.font)
xy[1] += h
else:
self.draw.text(xy, text, fill=txt_color, font=self.font)
else:
if box_style:
tf = max(self.lw - 1, 1) # font thickness
w, h = cv2.getTextSize(text, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height
outside = xy[1] - h >= 3
p2 = xy[0] + w, xy[1] - h - 3 if outside else xy[1] + h + 3
cv2.rectangle(self.im, xy, p2, txt_color, -1, cv2.LINE_AA) # filled
# Using `txt_color` for background and draw fg with white color
txt_color = (255, 255, 255)
tf = max(self.lw - 1, 1) # font thickness
cv2.putText(self.im, text, xy, 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)
def fromarray(self, im):
"""Update self.im from a numpy array."""
self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
self.draw = ImageDraw.Draw(self.im)
def result(self):
"""Return annotated image as array."""
return np.asarray(self.im)
def scale_image(masks, im0_shape, ratio_pad=None):
"""
Takes a mask, and resizes it to the original image size
Args:
masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
im0_shape (tuple): the original image shape
ratio_pad (tuple): the ratio of the padding to the original image.
Returns:
masks (torch.Tensor): The masks that are being returned.
"""
# Rescale coordinates (xyxy) from im1_shape to im0_shape
im1_shape = masks.shape
if im1_shape[:2] == im0_shape[:2]:
return masks
if ratio_pad is None: # calculate from im0_shape
gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new
pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
top, left = int(pad[1]), int(pad[0]) # y, x
bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])
if len(masks.shape) < 2:
raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
masks = masks[top:bottom, left:right]
masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))
if len(masks.shape) == 2:
masks = masks[:, :, None]
return masks
def boxing_img(det, img, line_width=3):
annotator = Annotator(img, line_width)
for *xyxy, id, conf, cls, _, _ in reversed(det):
label = (f'id:{int(id)} '+str(int(cls)) +f' {conf:.2f}')
if cls==0:
color = colors(int(cls), True)
else:
color = colors(int(id), True)
annotator.box_label(xyxy, label, color=color)
# Save results (image and video with tracking)
imgx = annotator.result()
return imgx
def array2list(bboxes):
track_fids = np.unique(bboxes[:, 7].astype(int))
track_fids.sort()
lboxes = []
for f_id in track_fids:
# print(f"The ID is: {t_id}")
idx = np.where(bboxes[:, 7] == f_id)[0]
box = bboxes[idx, :]
lboxes.append(box)
assert len(set(box[:, 4])) == len(box), "Please check!!!"
return lboxes
def get_subimgs(imgs, tracks, scale=2):
bboxes = []
if len(tracks):
bboxes = array2list(tracks)
subimgs = []
for i, boxes in enumerate(bboxes):
fid = int(boxes[0, 7])
for *xyxy, tid, conf, cls, fid, bid in boxes:
pt2 = [p/scale for p in xyxy]
x1, y1, x2, y2 = (int(pt2[0]), int(pt2[1])), (int(pt2[2]), int(pt2[3]))
subimgs.append((int(fid), int(bid), imgs[fid-1][y1:y2, x1:x2]))
return subimgs
def draw_tracking_boxes(imgs, tracks, scale=2):
'''需要确保 imgs 覆盖tracks中的帧ID数
tracks: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
关键:
(1) imgs中的次序和 track 中的 fid 对应
(2) img 尺度小对于xyxy减半
'''
bboxes = []
if len(tracks):
bboxes = array2list(tracks)
# if len(bboxes)!=len(imgs):
# return False, imgs
annimgs = []
for i, boxes in enumerate(bboxes):
fid = int(boxes[0, 7])
annotator = Annotator(imgs[fid-1].copy())
for *xyxy, tid, conf, cls, fid, bid in boxes:
label = f'id:{int(tid)}_{int(cls)}_{conf:.2f}'
if cls==0:
color = colors(int(cls), True)
elif tid>0 and cls!=0:
color = colors(int(tid), True)
else:
color = colors(19, True) # 19为调色板的最后一个元素
pt2 = [p/scale for p in xyxy]
annotator.box_label(pt2, label, color=color)
img = annotator.result()
annimgs.append((int(fid), img))
return annimgs

View File

@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 31 17:14:37 2023
@author: ym
"""
import numpy as np
class Boxes:
def __init__(self, boxes, orig_shape=None) -> None:
"""Initialize the Boxes class."""
if boxes.ndim == 1:
boxes = boxes[None, :]
m, n = boxes.shape
assert n in (6, 7), f'expected `n` in [6, 7], but got {n}' # xyxy, track_id, conf, cls
'''对每一个box进行编号利用该编号可以索引对应 feature'''
self.data = np.concatenate([boxes[:, :4], np.arange(m).reshape(-1, 1), boxes[:, 4:]], axis=-1)
self.orig_shape = orig_shape
def cpu(self):
"""Return a copy of the tensor on CPU memory."""
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)
def numpy(self):
"""Return a copy of the tensor as a numpy array."""
return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.numpy(), self.orig_shape)
@property
def xyxy(self):
"""Return the boxes in xyxy format."""
return self.data[:, :4]
@property
def xyxyb(self):
"""Return the boxes in xyxyb format."""
return self.data[:, :5]
@property
def conf(self):
"""Return the confidence values of the boxes."""
return self.data[:, -2]
@property
def cls(self):
"""Return the class values of the boxes."""
return self.data[:, -1]
# def boxes_add_fid(tboxes):
# '''
# 将 bboxes 对应的帧索引添加到 boxes 最后一列
# Return
# bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index]
# '''
# bboxes = np.empty((0, 8), dtype = np.float32)
# for tbox, f in tboxes:
# data = tbox.numpy()
# frame = f * np.ones([data.shape[0], 1])
# bbox = np.concatenate([data, frame], axis=1)
# bboxes = np.concatenate([bboxes, bbox], axis=0)
# return bboxes
def boxes_add_fid(tboxes):
'''
将 bboxes 对应的帧索引添加到 boxes 最后一列
Return
bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index]
'''
bboxes = np.empty((0, 8), dtype = np.float32)
for data, f in tboxes:
frame = f * np.ones([data.shape[0], 1])
bbox = np.concatenate([data, frame], axis=1)
bboxes = np.concatenate([bboxes, bbox], axis=0)
return bboxes

View File

@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 20 14:28:20 2023
@author: ym
"""
import numpy as np
from scipy.spatial.distance import cdist
def boxes_add_fid(tboxes):
'''
将 bboxes 对应的帧索引添加到 boxes 最后一列
Return
bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index]
'''
bboxes = np.empty((0, 8), dtype = np.float32)
for tbox, f in tboxes:
data = tbox.numpy()
frame = f * np.ones([data.shape[0], 1])
bbox = np.concatenate([data, frame], axis=1)
bboxes = np.concatenate([bboxes, bbox], axis=0)
return bboxes
def array2list(bboxes):
'''
将 bboxes 变换为 track 列表
bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index]
Return
lboxes列表列表中元素具有同一 track_idxywh 格式
[x, y, w, h, track_id, score, cls, frame_index]
'''
track_ids = set(bboxes[:, 4])
lboxes = []
for t_id in track_ids:
idx = np.where(bboxes[:, 4] == t_id)[0]
box = bboxes[idx, :]
x = (box[:, 0] + box[:, 2]) / 2
y = (box[:, 1] + box[:, 3]) / 2
# box: [x, y, w, h, track_id, score, cls, frame_index]
box[:, 2] = box[:, 2] - box[:, 0]
box[:, 3] = box[:, 3] - box[:, 1]
box[:, 0] = x
box[:, 1] = y
lboxes.append(box)
return lboxes
def max_dist_track(tboxes):
'''
计算 tboxes 中最大dist的 track
Return
'''
max_track_dist, max_dist = 0, 0
for track in tboxes:
box = track[:, :4].astype(int)
dist = cdist(box[:, :2], box[:, :2])
dm = np.max(dist)
if dm > max_dist:
max_dist = dm
max_track = track.copy()
max_track_dist = dist.copy()
# 同一 track_id 中目标中心移动最大距离的索引ix1, ix2
indx, indy = np.where(dist == dm)
ix1, ix2 = indx[0], indy[0]
# 确保 ix1 < ix2索引 ix1 是开始时的视频
if ix1 > ix2: ix1, ix2 = ix2, ix1
# =============================================================================
# # =============================================================================
# # 逻辑分析
# # =============================================================================
# Scanzone = ((0, int(Height/4)), (int(2*Weight/3), Weight))
# if max_track.shape[0] > 10:
#
# # max_track 视频序列的第一帧索引 idx1
# frame_1 = int(min(max_track[:, 7]))
# idx1 = np.where(max_track[:, 7] == frame_1)[0][0]
#
# # max_track 视频序列的最后一帧索引 idx2
# frame_2 = int(max(max_track[:, 7]))
# idx2 = np.where(max_track[:, 7] == frame_2)[0][0]
#
# # max_track 视频序列的第一帧目标位置中心 (x1, y1)
# x1, y1 = max_track[idx1, :2]
#
# # max_track 视频序列的第最后一帧目标位置中心 (x2, y2)
# x2, y2 = max_track[idx2, :2]
#
#
# # track序列第一帧和最后一帧的距离该距离和 mx_dist 不是一个概念
# dist_1_2 = max_track_dist[idx1, idx2]
#
# if max_dist < 3 * Height/10:
# State = Uncertain
#
# elif y1 > y2:
# State = TakeOut
#
# elif y1 < y2:
# State = PutIn
# =============================================================================
return max_track, max_dist

View File

@ -0,0 +1,683 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 5 13:59:21 2024
函数 读取文件
extract_data() 0/1_track.data
read_tracking_output() 0/1_tracking_output.data
read_similar() process.data
@author: ym
"""
import numpy as np
import re
import os
from collections import OrderedDict
import warnings
import matplotlib.pyplot as plt
def str_to_float_arr(s):
# 移除字符串末尾的逗号(如果存在)
if s.endswith(','):
s = s[:-1]
# 使用split()方法分割字符串然后将每个元素转化为float
float_array = [float(x) for x in s.split(",")]
return float_array
def find_samebox_in_array(arr, target):
for i, st in enumerate(arr):
if st[:4] == target[:4]:
return i
return -1
def array2list(boxes, feats):
'''boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]'''
trackID = np.unique(boxes[:, 4].astype(int))
track_ids = boxes[:, 4].astype(int)
lboxes = []
for t_id in trackID:
idx = np.where(track_ids == t_id)[0]
box = boxes[idx, :]
feat = feats[idx, :]
assert len(set(box[:, 7])) == len(box), "Please check!!!"
lboxes.append(box)
return lboxes
def extract_data(datapath):
'''
0/1_track.data 数据读取
对于特征,需要构造两个对象:
(1) tracking输出 boxes依据 (fid, bid) 找到对应的 feats, tracker_feat_dict 实现较为方便
(2) frameDictList 中元素为 frameDict输出同一接口
'''
bboxes, ffeats = [], []
trackerboxes = np.empty((0, 9), dtype=np.float64)
trackerfeats = np.empty((0, 256), dtype=np.float64)
boxes, feats, tboxes, tfeats = [], [], [], []
timestamps, frameIds = [], []
with open(datapath, 'r', encoding='utf-8') as lines:
for line in lines:
line = line.strip() # 去除行尾的换行符和可能的空白字符
if not line: # 跳过空行
continue
if line.find("CameraId")>=0:
if len(boxes): bboxes.append(np.array(boxes))
if len(feats): ffeats.append(np.array(feats))
if len(tboxes):
trackerboxes = np.concatenate((trackerboxes, np.array(tboxes)))
if len(tfeats):
trackerfeats = np.concatenate((trackerfeats, np.array(tfeats)))
timestamp, frameId = [int(ln.split(":")[1]) for ln in line.split(",")[1:]]
timestamps.append(timestamp)
frameIds.append(frameId)
boxes, feats, tboxes, tfeats = [], [], [], []
if line.find("box:") >= 0 and line.find("output_box:")<0 and line.find("out_boxes")<0:
box = line[line.find("box:") + 4:].strip()
# if len(box)==6:
boxes.append(str_to_float_arr(box))
if line.find("feat:") >= 0:
feat = line[line.find("feat:") + 5:].strip()
# if len(feat)==256:
feats.append(str_to_float_arr(feat))
if line.find("output_box:") >= 0:
assert(len(boxes)>=0 and len(boxes)==len(feats)), f"{datapath}, {datapath}, len(boxes)!=len(feats)"
box = str_to_float_arr(line[line.find("output_box:") + 11:].strip())
index = find_samebox_in_array(boxes, box)
if index >= 0:
tboxes.append(box) # 去掉'output_box:'并去除可能的空白字符
# feat_f = str_to_float_arr(input_feats[index])
feat_f = feats[index]
norm_f = np.linalg.norm(feat_f)
feat_f = feat_f / norm_f
tfeats.append(feat_f)
if len(boxes): bboxes.append(np.array(boxes))
if len(feats): ffeats.append(np.array(feats))
if len(tboxes): trackerboxes = np.concatenate((trackerboxes, np.array(tboxes)))
if len(tfeats): trackerfeats = np.concatenate((trackerfeats, np.array(tfeats)))
assert(len(bboxes)==len(ffeats)), "Error at Yolo output!"
assert(len(trackerboxes)==len(trackerfeats)), "Error at tracker output!"
## 生成帧为单位的特征列表
tracker_feats = {}
frmIDs = np.sort(np.unique(trackerboxes[:, 7].astype(int)))
for fid in frmIDs:
idx = np.where(trackerboxes[:, 7] == fid)[0]
boxes = trackerboxes[idx, :]
feats = trackerfeats[idx, :]
for i in range(len(boxes)):
f, b = int(boxes[i, 7]), int(boxes[i, 8])
tracker_feats.update({f"{f}_{b}": feats[i, :]})
boxes, trackingboxes= [], []
tracking_flag = False
with open(datapath, 'r', encoding='utf-8') as lines:
for line in lines:
line = line.strip() # 去除行尾的换行符和可能的空白字符
if not line: # 跳过空行
tracking_flag = False
continue
if tracking_flag:
if line.find("tracking_") >= 0:
tracking_flag = False
else:
box = str_to_float_arr(line)
boxes.append(box)
if line.find("tracking_") >= 0:
tracking_flag = True
if len(boxes):
trackingboxes.append(np.array(boxes))
boxes = []
if len(boxes):
trackingboxes.append(np.array(boxes))
# tracking_feat_dict = {}
# try:
# for i, boxes in enumerate(trackingboxes):
# for box in boxes:
# tid, fid, bid = int(box[4]), int(box[7]), int(box[8])
# if f"track_{tid}" not in tracking_feat_dict:
# tracking_feat_dict[f"track_{tid}"]= {"feats": {}}
# tracking_feat_dict[f"track_{tid}"]["feats"].update({f"{fid}_{bid}": tracker_feat_dict[f"frame_{fid}"]["feats"][bid]})
# except Exception as e:
# print(f'Path: {datapath}, tracking_feat_dict can not be structured correcttly, Error: {e}')
# tracker_feat_dict = {}
# tracker_feat_dict["timestamps"] = timestamps
# tracker_feat_dict["frameIds"] = frameIds
# for i in range(len(trackerboxes)):
# fid, bid = int(trackerboxes[i, 7]), int(trackerboxes[i, 8])
# if f"frame_{fid}" not in tracker_feat_dict:
# tracker_feat_dict[f"frame_{fid}"]= {"feats": {}}
# tracker_feat_dict[f"frame_{fid}"]["feats"].update({bid: trackerfeats[i, :]})
trackingfeats = []
try:
for i, boxes in enumerate(trackingboxes):
feats = []
for box in boxes:
fid, bid = int(box[7]), int(box[8])
feat = tracker_feats[f"{fid}_{bid}"]
feats.append(feat)
trackingfeats.append(np.array(feats))
except Exception as e:
print(f'Path: {datapath}, trackingfeats can not be structured correcttly, Error: {e}')
# return bboxes, ffeats, trackerboxes, tracker_feat_dict, trackingboxes, trackingfeats
return bboxes, ffeats, trackerboxes, tracker_feats, trackingboxes, trackingfeats
def extract_data_realtime(datapath):
boxes, feats = [], []
tracker_feats = {}
with open(datapath, 'r', encoding='utf-8') as lines:
for line in lines:
line = line.strip() # 去除行尾的换行符和可能的空白字符
if not line: # 跳过空行
continue
if line.endswith(','):
line = line[:-1]
ftlist = [float(x) for x in line.split()]
if len(ftlist) != 265: continue
boxes.append(ftlist[:9])
feats.append(ftlist[9:])
trackerboxes = np.array(boxes)
trackerfeats = np.array(feats)
if len(trackerboxes)==0 or len(trackerboxes) != len(trackerfeats):
return np.array([]), {}
frmIDs = np.sort(np.unique(trackerboxes[:, 7].astype(int)))
for fid in frmIDs:
idx = np.where(trackerboxes[:, 7] == fid)[0]
box = trackerboxes[idx, :]
feat = trackerfeats[idx, :]
for i in range(len(box)):
f, b = int(box[i, 7]), int(box[i, 8])
tracker_feats.update({f"{f}_{b}": feat[i, :]})
return trackerboxes, tracker_feats
def read_tracking_output_realtime(datapath):
trackingboxes, trackingfeats = [], []
tracking_outboxes, tracking_outfeats = [], []
with open(datapath, 'r', encoding='utf-8') as lines:
boxes, feats = [], []
Flag = False
for line in lines:
line = line.strip() # 去除行尾的换行符和可能的空白字符
if not line: # 跳过空行
continue
if line.endswith(','):
line = line[:-1]
ftlist = [float(x) for x in line.split()]
if len(ftlist) != 265: continue
Flag = all(elem == 0 for elem in ftlist)
if Flag:
trackingboxes.append(np.array(boxes))
trackingfeats.append(np.array(feats))
boxes, feats = [], []
continue
boxes.append(ftlist[:9])
feats.append(ftlist[9:])
if len(boxes):
trackingboxes.append(np.array(boxes))
trackingfeats.append(np.array(feats))
if len(trackingboxes):
tracking_outboxes = trackingboxes[:1]
tracking_outfeats = trackingfeats[:1]
return trackingboxes, trackingfeats, tracking_outboxes, tracking_outfeats
def read_tracking_output(filepath):
'''
0/1_tracking_output.data 数据读取
'''
boxes = []
feats = []
if not os.path.isfile(filepath):
return boxes, feats
with open(filepath, 'r', encoding='utf-8') as file:
for line in file:
line = line.strip() # 去除行尾的换行符和可能的空白字符
if not line:
continue
if line.find("gift")>0: continue
if line.endswith(','):
line = line[:-1]
data = np.array([float(x) for x in line.split(",")])
if data.size == 9:
boxes.append(data)
if data.size == 256:
feats.append(data)
if len(feats) != len(boxes) or len(boxes)==0:
return [], []
return [np.array(boxes)], [np.array(feats)]
def read_process(filePath):
timeDict = {}
with open(filePath, 'r', encoding='utf-8') as f:
lines = f.readlines()
clines = [line.strip().replace("'", '').replace('"', '') for line in lines]
for i, line in enumerate(clines):
line = line.strip()
if line.endswith(','):
line = line[:-1]
if not line: continue
lnList = line.split(":")
if line.find("eventStart")>=0:
timeDict["eventStart"] = int(lnList[-1])
if line.find("eventEnd")>=0:
timeDict["eventEnd"] = int(lnList[-1])
if line.find("weightValue")>=0:
timeDict["weightValue"] = int(lnList[-1])
return timeDict
def read_similar(filePath):
SimiDict = {}
SimiDict['one2one'] = []
SimiDict['one2SN'] = []
SimiDict['one2n'] = []
with open(filePath, 'r', encoding='utf-8') as f:
lines = f.readlines()
clean_lines = [line.strip().replace("'", '').replace('"', '') for line in lines]
one2one_list, one2SN_list, one2n_list = [], [], []
Flag_1to1, Flag_1toSN, Flag_1ton = False, False, False
for i, line in enumerate(clean_lines):
line = line.strip()
if line.endswith(','):
line = line[:-1]
Dict = {}
if not line:
if len(one2one_list): SimiDict['one2one'] = one2one_list
if len(one2SN_list): SimiDict['one2SN'] = one2SN_list
if len(one2n_list): SimiDict['one2n'] = one2n_list
one2one_list, one2SN_list, one2n_list = [], [], []
Flag_1to1, Flag_1toSN, Flag_1ton = False, False, False
continue
if line.find('oneToOne')>=0:
Flag_1to1, Flag_1toSN, Flag_1ton = True, False,False
continue
if line.find('oneToSN')>=0:
Flag_1to1, Flag_1toSN, Flag_1ton = False, True, False
continue
if line.find('oneTon')>=0:
Flag_1to1, Flag_1toSN, Flag_1ton = False, False, True
continue
if Flag_1to1:
barcode = line.split(',')[0].strip()
value = line.split(',')[1].split(':')[1].strip()
Dict['barcode'] = barcode
Dict['similar'] = float(value)
one2one_list.append(Dict)
continue
if Flag_1toSN:
barcode = line.split(',')[0].strip()
value = line.split(',')[1].split(':')[1].strip()
Dict['barcode'] = barcode
Dict['similar'] = float(value)
one2SN_list.append(Dict)
continue
if Flag_1ton:
label = line.split(':')[0].strip()
value = line.split(':')[1].strip()
bcd = label.split('_')[-1]
if len(bcd)<8: continue
Dict['event'] = label
Dict['barcode'] = bcd
Dict['similar'] = float(value.split(',')[0])
Dict['type'] = value.split('=')[-1]
one2n_list.append(Dict)
if len(one2one_list): SimiDict['one2one'] = one2one_list
if len(one2n_list): SimiDict['one2n'] = one2n_list
if len(one2SN_list): SimiDict['one2SN'] = one2SN_list
return SimiDict
def read_weight_sensor(filepath):
WeightDict = OrderedDict()
with open(filepath, 'r', encoding='utf-8') as f:
lines = f.readlines()
clean_lines = [line.strip().replace("'", '').replace('"', '') for line in lines]
for i, line in enumerate(clean_lines):
line = line.strip()
line = line.strip()
if line.find(':') < 0: continue
if line.find("Weight") >= 0:
label = "Weight"
continue
keyword = line.split(':')[0]
value = line.split(':')[1]
if label == "Weight":
vdata = [float(s) for s in value.split(',') if len(s)]
WeightDict[keyword] = vdata[-1]
weights = [(float(t), w) for t, w in WeightDict.items()]
weights = np.array(weights).astype(np.int64)
return weights
def read_weight_timeConsuming(filePth):
WeightDict, SensorDict, ProcessTimeDict = OrderedDict(), OrderedDict(), OrderedDict()
with open(filePth, 'r', encoding='utf-8') as f:
lines = f.readlines()
# label = ''
for i, line in enumerate(lines):
line = line.strip()
if line.find(':') < 0: continue
if line.find("Weight") >= 0:
label = "Weight"
continue
if line.find("Sensor") >= 0:
label = "Sensor"
continue
if line.find("processTime") >= 0:
label = "ProcessTime"
continue
keyword = line.split(':')[0]
value = line.split(':')[1]
if label == "Weight":
WeightDict[keyword] = float(value.strip(','))
if label == "Sensor":
SensorDict[keyword] = [float(s) for s in value.split(',') if len(s)]
if label == "ProcessTime":
ProcessTimeDict[keyword] = float(value.strip(','))
# print("Done!")
return WeightDict, SensorDict, ProcessTimeDict
def read_deletedBarcode_file(filePath):
with open(filePath, 'r', encoding='utf-8') as f:
lines = f.readlines()
split_flag, all_list = False, []
dict, barcode_list, similarity_list = {}, [], []
clean_lines = [line.strip().replace("'", '').replace('"', '') for line in lines]
for i, line in enumerate(clean_lines):
if line.endswith(','):
line = line[:-1]
stripped_line = line.strip()
if not stripped_line:
if len(barcode_list): dict['barcode'] = barcode_list
if len(similarity_list): dict['similarity'] = similarity_list
if len(dict): all_list.append(dict)
split_flag = False
dict, barcode_list, similarity_list = {}, [], []
continue
if line.find(':')<0: continue
label = line.split(':')[0]
value = line.split(':')[1]
if label == 'SeqDir':
dict['SeqDir'] = value
dict['filetype'] = "deletedBarcode"
if label == 'Deleted':
dict['Deleted'] = value
if label == 'List':
split_flag = True
continue
if split_flag:
barcode_list.append(label)
similarity_list.append(value)
if len(barcode_list): dict['barcode'] = barcode_list
if len(similarity_list): dict['similarity'] = similarity_list
if len(dict): all_list.append(dict)
return all_list
def read_returnGoods_file(filePath):
'''
20241030开始原 deletedBarcode.txt 中数据格式修改为 returnGoods.txt读数方式随之变化
'''
with open(filePath, 'r', encoding='utf-8') as f:
lines = f.readlines()
clean_lines = [line.strip().replace("'", '').replace('"', '') for line in lines]
all_list = []
split_flag, dict = False, {}
barcode_list, similarity_list = [], []
event_list, type_list = [], []
for i, line in enumerate(clean_lines):
stripped_line = line.strip()
if line.endswith(','):
line = line[:-1]
if not stripped_line:
if len(barcode_list): dict['barcode'] = barcode_list
if len(similarity_list): dict['similarity'] = similarity_list
if len(event_list): dict['event'] = event_list
if len(type_list): dict['type'] = type_list
if len(dict) and dict['SeqDir'].find('*')<0:
all_list.append(dict)
split_flag, dict = False, {}
barcode_list, similarity_list = [], []
event_list, type_list = [], []
continue
if line.find(':')<0: continue
if line.find('1:n')==0: continue
label = line.split(':')[0].strip()
value = line.split(':')[1].strip()
if label == 'SeqDir':
dict['SeqDir'] = value
dict['Deleted'] = value.split('_')[-1]
dict['filetype'] = "returnGoods"
if label == 'List':
split_flag = True
continue
if split_flag:
bcd = label.split('_')[-1]
if len(bcd)<8: continue
# event_list.append(label + '_' + bcd)
event_list.append(label)
barcode_list.append(bcd)
similarity_list.append(value.split(',')[0])
type_list.append(value.split('=')[-1])
if len(barcode_list): dict['barcode'] = barcode_list
if len(similarity_list): dict['similarity'] = similarity_list
if len(event_list): dict['event'] = event_list
if len(type_list): dict['type'] = type_list
if len(dict) and dict['SeqDir'].find('*')<0:
all_list.append(dict)
return all_list
def plot_sensor_curve(WeightDict, SensorDict, ProcessTimeDict):
wtime, wdata = [], []
stime, sdata = [], []
for key, value in WeightDict.items():
wtime.append(int(key))
wdata.append(value)
for key, value in SensorDict.items():
if len(value) != 9: continue
stime.append(int(key))
sdata.append(np.array(value))
static_range = []
dynamic_range = []
windth = 8
nw = len(wdata)
assert(nw) >= 8, "The num of weight data is less than 8!"
# i1, i2 = 0, 7
# while i2 < nw:
# data = wdata[i1:(i2+1)]
# max(data) - min(data)
# if i2<7:
# i1 = 0
# else:
# i1 = i2-windth
min_t = min(wtime + stime)
wtime = [t-min_t for t in wtime]
stime = [t-min_t for t in stime]
max_t = max(wtime + stime)
fig = plt.figure(figsize=(16, 12))
gs = fig.add_gridspec(2, 1, left=0.1, right=0.9, bottom=0.1, top=0.9,
wspace=0.05, hspace=0.15)
# ax1, ax2 = axs
ax1 = fig.add_subplot(gs[0,0])
ax2 = fig.add_subplot(gs[1,0])
ax1.plot(wtime, wdata, 'b--', linewidth=2 )
for i in range(9):
ydata = [s[i] for s in sdata]
ax2.plot(stime, ydata, linewidth=2 )
ax1.grid(True), ax1.set_xlim(0, max_t), ax1.set_title('Weight')
ax1.set_label("(Time: ms)")
# ax1.legend()
ax2.grid(True), ax2.set_xlim(0, max_t), ax2.set_title('IMU')
# ax2.legend()
plt.show()
def test_process(file_path):
WeightDict, SensorDict, ProcessTimeDict = read_weight_timeConsuming(file_path)
plot_sensor_curve(WeightDict, SensorDict, ProcessTimeDict)
def main():
files_path = r'\\192.168.1.28\share\测试_202406\0814\0814\20240814-102227-62264578-a720-4eb9-b95e-cb8be009aa98_null'
k = 0
for filename in os.listdir(files_path):
filename = 'process.data'
file_path = os.path.join(files_path, filename)
if os.path.isfile(file_path) and filename.find("track.data")>0:
extract_data(file_path)
if os.path.isfile(file_path) and filename.find("process.data")>=0:
test_process(file_path)
k += 1
if k == 1:
break
def main1():
fpath = r'\\192.168.1.28\share\测试视频数据以及日志\各模块测试记录\比对测试\1209永辉超市测试\20241209-155924-117e1941-70f8-4287-8de1-4866868548a6_6926475209967\process.data'
simidct = read_similar(fpath)
print(simidct)
if __name__ == "__main__":
main1()

View File

@ -0,0 +1,250 @@
# -*- coding: utf-8 -*-
"""
Created on Tue May 21 15:25:23 2024
读取 Pipeline 各模块的数据,主代码由 马晓慧 完成
@author: ieemoo-zl003
"""
import os
import numpy as np
# 替换为你的目录路径
files_path = 'D:/contrast/dataset/1_to_n/709/20240709-112658_6903148351833/'
def str_to_float_arr(s):
# 移除字符串末尾的逗号(如果存在)
if s.endswith(','):
s = s[:-1]
# 使用split()方法分割字符串然后将每个元素转化为float
float_array = np.array([float(x) for x in s.split(",")])
return float_array
def extract_tracker_input_boxes_feats(file_name):
boxes = []
feats = []
with open(file_name, 'r', encoding='utf-8') as file:
for line in file:
line = line.strip() # 去除行尾的换行符和可能的空白字符
# 跳过空行
if not line:
continue
# 检查是否以'box:'或'feat:'开始
if line.find("box:") >= 0 and line.find("output_box:") < 0:
box = line[line.find("box:") + 4:].strip()
boxes.append(str_to_float_arr(box)) # 去掉'box:'并去除可能的空白字符
if line.find("feat:") >= 0:
feat = line[line.find("feat:") + 5:].strip()
feats.append(str_to_float_arr(feat)) # 去掉'box:'并去除可能的空白字符
return np.array(boxes), np.array(feats)
def find_string_in_array(arr, target):
"""
在字符串数组中找到目标字符串对应的行(索引)。
参数:
arr -- 字符串数组
target -- 要查找的目标字符串
返回:
目标字符串在数组中的索引。如果未找到,则返回-1。
"""
tg = [float(t) for k, t in enumerate(target.split(',')) if k<4][:4]
for i, st in enumerate(arr):
st = [float(s) for k, s in enumerate(target.split(',')) if k<4][:4]
if st == tg:
return i
# if st[:20] == target[:20]:
# return i
return -1
def find_samebox_in_array(arr, target):
for i, st in enumerate(arr):
if all(st[:4] == target[:4]):
return i
return -1
def extract_tracker_output_boxes_feats(read_file_name):
input_boxes, input_feats = extract_tracker_input_boxes_feats(read_file_name)
boxes = []
feats = []
with open(read_file_name, 'r', encoding='utf-8') as file:
for line in file:
line = line.strip() # 去除行尾的换行符和可能的空白字符
# 跳过空行
if not line:
continue
# 检查是否以'output_box:'开始
if line.find("output_box:") >= 0:
box = str_to_float_arr(line[line.find("output_box:") + 11:].strip())
boxes.append(box) # 去掉'output_box:'并去除可能的空白字符
index = find_samebox_in_array(input_boxes, box)
if index >= 0:
# feat_f = str_to_float_arr(input_feats[index])
feat_f = input_feats[index]
norm_f = np.linalg.norm(feat_f)
feat_f = feat_f / norm_f
feats.append(feat_f)
return input_boxes, input_feats, np.array(boxes), np.array(feats)
def extract_tracking_output_boxes_feats(read_file_name):
tracker_boxes, tracker_feats, input_boxes, input_feats = extract_tracker_output_boxes_feats(read_file_name)
boxes = []
feats = []
tracking_flag = False
with open(read_file_name, 'r', encoding='utf-8') as file:
for line in file:
line = line.strip() # 去除行尾的换行符和可能的空白字符
# 跳过空行
if not line:
continue
if tracking_flag:
if line.find("tracking_") >= 0:
tracking_flag = False
else:
box = str_to_float_arr(line)
boxes.append(box)
index = find_samebox_in_array(input_boxes, box)
if index >= 0:
feats.append(input_feats[index])
# 检查是否以tracking_'开始
if line.find("tracking_") >= 0:
tracking_flag = True
assert(len(tracker_boxes)==len(tracker_feats)), "Error at Yolo output"
assert(len(input_boxes)==len(input_feats)), "Error at tracker output"
assert(len(boxes)==len(feats)), "Error at tracking output"
return tracker_boxes, tracker_feats, input_boxes, input_feats, np.array(boxes), np.array(feats)
def read_tracking_input(datapath):
with open(datapath, 'r') as file:
lines = file.readlines()
data = []
for line in lines:
data.append([s for s in line.split(',') if len(s)>=3])
# data.append([float(s) for s in line.split(',') if len(s)>=3])
# data = np.array(data, dtype = np.float32)
try:
data = np.array(data, dtype = np.float32)
except Exception as e:
data = np.array([], dtype = np.float32)
print('DataError for func: read_tracking_input()')
return data
def read_tracker_input(datapath):
with open(datapath, 'r') as file:
lines = file.readlines()
Videos = []
FrameBoxes, FrameFeats = [], []
boxes, feats = [], []
timestamp = []
t1 = None
for line in lines:
if line.find('CameraId') >= 0:
t = int(line.split(',')[1].split(':')[1])
timestamp.append(t)
if len(boxes) and len(feats):
FrameBoxes.append(np.array(boxes, dtype = np.float32))
FrameFeats.append(np.array(feats, dtype = np.float32))
boxes, feats = [], []
if t1 and t - t1 > 1e3:
Videos.append((FrameBoxes, FrameFeats))
FrameBoxes, FrameFeats = [], []
t1 = int(line.split(',')[1].split(':')[1])
if line.find('box') >= 0:
box = line.split(':', )[1].split(',')[:-1]
boxes.append(box)
if line.find('feat') >= 0:
feat = line.split(':', )[1].split(',')[:-1]
feats.append(feat)
FrameBoxes.append(np.array(boxes, dtype = np.float32))
FrameFeats.append(np.array(feats, dtype = np.float32))
Videos.append((FrameBoxes, FrameFeats))
# TimeStamp = np.array(timestamp, dtype = np.int64)
# DimesDiff = np.diff((TimeStamp))
# sorted_indices = np.argsort(TimeStamp)
# TimeStamp_sorted = TimeStamp[sorted_indices]
# DimesDiff_sorted = np.diff((TimeStamp_sorted))
return Videos
def main():
files_path = 'D:/contrast/dataset/1_to_n/709/20240709-112658_6903148351833/'
# 遍历目录下的所有文件和目录
for filename in os.listdir(files_path):
# 构造完整的文件路径
file_path = os.path.join(files_path, filename)
if os.path.isfile(file_path) and filename.find("track.data")>0:
tracker_boxes, tracker_feats, tracking_boxes, tracking_feats, output_boxes, output_feats = extract_tracking_output_boxes_feats(file_path)
print("Done")
if __name__ == "__main__":
main()

View File

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 8 09:51:59 2024
@author: ym
"""
import os
def main():
directory = r'D:\DetectTracking\runs\detect'
directory = r'D:\DetectTracking\tracking\result\tracks'
suffix = '_'
for root, dirs, files in os.walk(directory):
for name in dirs:
old_name = os.path.join(root, name)
new_name = os.path.join(root, f"{name}{suffix}")
try:
os.rename(old_name, new_name)
except Exception as e:
print(f"Failed to rename directory '{old_name}': {e}")
for name in files:
old_name = os.path.join(root, name)
file, ext = os.path.splitext(name)
new_name = os.path.join(root, f"{file}{suffix}{ext}")
try:
os.rename(old_name, new_name)
except Exception as e:
print(f"Failed to rename file '{old_name}': {e}")
if __name__ == "__main__":
main()

View File

@ -0,0 +1,361 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 5 10:01:11 2023
@author: ym
"""
import numpy as np
import os
import cv2
import sys
from scipy.spatial.distance import cdist
import matplotlib
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
# from ultralytics.utils.plotting import Annotator, colors
from .annotator import TrackAnnotator
# from .processboxes import array2list
# boxes Format: [x1, y1, x2, y2, track_id, score, cls, frame_index]
pth = r"D:/DeepLearning/yolov5/tracking/"
colors = np.array([[255, 255, 255], [255, 255, 255], [255, 255, 255], [255, 255, 255], [255, 255, 255],
[0, 0, 255], [0, 255, 0], [255, 51, 255], [102, 178, 255], [51, 153, 255],
[255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102],
[51, 255, 51], [255, 102, 255], [153, 204, 255], [255, 0, 0], [255, 255, 255]],
dtype=np.uint8)
def array2list(bboxes):
'''
将 bboxes 变换为 track 列表
bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
Return
lboxes列表列表中元素具有同一 track_idx1y1x2y2 格式
[x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
'''
tids = set(bboxes[:, 4])
track_ids = bboxes[:, 4].astype(int)
lboxes = []
for t_id in tids:
# print(f"The ID is: {t_id}")
idx = np.where(track_ids == t_id)[0]
box = bboxes[idx, :]
lboxes.append(box)
return lboxes
def draw5points(bboxes, file):
"""
显示中心点、4角点的轨迹以及轨迹 features
"""
image = cv2.imread(pth + r"/shopcart/cart_tempt/edgeline.png")
imgx = image.copy()
annotator = TrackAnnotator(imgx, line_width=2)
lboxes = array2list(bboxes)
for k in range(len(lboxes)):
boxes = lboxes[k]
cls = int(boxes[0, 6])
tid = int(boxes[0, 4])
# print(tid)
frnum = boxes.shape[0]
cornpoints = np.zeros((frnum, 10))
cornpoints[:,0] = (boxes[:, 0] + boxes[:, 2]) / 2
cornpoints[:,1] = (boxes[:, 1] + boxes[:, 3]) / 2
cornpoints[:,2], cornpoints[:,3] = boxes[:, 0], boxes[:, 1]
cornpoints[:,4], cornpoints[:,5] = boxes[:, 2], boxes[:, 1]
cornpoints[:,6], cornpoints[:,7] = boxes[:, 0], boxes[:, 3]
cornpoints[:,8], cornpoints[:,9] = boxes[:, 2], boxes[:, 3]
x1, y1, x2, y2 = cornpoints[:,2],cornpoints[:,3],cornpoints[:,8],cornpoints[:,9]
BoundPixel = 10
BoundThresh = 0.4
cont1 = sum(abs(x1)<BoundPixel) / frnum > BoundThresh
cont2 = sum(abs(y1)<BoundPixel) / frnum > BoundThresh
cont3 = sum(abs(x2-1024)<BoundPixel) / frnum > BoundThresh
cont4 = sum(abs(y2-1280)<BoundPixel) / frnum > BoundThresh
isImgBorder = False
if cont1 or cont2 or cont3 or cont4:
isImgBorder = True
# =============================================================================
# '''情况1: 在商品运动过程中,商品检测框始终左下角点和图像左下角点重合, 用中心点代替'''
# lfcn_dist = np.linalg.norm(cornpoints[:, 6:8] - [0, 1280], axis=1)
# idx1 = lfcn_dist<10
# if sum(idx1)/frnum > 0.5:
# cornpoints[:, 6:8] = cornpoints[:, 0:2]
#
# '''情况2: 在商品运动过程中,商品检测框始终右下角点和图像右下角点重合, 用中心点代替'''
# rtcn_dist = np.linalg.norm(cornpoints[:, 8:10] - [1024, 1280], axis=1)
# idx2 = rtcn_dist<10
# if sum(idx2)/frnum > 0.5:
# cornpoints[:, 8:10] = cornpoints[:, 0:2]
# =============================================================================
mwh = (np.mean(boxes[:, 2]) + np.mean(boxes[:, 3]))/2
trajectory = []
trajlens = []
trajdist = []
for k in range(5):
traj = np.linalg.norm(np.diff(cornpoints[:, 2*k:2*(k+1)], axis = 0), axis=1)
trajlen = np.sum(traj)
ptdist = np.max(cdist(cornpoints[:, 2*k:2*(k+1)], cornpoints[:, 2*k:2*(k+1)]))
trajectory.append(traj)
trajlens.append(trajlen)
trajdist.append(ptdist)
if not isImgBorder:
idx = trajlens.index(min(trajlens))
trajmin = trajectory[idx]
trajlen_min = min(trajlens)
trajdist_min = min(trajdist)
else:
trajmin = trajectory[0]
trajlen_min = trajlens[0]
trajdist_min = trajdist[0]
'''最小轨迹长度/最大轨迹长度,越小,代表运动幅度越小'''
trajlen_rate = trajlen_min/(max(trajlens)+0.0001)
'''最小轨迹欧氏距离/目标框尺度均值'''
trajdist_rate = trajdist_min/(mwh+0.0001)
# idx = trajlens.index(min(trajlens))
# trajmin = trajectory[idx]
# '''最小轨迹长度/最大轨迹长度,越小,代表运动幅度越小'''
# trajlen_rate = min(trajlens)/(max(trajlens)+0.0001)
# '''最小轨迹欧氏距离,越小,代表运动幅度越小'''
# trajdist_min = min(trajdist)
# '''最小轨迹欧氏距离 / 目标框尺度均值'''
# mindist_rate = min(trajdist)/(mwh+0.0001)
img = image.copy()
for i in range(boxes.shape[0]):
cv2.circle(img, (int(cornpoints[i, 0]), int(cornpoints[i, 1])), 6, (255, 255, 255), 2)
cv2.circle(img, (int(cornpoints[i, 2]), int(cornpoints[i, 3])), 6, (255, 0, 255), 2)
cv2.circle(img, (int(cornpoints[i, 4]), int(cornpoints[i, 5])), 6, (0, 255, 0), 2)
cv2.circle(img, (int(cornpoints[i, 6]), int(cornpoints[i, 7])), 6, (64, 128, 255), 2)
cv2.circle(img, (int(cornpoints[i, 8]), int(cornpoints[i, 9])), 6, (255, 128, 64), 2)
# if frnum>=3:
# cntpoints = cornpoints[:, 0:2].astype(np.int64)
# rect = cv2.minAreaRect(cntpoints)
# box = cv2.boxPoints(rect)
# box = np.int0(box)
# cv2.drawContours(img, [box], 0, (255, 0, 0), 2)
# img1 = image.copy()
# for i in range(boxes.shape[0]-1):
# pt1 = cornpoints[i, :].astype(np.int64)
# pt2 = cornpoints[i+1, :].astype(np.int64)
# cv2.line(img1, pt1, pt2, color=(255, 255, 255), thickness=2)
# gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
# _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
# contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
color = ((0, 0, 255), (255, 128, 0))
label_6 = "PCA(singular_values_) : "
label_7 = "Rect : "
if frnum>=3:
if isImgBorder:
X = cornpoints[:, 0:2]
else:
X = cornpoints[:, 2*idx:2*(idx+1)]
pca = PCA()
pca.fit(X)
label_6 = "PCA(variance_ratio) : {:.2f}".format(pca.explained_variance_ratio_[0])
for i, (comp, var) in enumerate(zip(pca.components_, pca.explained_variance_ratio_)):
pt1 = (pca.mean_ - comp*var*200).astype(np.int64)
pt2 = (pca.mean_ + comp*var*200).astype(np.int64)
cv2.line(img, pt1, pt2, color=color[i], thickness=2)
rect = cv2.minAreaRect(X.astype(np.int64))
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(img, [box], 0, (0, 255, 0), 2)
label_7 = "Rect W&H&Ratio: {}, {}, {:.2f}".format(int(rect[1][0]), int(rect[1][1]), min(rect[1])/(max(rect[1])+0.001))
'''撰写专利需要,生成黑白图像'''
# imgbt = cv2.bitwise_not(img)
# for i in range(box.shape[0]):
# cv2.circle(imgbt, (int(cornpoints[i, 0]), int(cornpoints[i, 1])), 14, (0, 0, 0), 2)
# cv2.drawMarker(imgbt, (int(cornpoints[i, 2]), int(cornpoints[i, 3])), color= (0, 0, 0), markerType=3, markerSize = 30, thickness=2)
# cv2.drawMarker(imgbt, (int(cornpoints[i, 4]), int(cornpoints[i, 5])), color= (0, 0, 0), markerType=4, markerSize = 30, thickness=2)
# cv2.drawMarker(imgbt, (int(cornpoints[i, 6]), int(cornpoints[i, 7])), color= (0, 0, 0), markerType=5, markerSize = 30, thickness=2)
# cv2.drawMarker(imgbt, (int(cornpoints[i, 8]), int(cornpoints[i, 9])), color= (0, 0, 0), markerType=6, markerSize = 30, thickness=2)
# cv2.imwrite(pth + f"/zhuanli/{file}_{tid}.png", imgbt)
if len(trajmin):
trajstd = np.std(trajmin)
else:
trajstd = 0
trajlens = [int(t) for t in trajlens]
trajdist = [int(t) for t in trajdist]
label_1 = f"trajlens: {trajlens}, trajlen_min: {int(trajlen_min)}"
label_2 = f"trajdist: {trajdist}: trajdist_min: {int(trajdist_min)}"
label_3 = "trajlen_min/max(trajlens): {:.2f}/{} = {:.2f}".format(trajlen_min, max(trajlens), trajlen_rate)
label_4 = "trajdist_min/mwh : {:.2f}/{} = {:.2f}".format(trajdist_min, int(mwh), trajdist_rate)
label_5 = "std(trajmin) : {:.2f}".format(trajstd)
label = [label_1, label_2, label_3, label_4, label_5, label_6, label_7]
word = 'abc'
w, h = cv2.getTextSize('abc', 0, fontScale=2, thickness=1)[0]
for i in range(len(label)):
# color = [int(x) for x in colors[i]]
cv2.putText(img,
label[i],
(20, int(50+(i+1)*1.2*h)),
0,
1,
[int(x) for x in colors[i]],
2,
lineType=cv2.LINE_AA)
cv2.imwrite(pth + f"/result/cls11_80212/{file}_{tid}.png", img)
def drawtracks(bboxes, imgshow=None):
"""
Inputs
bboxes: 原始检测跟踪后的结果,变换为 tboxes
image只用于获取图像的Width, Height
Outputs:
imgshow
"""
if imgshow is None:
edgeline = cv2.imread(pth + r"/shopcart/cart_tempt/edgeline.png")
# edgeline = cv2.bitwise_not(edgeline)
H, W = edgeline.shape[0:2]
imgshow= np.zeros((H, W, 3), np.uint8)
if 'edgeline' in locals().keys():
imgshow = cv2.add(imgshow, edgeline)
## ==== list其中元素格式: [x, y, w, h, track_id, score, cls, frame_index]
tboxes = array2list(bboxes)
# imgshow = cv2.bitwise_not(imgshow)
annotator = TrackAnnotator(imgshow, line_width=2)
for boxes in tboxes:
annotator.plotting_track(boxes)
imgshow = annotator.result()
return imgshow
def writefilename():
npydir = r"D:\DeepLearning\yolov5\runs\boxes"
files = [f.split('.')[0] for f in os.listdir(npydir)]
with open('data.txt', 'w') as f:
[f.write(f"{file}:\n") for file in files]
print("len(files)")
# for filename in os.listdir(npydir):
# file, ext = os.path.splitext(filename)
def main():
npydir = r"D:\DeepLearning\yolov5\runs\boxes"
k = 0
fields = []
for filename in os.listdir(npydir):
# filename = "加购_快速置入_12.npy"
print(filename)
file, ext = os.path.splitext(filename)
filepath = os.path.join(npydir, filename)
try:
bboxes = np.load(filepath)
imgshow = drawtracks(bboxes, file)
draw5points(bboxes, file)
cv2.imwrite(pth + f"/result/cls11_80212/{file}_show.png", imgshow)
except Exception as e:
# print(str(e))
pass
# k += 1
# if k == 1:
# break
if __name__ == "__main__":
main()
# writefilename()

View File

@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 20 14:10:09 2023
@author: ym
"""
import numpy as np
import os
import cv2
# from pathlib import Path
# import math
# import sys
# from scipy.spatial.distance import cdist
VideoFormat = ['.mp4', '.avi', '.ts']
def video2imgs(videof, imgdir):
cap = cv2.VideoCapture(videof)
i = 0
while True:
ret, frame = cap.read()
if not ret:
break
imgp = os.path.join(imgdir, f"{i}.png")
i += 1
cv2.imwrite(imgp, frame)
if i == 400:
break
cap.release()
print(os.path.basename(videof) + f" haved resolved")
def videosave(bboxes, videopath="100_1688009697927.mp4"):
cap = cv2.VideoCapture(videopath)
fps = int(cap.get(cv2.CAP_PROP_FPS)) # integer required, floats produce error in MP4 codec
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
## =========================================== 在当前模块地址下存储图像和视频
path = os.path.split(os.path.realpath(__file__))[0]
_, filename = os.path.split(videopath)
file, ext = os.path.splitext(filename)
## ======================================================== 视频保存设置
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
save_video_path = os.path.join(path, "{}_show_1.mp4".format(file))
vid_writer = cv2.VideoWriter(save_video_path, fourcc, fps, (width, height))
## ======================================================== 图像保存路径设置
save_img_path = os.path.join(path, "{}_show".format(file))
if not os.path.exists(save_img_path):
os.makedirs(save_img_path)
cout = 0
while cap.isOpened():
ret, frame = cap.read()
if ret:
idx = np.where(bboxes[:, 7] == cout)[0]
box = bboxes[idx, 0:4].astype(int)
for i in range(box.shape[0]):
x1, y1 = box[i, :2]
x2, y2 = box[i, 2:4]
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 125, 255), 2)
cv2.imwrite(os.path.join(save_img_path, "{}.png".format(cout)), frame)
vid_writer.write(frame)
cout += 1
else:
print("end!!!!!!!!!!!!!!!!!!!")
break
vid_writer.release()
cap.release()
def main():
videopath = r'D:\videos'
savepath = r'D:\videos'
# video2imgs(videopath, savepath)
k = 0
for filename in os.listdir(videopath):
# filename = "20240929-155533.ts"
file, ext = os.path.splitext(filename)
if ext not in VideoFormat:
continue
basename = os.path.basename(videopath)
imgbase = basename + '-&-' + file
imgdir = os.path.join(savepath, imgbase)
if not os.path.exists(imgdir):
os.mkdir(imgdir)
videof = os.path.join(videopath, filename)
video2imgs(videof, imgdir)
# k += 1
# if k == 1:
# break
if __name__ == '__main__':
main()