Update detecttracking

lee
2025-01-22 13:16:44 +08:00
parent 2320468c40
commit c9d79f8059
355 changed files with 61097 additions and 1 deletion

Submodule detecttracking deleted from 2feedd622d

detecttracking/README.md Normal file
View File

@@ -0,0 +1,7 @@
Description:
This repository is a backup of the code under the yolov5_track folder. The yolov5 code here comes from https://github.com/ultralytics/yolov5
Gitee address: https://gitee.com/nanjing-yimao-information/dettrack
Core module:
track_reid.py implements
1. yolov5 detection
2. Bot-SORT for object tracking
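
A minimal sketch of how the two stages fit together (the names below are illustrative only; the real entry points live in track_reid.py):

    # hypothetical wiring of the two stages in track_reid.py
    dets = yolov5_detect(frame)           # 1. yolov5 detection: boxes + scores + classes
    tracks = botsort.update(dets, frame)  # 2. Bot-SORT: associate boxes across frames into tracks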

View File

@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 26 08:53:58 2024
@author: ym
"""

View File

@@ -0,0 +1,352 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 16 18:56:18 2024
@author: ym
"""
import os
import cv2
import json
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.font_manager import FontProperties
from scipy.spatial.distance import cdist
from utils.event import ShoppingEvent, save_data
rcParams['font.sans-serif'] = ['SimHei']  # use SimHei so Chinese labels render
rcParams['axes.unicode_minus'] = False    # render the minus sign correctly
'''*********** USearch ***********'''
def read_usearch():
    stdFeaturePath = r"D:\contrast\stdlib\v11_test.json"
    stdBarcode = []
    stdlib = {}
    with open(stdFeaturePath, 'r', encoding='utf-8') as f:
        data = json.load(f)
    for dic in data['total']:
        barcode = dic['key']
        feature = np.array(dic['value'])
        stdBarcode.append(barcode)
        stdlib[barcode] = feature
    return stdlib
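
# Expected layout of the standard-feature JSON read above, inferred from the
# parsing code (field contents are illustrative):
# {
#     "total": [
#         {"key": "6923555210479", "value": [[0.01, ...], ...]}  # barcode -> (nsample, 256) features
#     ]
# }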
def get_eventlist():
    '''
    Read the error events recorded in one test run.
    '''
    evtpaths = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\images"
    text1 = "one2n_Error.txt"
    text2 = "one2SN_Error.txt"
    events = []
    text = (text1, text2)
    for txt in text:
        txtfile = os.path.join(evtpaths, txt)
        with open(txtfile, "r") as f:
            lines = f.readlines()
        for i, line in enumerate(lines):
            line = line.strip()
            if line:
                fpath = os.path.join(evtpaths, line)
                events.append(fpath)
    events = list(set(events))
    return events
def single_event():
    events = get_eventlist()

    '''Storage path for the current events; the corresponding files are created here'''
    resultPath = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\result\single_event"
    for evtpath in events:
        event = ShoppingEvent(evtpath)
        save_data(event, resultPath)
        print(event.evtname)
def get_topk_percent(data, k):
    """
    Return the largest k% of the elements in data.
    """
    # convert the data to a NumPy array
    if isinstance(data, list):
        data = np.array(data)
    percentile = np.percentile(data, 100-k)
    top_k_percent = data[data >= percentile]
    return top_k_percent
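
# Example (assuming NumPy's default linear-interpolation percentile):
# get_topk_percent(np.arange(1, 11), 25)
# -> percentile = np.percentile(data, 75) = 7.75, so the result is [8, 9, 10]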
def cluster(data, thresh=0.15):
    # data = np.array([0.1, 0.13, 0.7, 0.2, 0.8, 0.52, 0.3, 0.7, 0.85, 0.58])
    # data = np.array([0.1, 0.13, 0.2, 0.3])
    # data = np.array([0.1])
    if isinstance(data, list):
        data = np.array(data)
    data1 = np.sort(data)
    cluter, Cluters = [data1[0]], []
    for i in range(1, len(data1)):
        if data1[i] - data1[i-1] < thresh:
            cluter.append(data1[i])
        else:
            Cluters.append(cluter)
            cluter = [data1[i]]
    Cluters.append(cluter)

    clt_center = []
    for clt in Cluters:
        ## Should a minimum number of track samples per cluster be enforced here?
        ## That factor is better handled in the track analysis itself.
        # if len(clt)>=3:
        #     clt_center.append(np.mean(clt))
        clt_center.append(np.mean(clt))
    # print(clt_center)
    return clt_center
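
# Example on the sample data from the comment above, with thresh=0.15:
# cluster([0.1, 0.13, 0.7, 0.2, 0.8, 0.52, 0.3, 0.7, 0.85, 0.58])
# sorts to [0.1, 0.13, 0.2, 0.3, 0.52, 0.58, 0.7, 0.7, 0.8, 0.85]; the only
# gap >= 0.15 is between 0.3 and 0.52, giving centers [0.1825, 0.6917].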
def calc_simil(event, stdfeat):
    def calsiml(feat1, feat2):
        '''Strategy for scoring similarity between track samples and the standard feature set'''
        matrix = 1 - cdist(feat1, feat2, 'cosine')
        simi_max = []
        for i in range(len(matrix)):
            sim = np.mean(get_topk_percent(matrix[i, :], 75))
            simi_max.append(sim)
        cltc_max = cluster(simi_max)
        Simi = max(cltc_max)
        ## An empty cltc_max would indicate a programming oversight and should be investigated
        # if len(cltc_max):
        #     Simi = max(cltc_max)
        # else:
        #     Simi = 0  # should never be reached
        return Simi

    front_boxes = np.empty((0, 9), dtype=np.float64)    ## compatible with class doTracks
    front_feats = np.empty((0, 256), dtype=np.float64)  ## compatible with class doTracks
    for i in range(len(event.front_boxes)):
        front_boxes = np.concatenate((front_boxes, event.front_boxes[i]), axis=0)
        front_feats = np.concatenate((front_feats, event.front_feats[i]), axis=0)

    back_boxes = np.empty((0, 9), dtype=np.float64)    ## compatible with class doTracks
    back_feats = np.empty((0, 256), dtype=np.float64)  ## compatible with class doTracks
    for i in range(len(event.back_boxes)):
        back_boxes = np.concatenate((back_boxes, event.back_boxes[i]), axis=0)
        back_feats = np.concatenate((back_feats, event.back_feats[i]), axis=0)

    if len(front_feats):
        front_simi = calsiml(front_feats, stdfeat)
    if len(back_feats):
        back_simi = calsiml(back_feats, stdfeat)

    '''Fusion strategy for the front- and back-camera similarities'''
    if len(front_feats) and len(back_feats):
        diff_simi = abs(front_simi - back_simi)
        if diff_simi > 0.15:
            Similar = max([front_simi, back_simi])
        else:
            Similar = (front_simi + back_simi) / 2
    elif len(front_feats) and len(back_feats)==0:
        Similar = front_simi
    elif len(front_feats)==0 and len(back_feats):
        Similar = back_simi
    else:
        Similar = None  # event.front_feats and event.back_feats are both empty
    return Similar
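
# Illustration of the fusion rule (values made up): with front_simi=0.80 and
# back_simi=0.55 the gap 0.25 > 0.15, so Similar = max(...) = 0.80; with
# front_simi=0.62 and back_simi=0.58 the gap is 0.04, so the mean 0.60 is used.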
def simi_matrix():
    resultPath = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\result\single_event"

    stdlib = read_usearch()
    events = get_eventlist()
    for evtpath in events:
        evtname = os.path.basename(evtpath)
        _, barcode = evtname.split("_")

        # build the event and the corresponding standard feature set
        event = ShoppingEvent(evtpath)
        stdfeat = stdlib[barcode]

        Similar = calc_simil(event, stdfeat)

        # build the storage path for the box sub-images
        subimgpath = os.path.join(resultPath, f"{event.evtname}", "subimg")
        if not os.path.exists(subimgpath):
            os.makedirs(subimgpath)
        histpath = os.path.join(resultPath, "simi_hist")
        if not os.path.exists(histpath):
            os.makedirs(histpath)

        mean_values, max_values = [], []
        cameras = ('front', 'back')
        fig, ax = plt.subplots(2, 3, figsize=(16, 9), dpi=100)
        kpercent = 25
        for camera in cameras:
            boxes = np.empty((0, 9), dtype=np.float64)      ## compatible with class doTracks
            evtfeat = np.empty((0, 256), dtype=np.float64)  ## compatible with class doTracks
            if camera == 'front':
                for i in range(len(event.front_boxes)):
                    boxes = np.concatenate((boxes, event.front_boxes[i]), axis=0)
                    evtfeat = np.concatenate((evtfeat, event.front_feats[i]), axis=0)
                imgpaths = event.front_imgpaths
            else:
                for i in range(len(event.back_boxes)):
                    boxes = np.concatenate((boxes, event.back_boxes[i]), axis=0)
                    evtfeat = np.concatenate((evtfeat, event.back_feats[i]), axis=0)
                imgpaths = event.back_imgpaths

            assert len(boxes)==len(evtfeat), f"Please check the Event: {evtname}"
            if len(boxes)==0: continue
            print(evtname)

            matrix = 1 - cdist(evtfeat, stdfeat, 'cosine')
            simi_1d = matrix.flatten()
            simi_mean = np.mean(matrix, axis=1)
            # simi_max = np.max(matrix, axis=1)

            '''average the largest k% of the similarities in each row of the similarity matrix'''
            simi_max = []
            for i in range(len(matrix)):
                sim = np.mean(get_topk_percent(matrix[i, :], kpercent))
                simi_max.append(sim)

            mean_values.append(np.mean(matrix))
            max_values.append(np.mean(simi_max))
            diff_max_mean = np.mean(simi_max) - np.mean(matrix)

            '''plots of the similarity statistics'''
            k = 0
            if camera == 'front': k = 1
            '''********************* all similarity values *********************'''
            ax[k, 0].hist(simi_1d, bins=60, range=(-0.2, 1), edgecolor='black')
            ax[k, 0].set_xlim([-0.2, 1])
            ax[k, 0].set_title(camera)
            _, y_max = ax[k, 0].get_ylim()  # get the y-axis range
            '''range of the similarity values'''
            ax[k, 0].text(-0.1, 0.15*y_max, f"rng:{max(simi_1d)-min(simi_1d):.3f}", fontsize=18, color='b')

            '''********************* mean ********************************'''
            ax[k, 1].hist(simi_mean, bins=24, range=(-0.2, 1), edgecolor='black')
            ax[k, 1].set_xlim([-0.2, 1])
            ax[k, 1].set_title("mean")
            _, y_max = ax[k, 1].get_ylim()  # get the y-axis range
            '''range of the similarity values'''
            ax[k, 1].text(-0.1, 0.15*y_max, f"rng:{max(simi_mean)-min(simi_mean):.3f}", fontsize=18, color='b')

            '''********************* max ******************************'''
            ax[k, 2].hist(simi_max, bins=24, range=(-0.2, 1), edgecolor='black')
            ax[k, 2].set_xlim([-0.2, 1])
            ax[k, 2].set_title("max")
            _, y_max = ax[k, 2].get_ylim()  # get the y-axis range
            '''range of the similarity values'''
            ax[k, 2].text(-0.1, 0.15*y_max, f"rng:{max(simi_max)-min(simi_max):.3f}", fontsize=18, color='b')

            '''draw the cluster centers'''
            cltc_mean = cluster(simi_mean)
            for value in cltc_mean:
                ax[k, 1].axvline(x=value, color='m', linestyle='--', linewidth=3)
            cltc_max = cluster(simi_max)
            for value in cltc_max:
                ax[k, 2].axvline(x=value, color='m', linestyle='--', linewidth=3)

            '''draw the overall mean and the mean of the per-row maxima'''
            ax[k, 1].axvline(x=np.mean(matrix), color='r', linestyle='-', linewidth=3)
            ax[k, 2].axvline(x=np.mean(simi_max), color='g', linestyle='-', linewidth=3)

            '''annotate (mean of per-row maxima) - (overall mean)'''
            _, y_max = ax[k, 2].get_ylim()  # get the y-axis range
            ax[k, 2].text(-0.1, 0.05*y_max, f"g-r={diff_max_mean:.3f}", fontsize=18, color='m')
            plt.show()
            # for i, box in enumerate(boxes):
            #     x1, y1, x2, y2, tid, score, cls, fid, bid = box
            #     imgpath = imgpaths[int(fid-1)]
            #     image = cv2.imread(imgpath)
            #     subimg = image[int(y1/2):int(y2/2), int(x1/2):int(x2/2), :]
            #     camerType, timeTamp, _, frameID = os.path.basename(imgpath).split('.')[0].split('_')
            #     subimgName = f"cam{camerType}_{i}_tid{int(tid)}_fid({int(fid)}, {frameID})_{simi_mean[i]:.3f}.png"
            #     imgpairs.append((subimgName, subimg))
            #     spath = os.path.join(subimgpath, subimgName)
            #     cv2.imwrite(spath, subimg)
            #     oldname = f"cam{camerType}_{i}_tid{int(tid)}_fid({int(fid)}, {frameID}).png"
            #     oldpath = os.path.join(subimgpath, oldname)
            #     if os.path.exists(oldpath):
            #         os.remove(oldpath)
        if len(mean_values)==2:
            mean_diff = abs(mean_values[1]-mean_values[0])
            ax[0, 1].set_title(f"mean diff: {mean_diff:.3f}")
        if len(max_values)==2:
            max_diff = abs(max_values[1]-max_values[0])
            ax[0, 2].set_title(f"max diff: {max_diff:.3f}")
        try:
            fig.suptitle(f"Similar: {Similar:.3f}", fontsize=16)
        except Exception as e:
            print(e)
            print(f"Similar: {Similar}")

        pltpath = os.path.join(subimgpath, f"hist_max_{kpercent}%_.png")
        plt.savefig(pltpath)

        pltpath1 = os.path.join(histpath, f"{evtname}_.png")
        plt.savefig(pltpath1)

        plt.close()

def main():
    simi_matrix()

if __name__ == "__main__":
    main()
    # cluster()

View File

@@ -0,0 +1,87 @@
# import torch
# import torchvision.transforms as T
#
#
# class Config:
# # network settings
# backbone = 'resnet18' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5]
# metric = 'arcface' # [cosface, arcface]
# cbam = True
# embedding_size = 256
# drop_ratio = 0.5
# img_size = 224
#
# batch_size = 8
#
# # data preprocess
# # input_shape = [1, 128, 128]
# """transforms.RandomCrop(size),
# transforms.RandomVerticalFlip(p=0.5),
# transforms.RandomHorizontalFlip(),
# RandomRotate(15, 0.3),
# # RandomGaussianBlur()"""
#
# train_transform = T.Compose([
# T.ToTensor(),
# T.Resize((img_size, img_size)),
# # T.RandomCrop(img_size),
# # T.RandomHorizontalFlip(p=0.5),
# T.RandomRotation(180),
# T.ColorJitter(brightness=0.5),
# T.ConvertImageDtype(torch.float32),
# T.Normalize(mean=[0.5], std=[0.5]),
# ])
# test_transform = T.Compose([
# T.ToTensor(),
# T.Resize((img_size, img_size)),
# T.ConvertImageDtype(torch.float32),
# T.Normalize(mean=[0.5], std=[0.5]),
# ])
#
# # dataset
# train_root = './data/2250_train/train' # dataset after an initial filtering pass
# # train_root = './data/0612_train/train'
# test_root = "./data/2250_train/val/"
# # test_root = "./data/0612_train/val"
# test_list = "./data/2250_train/val_pair.txt"
#
# test_group_json = "./2250_train/cross_same_0508.json"
#
#
# # test_list = "./data/test_data_100/val_pair.txt"
#
# # training settings
# checkpoints = "checkpoints/resnet18_0613/" # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
# restore = False
# # restore_model = "checkpoints/renet18_2250_0315/best_resnet18_2250_0315.pth" # best_resnet18_1491_0306.pth
# restore_model = "checkpoints/resnet18_0515/best.pth" # best_resnet18_1491_0306.pth
#
# # test_model = "checkpoints/renet18_2250_0314/best_resnet18_2250_0314.pth"
# testbackbone = 'resnet18' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5]
# test_val = "D:/比对/cl"
# # test_val = "./data/test_data_100"
#
# # test_model = "checkpoints/zhanting_res_801.pth"
# test_model = "checkpoints/resnet18_0515/v11.pth"
#
#
#
# train_batch_size = 512 # 256
# test_batch_size = 256 # 256
#
# epoch = 300
# optimizer = 'sgd' # ['sgd', 'adam']
# lr = 1.5e-2 # 1e-2
# lr_step = 5 # 10
# lr_decay = 0.95 # 0.98
# weight_decay = 5e-4
# loss = 'cross_entropy' # ['focal_loss', 'cross_entropy']
# # device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#
# pin_memory = True # if memory is large, set it True to speed up a bit
# num_workers = 4 # dataloader
#
# group_test = True
#
# config = Config()

View File

@@ -0,0 +1,547 @@
# -*- coding: utf-8 -*-
"""
@author: LiChen
"""
import numpy as np
import torch
from pathlib import Path
from utils.config import config as cfg
curpath = Path(__file__).resolve().parents[0]
class FeatsInterface:
    def __init__(self, resnetModel=None):
        self.device = cfg.device
        self.transform = cfg.test_transform
        self.batch_size = cfg.batch_size
        self.embedding_size = cfg.embedding_size

        assert resnetModel is not None, "resnetModel is None"
        self.model = resnetModel
        print(f"Model type: {type(self.model)}")

    def inference(self, images, detections=None):
        '''
        BGR images must be converted to RGB first.
        '''
        if isinstance(images, np.ndarray):
            imgs, features = self.inference_image(images, detections)
            return imgs, features

        batch_patches = []
        patches = []
        for i, img in enumerate(images):
            img = img.copy()
            patch = self.transform(img)
            # patch = patch.to(device=self.device).half()  # half precision disabled
            patch = patch.to(device=self.device)

            patches.append(patch)
            if (i + 1) % self.batch_size == 0:
                patches = torch.stack(patches, dim=0)
                batch_patches.append(patches)
                patches = []
        if len(patches):
            patches = torch.stack(patches, dim=0)
            batch_patches.append(patches)

        features = np.zeros((0, self.embedding_size))
        for patches in batch_patches:
            pred = self.model(patches)
            pred[torch.isinf(pred)] = 1.0
            feat = pred.cpu().data.numpy()
            features = np.vstack((features, feat))
        return features
    def inference_image(self, image, detections):
        H, W, _ = np.shape(image)
        batch_patches = []
        patches = []
        imgs = []
        for d in range(np.size(detections, 0)):
            tlbr = detections[d, :4].astype(np.int_)
            tlbr[0] = max(0, tlbr[0])
            tlbr[1] = max(0, tlbr[1])
            tlbr[2] = min(W - 1, tlbr[2])
            tlbr[3] = min(H - 1, tlbr[3])
            img = image[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2], :]
            imgs.append(img)

            img1 = img[:, :, ::-1].copy()  # the model expects RGB inputs
            patch = self.transform(img1)

            # patch = patch.to(device=self.device).half()  # half precision disabled
            patch = patch.to(device=self.device)

            patches.append(patch)
            if (d + 1) % self.batch_size == 0:
                patches = torch.stack(patches, dim=0)
                batch_patches.append(patches)
                patches = []
        if len(patches):
            patches = torch.stack(patches, dim=0)
            batch_patches.append(patches)

        features = np.zeros((0, self.embedding_size))
        for patches in batch_patches:
            pred = self.model(patches)
            pred[torch.isinf(pred)] = 1.0
            feat = pred.cpu().data.numpy()
            features = np.vstack((features, feat))
        return imgs, features
# def unique_image(pair_list) -> set:
# """Return unique image path in pair_list.txt"""
# with open(pair_list, 'r') as fd:
# pairs = fd.readlines()
# unique = set()
# for pair in pairs:
# id1, id2, _ = pair.split()
# unique.add(id1)
# unique.add(id2)
# return unique
#
#
# def group_image(images: set, batch) -> list:
# """Group image paths by batch size"""
# images = list(images)
# size = len(images)
# res = []
# for i in range(0, size, batch):
# end = min(batch + i, size)
# res.append(images[i: end])
# return res
#
#
# def _preprocess(images: list, transform) -> torch.Tensor:
# res = []
# for img in images:
# im = Image.open(img)
# im = transform(im)
# res.append(im)
# # data = torch.cat(res, dim=0) # shape: (batch, 128, 128)
# # data = data[:, None, :, :] # shape: (batch, 1, 128, 128)
# data = torch.stack(res)
# return data
#
#
# def test_preprocess(images: list, transform) -> torch.Tensor:
# res = []
# for img in images:
# im = Image.open(img)
# im = transform(im)
# res.append(im)
# # data = torch.cat(res, dim=0) # shape: (batch, 128, 128)
# # data = data[:, None, :, :] # shape: (batch, 1, 128, 128)
# data = torch.stack(res)
# return data
#
#
# def featurize(images: list, transform, net, device, train=False) -> dict:
# """featurize each image and save into a dictionary
# Args:
# images: image paths
# transform: test transform
# net: pretrained model
# device: cpu or cuda
# Returns:
# Dict (key: imagePath, value: feature)
# """
# if train:
# data = _preprocess(images, transform)
# data = data.to(device)
# net = net.to(device)
# with torch.no_grad():
# features = net(data)
# res = {img: feature for (img, feature) in zip(images, features)}
# else:
# data = test_preprocess(images, transform)
# data = data.to(device)
# net = net.to(device)
# with torch.no_grad():
# features = net(data)
# res = {img: feature for (img, feature) in zip(images, features)}
# return res
#
#
# # def inference_image(images: list, transform, net, device, bs=16, embedding_size=256) -> dict:
# # batch_patches = []
# # patches = []
# # for d, img in enumerate(images):
# # img = Image.open(img)
# # patch = transform(img)
#
# # if str(device) != "cpu":
# # patch = patch.to(device).half()
# # else:
# # patch = patch.to(device)
#
# # patches.append(patch)
# # if (d + 1) % bs == 0:
# # patches = torch.stack(patches, dim=0)
# # batch_patches.append(patches)
# # patches = []
#
# # if len(patches):
# # patches = torch.stack(patches, dim=0)
# # batch_patches.append(patches)
#
# # features = np.zeros((0, embedding_size), dtype=np.float32)
# # for patches in batch_patches:
# # pred = net(patches)
# # pred[torch.isinf(pred)] = 1.0
# # feat = pred.cpu().data.numpy()
# # features = np.vstack((features, feat))
#
#
# # return features
#
#
# def featurize_1(images: list, transform, net, device, train=False) -> dict:
# """featurize each image and save into a dictionary
# Args:
# images: image paths
# transform: test transform
# net: pretrained model
# device: cpu or cuda
# Returns:
# Dict (key: imagePath, value: feature)
# """
#
# data = test_preprocess(images, transform)
# data = data.to(device)
# net = net.to(device)
# with torch.no_grad():
# features = net(data).data.numpy()
#
# return features
#
#
# def cosin_metric(x1, x2):
# return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
#
#
# def threshold_search(y_score, y_true):
# y_score = np.asarray(y_score)
# y_true = np.asarray(y_true)
# best_acc = 0
# best_th = 0
# for i in range(len(y_score)):
# th = y_score[i]
# y_test = (y_score >= th)
# acc = np.mean((y_test == y_true).astype(int))
# if acc > best_acc:
# best_acc = acc
# best_th = th
# return best_acc, best_th
#
#
# def showgrid(recall, recall_TN, PrecisePos, PreciseNeg):
# x = np.linspace(start=-1.0, stop=1.0, num=50, endpoint=True).tolist()
# plt.figure(figsize=(10, 6))
# plt.plot(x, recall, color='red', label='recall')
# plt.plot(x, recall_TN, color='black', label='recall_TN')
# plt.plot(x, PrecisePos, color='blue', label='PrecisePos')
# plt.plot(x, PreciseNeg, color='green', label='PreciseNeg')
# plt.legend()
# plt.xlabel('threshold')
# # plt.ylabel('Similarity')
# plt.grid(True, linestyle='--', alpha=0.5)
# plt.savefig('accuracy_recall_grid.png')
# plt.show()
# plt.close()
#
#
# def compute_accuracy_recall(score, labels):
# th = 0.1
# squence = np.linspace(-1, 1, num=50)
# # squence = [0.4]
# recall, PrecisePos, PreciseNeg, recall_TN = [], [], [], []
# for th in squence:
# t_score = (score > th)
# t_labels = (labels == 1)
# # print(t_score)
# # print(t_labels)
# TP = np.sum(np.logical_and(t_score, t_labels))
# FN = np.sum(np.logical_and(np.logical_not(t_score), t_labels))
# f_score = (score < th)
# f_labels = (labels == 0)
# TN = np.sum(np.logical_and(f_score, f_labels))
# FP = np.sum(np.logical_and(np.logical_not(f_score), f_labels))
# print("Threshold:{} TP:{},FP:{},TN:{},FN:{}".format(th, TP, FP, TN, FN))
#
# PrecisePos.append(0 if TP / (TP + FP) == 'nan' else TP / (TP + FP))
# PreciseNeg.append(0 if TN == 0 else TN / (TN + FN))
# recall.append(0 if TP == 0 else TP / (TP + FN))
# recall_TN.append(0 if TN == 0 else TN / (TN + FP))
# showgrid(recall, recall_TN, PrecisePos, PreciseNeg)
#
#
# def compute_accuracy(feature_dict, pair_list, test_root):
# with open(pair_list, 'r') as f:
# pairs = f.readlines()
#
# similarities = []
# labels = []
# for pair in pairs:
# img1, img2, label = pair.split()
# img1 = osp.join(test_root, img1)
# img2 = osp.join(test_root, img2)
# feature1 = feature_dict[img1].cpu().numpy()
# feature2 = feature_dict[img2].cpu().numpy()
# label = int(label)
#
# similarity = cosin_metric(feature1, feature2)
# similarities.append(similarity)
# labels.append(label)
#
# accuracy, threshold = threshold_search(similarities, labels)
# # print('similarities >> {}'.format(similarities))
# # print('labels >> {}'.format(labels))
# compute_accuracy_recall(np.array(similarities), np.array(labels))
# return accuracy, threshold
# def deal_group_pair(pairList1, pairList2):
# allsimilarity = []
# one_similarity = []
# for pair1 in pairList1:
# for pair2 in pairList2:
# similarity = cosin_metric(pair1.cpu().numpy(), pair2.cpu().numpy())
# one_similarity.append(similarity)
# allsimilarity.append(max(one_similarity)) # maximum
# # allsimilarity.append(sum(one_similarity)/len(one_similarity)) # mean
# # allsimilarity.append(statistics.median(one_similarity)) # median
# # print(allsimilarity)
# # print(labels)
# return allsimilarity
# def compute_group_accuracy(content_list_read):
# allSimilarity, allLabel = [], []
# for data_loaded in content_list_read:
# one_group_list = []
# for i in range(2):
# images = [osp.join(conf.test_val, img) for img in data_loaded[i]]
# group = group_image(images, conf.test_batch_size)
# d = featurize(group[0], conf.test_transform, model, conf.device)
# one_group_list.append(d.values())
# similarity = deal_group_pair(one_group_list[0], one_group_list[1])
# allLabel.append(data_loaded[-1])
# allSimilarity.extend(similarity)
# # print(allSimilarity)
# # print(allLabel)
# return allSimilarity, allLabel
# def compute_contrast_accuracy(content_list_read):
# npairs = 50
#
# same_folder_pairs = content_list_read['same_folder_pairs']
# cross_folder_pairs = content_list_read['cross_folder_pairs']
#
# npairs = min((len(same_folder_pairs), len(cross_folder_pairs)))
#
# Encoder = FeatsInterface(conf)
#
# same_pairs = same_folder_pairs[:npairs]
# cross_pairs = cross_folder_pairs[:npairs]
#
# same_pairs_similarity = []
# for i in range(len(same_pairs)):
# images_a = [osp.join(conf.test_val, img) for img in same_pairs[i][0]]
# images_b = [osp.join(conf.test_val, img) for img in same_pairs[i][1]]
#
# feats_a = Encoder.inference(images_a)
# feats_b = Encoder.inference(images_b)
# # matrix = 1- np.maximum(0.0, cdist(feats_a, feats_b, 'cosine'))
# matrix = 1 - cdist(feats_a, feats_b, 'cosine')
#
# feats_am = np.mean(feats_a, axis=0, keepdims=True)
# feats_bm = np.mean(feats_b, axis=0, keepdims=True)
# matrixm = 1 - np.maximum(0.0, cdist(feats_am, feats_bm, 'cosine'))
#
# same_pairs_similarity.append(np.mean(matrix))
#
# '''save image pairs with the same barcode'''
# # foldi = os.path.join('./result/same', f'{i}')
# # if os.path.exists(foldi):
# # shutil.rmtree(foldi)
# # os.makedirs(foldi)
# # else:
# # os.makedirs(foldi)
# # for ipt in range(len(images_a)):
# # source_path = images_a[ipt]
# # destination_path = os.path.join(foldi, f'a_{ipt}.png')
# # shutil.copy2(source_path, destination_path)
# # for ipt in range(len(images_b)):
# # source_path = images_b[ipt]
# # destination_path = os.path.join(foldi, f'b_{ipt}.png')
# # shutil.copy2(source_path, destination_path)
#
# cross_pairs_similarity = []
# for i in range(len(cross_pairs)):
# images_a = [osp.join(conf.test_val, img) for img in cross_pairs[i][0]]
# images_b = [osp.join(conf.test_val, img) for img in cross_pairs[i][1]]
#
# feats_a = Encoder.inference(images_a)
# feats_b = Encoder.inference(images_b)
# # matrix = 1- np.maximum(0.0, cdist(feats_a, feats_b, 'cosine'))
# matrix = 1 - cdist(feats_a, feats_b, 'cosine')
#
# feats_am = np.mean(feats_a, axis=0, keepdims=True)
# feats_bm = np.mean(feats_b, axis=0, keepdims=True)
# matrixm = 1 - np.maximum(0.0, cdist(feats_am, feats_bm, 'cosine'))
#
# cross_pairs_similarity.append(np.mean(matrix))
#
# '''save image pairs with different barcodes'''
# # foldi = os.path.join('./result/cross', f'{i}')
# # if os.path.exists(foldi):
# # shutil.rmtree(foldi)
# # os.makedirs(foldi)
# # else:
# # os.makedirs(foldi)
# # for ipt in range(len(images_a)):
# # source_path = images_a[ipt]
# # destination_path = os.path.join(foldi, f'a_{ipt}.png')
# # shutil.copy2(source_path, destination_path)
# # for ipt in range(len(images_b)):
# # source_path = images_b[ipt]
# # destination_path = os.path.join(foldi, f'b_{ipt}.png')
# # shutil.copy2(source_path, destination_path)
#
# Thresh = np.linspace(-0.2, 1, 100)
#
# Same = np.array(same_pairs_similarity)
# Cross = np.array(cross_pairs_similarity)
#
# fig, axs = plt.subplots(2, 1)
# axs[0].hist(Same, bins=60, edgecolor='black')
# axs[0].set_xlim([-0.2, 1])
# axs[0].set_title('Same Barcode')
#
# axs[1].hist(Cross, bins=60, edgecolor='black')
# axs[1].set_xlim([-0.2, 1])
# axs[1].set_title('Cross Barcode')
#
# TPFN = len(Same)
# TNFP = len(Cross)
# Recall_Pos, Recall_Neg = [], []
# Precision_Pos, Precision_Neg = [], []
# Correct = []
# for th in Thresh:
# TP = np.sum(Same > th)
# FN = TPFN - TP
# TN = np.sum(Cross < th)
# FP = TNFP - TN
#
# Recall_Pos.append(TP / TPFN)
# Recall_Neg.append(TN / TNFP)
# Precision_Pos.append(TP / (TP + FP))
# Precision_Neg.append(TN / (TN + FN))
# Correct.append((TN + TP) / (TPFN + TNFP))
#
# fig, ax = plt.subplots()
# ax.plot(Thresh, Correct, 'r', label='Correct: (TN+TP)/(TPFN+TNFP)')
# ax.plot(Thresh, Recall_Pos, 'b', label='Recall_Pos: TP/TPFN')
# ax.plot(Thresh, Recall_Neg, 'g', label='Recall_Neg: TN/TNFP')
# ax.plot(Thresh, Precision_Pos, 'c', label='Precision_Pos: TP/(TP+FP)')
# ax.plot(Thresh, Precision_Neg, 'm', label='Precision_Neg: TN/(TN+FN)')
#
# ax.set_xlim([0, 1])
# ax.set_ylim([0, 1])
# ax.grid(True)
# ax.set_title('PrecisePos & PreciseNeg')
# ax.legend()
# plt.show()
#
# print("Haved done!!!")
#
#
# if __name__ == '__main__':
#
# # Network Setup
# if conf.testbackbone == 'resnet18':
# # model = ResIRSE(conf.img_size, conf.embedding_size, conf.drop_ratio).to(conf.device)
# model = resnet18().to(conf.device)
# # elif conf.testbackbone == 'resnet34':
# # model = resnet34().to(conf.device)
# # elif conf.testbackbone == 'resnet50':
# # model = resnet50().to(conf.device)
# # elif conf.testbackbone == 'mobilevit_s':
# # model = mobilevit_s().to(conf.device)
# # elif conf.testbackbone == 'mobilenetv3':
# # model = MobileNetV3_Small().to(conf.device)
# # elif conf.testbackbone == 'mobilenet_v1':
# # model = mobilenet_v1().to(conf.device)
# # elif conf.testbackbone == 'PPLCNET_x1_0':
# # model = PPLCNET_x1_0().to(conf.device)
# # elif conf.testbackbone == 'PPLCNET_x0_5':
# # model = PPLCNET_x0_5().to(conf.device)
# # elif conf.backbone == 'PPLCNET_x2_5':
# # model = PPLCNET_x2_5().to(conf.device)
# # elif conf.testbackbone == 'mobilenet_v2':
# # model = mobilenet_v2().to(conf.device)
# # elif conf.testbackbone == 'resnet14':
# # model = resnet14().to(conf.device)
# else:
# raise ValueError('Have not model {}'.format(conf.backbone))
#
# print('load model {} '.format(conf.testbackbone))
# # model = nn.DataParallel(model).to(conf.device)
# model.load_state_dict(torch.load(conf.test_model, map_location=conf.device))
# model.eval()
# if not conf.group_test:
# images = unique_image(conf.test_list)
# images = [osp.join(conf.test_val, img) for img in images]
#
# groups = group_image(images, conf.test_batch_size) ## group images by batch_size
#
# feature_dict = dict()
# for group in groups:
# d = featurize(group, conf.test_transform, model, conf.device)
# feature_dict.update(d)
# # print('feature_dict', feature_dict)
# accuracy, threshold = compute_accuracy(feature_dict, conf.test_list, conf.test_val)
#
# print(
# f"Test Model: {conf.test_model}\n"
# f"Accuracy: {accuracy:.3f}\n"
# f"Threshold: {threshold:.3f}\n"
# )
# elif conf.group_test:
# """
# conf.test_val: 测试数据集地址
# conf.test_group_json测试数据分组配置文件
# """
# filename = conf.test_group_json
#
# filename = "../cl/images_1.json"
# with open(filename, 'r', encoding='utf-8') as file:
# content_list_read = json.load(file)
#
# compute_contrast_accuracy(content_list_read)
# =============================================================================
# Similarity, Label = compute_group_accuracy(content_list_read)
# print('allSimilarity >> {}'.format(Similarity))
# print('allLabel >> {}'.format(Label))
# compute_accuracy_recall(np.array(Similarity), np.array(Label))
# # compute_group_accuracy(data_loaded)
#
# =============================================================================

View File

@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.8 (my_env)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module>

View File

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="lc@192.168.1.142:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
</component>
</project>

View File

@@ -0,0 +1,12 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N803" />
</list>
</option>
</inspection_tool>
</profile>
</component>

View File

@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.8 (my_env)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (my_env)" project-jdk-type="Python SDK" />
</project>

View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/contrastInference.iml" filepath="$PROJECT_DIR$/.idea/contrastInference.iml" />
</modules>
</component>
</project>

View File

@@ -0,0 +1 @@
# from .config import config

View File

@@ -0,0 +1,84 @@
import torch
import torchvision.transforms as T
class Config:
    # network settings
    backbone = 'vit'  # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5]
    metric = 'softmax'  # [cosface, arcface, softmax]
    cbam = True
    embedding_size = 256  # 256
    drop_ratio = 0.5
    img_size = 224

    teacher = 'vit'  # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5]
    student = 'resnet'

    # data preprocess
    # input_shape = [1, 128, 128]
    """transforms.RandomCrop(size),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomHorizontalFlip(),
    RandomRotate(15, 0.3),
    # RandomGaussianBlur()"""
    train_transform = T.Compose([
        T.ToTensor(),
        T.Resize((img_size, img_size)),
        # T.RandomCrop(img_size*4//5),
        # T.RandomHorizontalFlip(p=0.5),
        T.RandomRotation(180),
        T.ColorJitter(brightness=0.5),
        T.ConvertImageDtype(torch.float32),
        T.Normalize(mean=[0.5], std=[0.5]),
    ])
    test_transform = T.Compose([
        T.ToTensor(),
        T.Resize((img_size, img_size)),
        T.ConvertImageDtype(torch.float32),
        T.Normalize(mean=[0.5], std=[0.5]),
    ])

    # dataset
    train_root = './data/2250_train/train'  # dataset after an initial filtering pass
    # train_root = './data/0625_train/train'
    test_root = "./data/2250_train/val/"
    # test_root = "./data/0625_train/val"
    test_list = "./data/2250_train/val_pair.txt"
    test_group_json = "./data/2250_train/cross_same.json"
    # test_group_json = "./data/0625_train/cross_same.json"
    # test_list = "./data/test_data_100/val_pair.txt"

    # training settings
    checkpoints = "checkpoints/vit_b_16_0815/"  # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
    restore = True
    # restore_model = "checkpoints/renet18_2250_0315/best_resnet18_2250_0315.pth"  # best_resnet18_1491_0306.pth
    restore_model = "checkpoints/vit_b_16_0730/best.pth"  # best_resnet18_1491_0306.pth

    # test_model = "./checkpoints/renet18_1887_0311/best_resnet18_1887_0311.pth"
    testbackbone = 'resnet18'  # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5]
    # test_val = "./data/2250_train"
    test_val = "./data/0625_train"
    test_model = "checkpoints/resnet18_0721/best.pth"

    train_batch_size = 128  # 256
    test_batch_size = 256  # 256

    epoch = 300
    optimizer = 'adamw'  # ['sgd', 'adam', 'adamw']
    lr = 1e-3  # 1e-2
    lr_step = 10  # 10
    lr_decay = 0.95  # 0.98
    weight_decay = 5e-4
    loss = 'focal_loss'  # ['focal_loss', 'cross_entropy']
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    # device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    pin_memory = True  # if memory is large, set it True to speed up a bit
    num_workers = 4  # dataloader
    group_test = True
    # group_test = False

config = Config()

View File

@@ -0,0 +1,103 @@
import os
import os.path as osp
import torch
import numpy as np
from model import resnet18
from PIL import Image
from torch.nn.functional import softmax
from config import config as conf
import time
embedding_size = conf.embedding_size
img_size = conf.img_size
device = conf.device
def load_contrast_model():
    model = resnet18().to(conf.device)
    model.load_state_dict(torch.load(conf.test_model, map_location=conf.device))
    model.eval()
    print('load model {} '.format(conf.testbackbone))
    return model

def group_image(imageDirs, batch) -> list:
    """Group image paths by batch size"""
    images = []
    with os.scandir(imageDirs) as entries:
        for imgpth in entries:
            print(imgpth)
            images.append(os.sep.join([imageDirs, imgpth.name]))
    print(f"{len(images)} images in {imageDirs}")
    size = len(images)
    res = []
    for i in range(0, size, batch):
        end = min(batch + i, size)
        res.append(images[i: end])
    return res
def test_preprocess(images: list, transform) -> torch.Tensor:
    res = []
    for img in images:
        # print(img)
        im = Image.open(img)
        im = transform(im)
        res.append(im)
    # data = torch.cat(res, dim=0)  # shape: (batch, 128, 128)
    # data = data[:, None, :, :]   # shape: (batch, 1, 128, 128)
    data = torch.stack(res)
    return data

def featurize(images: list, transform, net, device) -> dict:
    """featurize each image and save into a dictionary
    Args:
        images: image paths
        transform: test transform
        net: pretrained model
        device: cpu or cuda
    Returns:
        Dict (key: imagePath, value: feature)
    """
    data = test_preprocess(images, transform)
    data = data.to(device)
    net = net.to(device)
    with torch.no_grad():
        features = net(data)
    # res = {img: feature for (img, feature) in zip(images, features)}
    return features
if __name__ == '__main__':
    # Network Setup
    if conf.testbackbone == 'resnet18':
        model = resnet18().to(device)
    else:
        raise ValueError('Unsupported model {}'.format(conf.testbackbone))

    print('load model {} '.format(conf.testbackbone))
    # model = nn.DataParallel(model).to(conf.device)
    model.load_state_dict(torch.load(conf.test_model, map_location=conf.device))
    model.eval()

    # images = unique_image(conf.test_list)
    # images = [osp.join(conf.test_val, img) for img in images]
    # print('images', images)
    # images = ['./data/2250_train/val/6920616313186/6920616313186_6920616313186_20240220-124502_53d2e103-ae3a-4689-b745-9d8723b770fe_front_returnGood_70f75407b7ae_31_01.jpg']
    # groups = group_image(conf.test_val, conf.test_batch_size)  ## group images by batch_size
    groups = group_image('img_test', 1)  ## group images by batch_size (here 1; the default used to be 8)

    feature_dict = dict()
    for group in groups:
        s = time.time()
        features = featurize(group, conf.test_transform, model, conf.device)
        e = time.time()
        print('time: {}'.format(e - s))
        # out = softmax(features, dim=1).argmax(dim=1)
        # print('d >>> {}'. format(out))
        # feature_dict.update(d)

View File

@@ -0,0 +1 @@
from .resnet_pre import resnet18, resnet34, resnet50, resnet14

View File

@@ -0,0 +1,462 @@
import torch
import torch.nn as nn
from config import config as conf
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url
# from .utils import load_state_dict_from_url
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
'wide_resnet50_2', 'wide_resnet101_2']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class SpatialAttention(nn.Module):
def __init__(self, kernel_size=7):
super(SpatialAttention, self).__init__()
assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
padding = 3 if kernel_size == 7 else 1
self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
x = torch.cat([avg_out, max_out], dim=1)
x = self.conv1(x)
return self.sigmoid(x)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None, cam=False, bam=False):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
self.cam = cam
self.bam = bam
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
if self.cam:
if planes == 64:
self.globalAvgPool = nn.AvgPool2d(56, stride=1)
elif planes == 128:
self.globalAvgPool = nn.AvgPool2d(28, stride=1)
elif planes == 256:
self.globalAvgPool = nn.AvgPool2d(14, stride=1)
elif planes == 512:
self.globalAvgPool = nn.AvgPool2d(7, stride=1)
self.fc1 = nn.Linear(in_features=planes, out_features=round(planes / 16))
self.fc2 = nn.Linear(in_features=round(planes / 16), out_features=planes)
self.sigmod = nn.Sigmoid()
if self.bam:
self.bam = SpatialAttention()
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
if self.cam:
ori_out = self.globalAvgPool(out)
out = out.view(out.size(0), -1)
out = self.fc1(out)
out = self.relu(out)
out = self.fc2(out)
out = self.sigmod(out)
out = out.view(out.size(0), out.size(-1), 1, 1)
out = out * ori_out
if self.bam:
out = out*self.bam(out)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
# Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
# while original implementation places the stride at the first 1x1 convolution(self.conv1)
# according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
# This variant is also known as ResNet V1.5 and improves accuracy according to
# https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None, cam=False, bam=False):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
self.cam = cam
self.bam = bam
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
if self.cam:
if planes == 64:
self.globalAvgPool = nn.AvgPool2d(56, stride=1)
elif planes == 128:
self.globalAvgPool = nn.AvgPool2d(28, stride=1)
elif planes == 256:
self.globalAvgPool = nn.AvgPool2d(14, stride=1)
elif planes == 512:
self.globalAvgPool = nn.AvgPool2d(7, stride=1)
self.fc1 = nn.Linear(planes * self.expansion, round(planes / 4))
self.fc2 = nn.Linear(round(planes / 4), planes * self.expansion)
self.sigmod = nn.Sigmoid()
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
if self.cam:
ori_out = self.globalAvgPool(out)
out = out.view(out.size(0), -1)
out = self.fc1(out)
out = self.relu(out)
out = self.fc2(out)
out = self.sigmod(out)
out = out.view(out.size(0), out.size(-1), 1, 1)
out = out * ori_out
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=conf.embedding_size, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None, scale=0.75):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, int(64*scale), layers[0])
self.layer2 = self._make_layer(block, int(128*scale), layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, int(256*scale), layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, int(512*scale), layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(int(512 * block.expansion*scale), num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def _forward_impl(self, x):
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
# print('poolBefore', x.shape)
x = self.avgpool(x)
# print('poolAfter', x.shape)
x = torch.flatten(x, 1)
# print('fcBefore',x.shape)
x = self.fc(x)
# print('fcAfter',x.shape)
return x
def forward(self, x):
return self._forward_impl(x)
# def _resnet(arch, block, layers, pretrained, progress, **kwargs):
# model = ResNet(block, layers, **kwargs)
# if pretrained:
# state_dict = load_state_dict_from_url(model_urls[arch],
# progress=progress)
# model.load_state_dict(state_dict, strict=False)
# return model
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
model = ResNet(block, layers, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls[arch],
progress=progress)
src_state_dict = state_dict
target_state_dict = model.state_dict()
skip_keys = []
# skip mismatch size tensors in case of pretraining
for k in src_state_dict.keys():
if k not in target_state_dict:
continue
if src_state_dict[k].size() != target_state_dict[k].size():
skip_keys.append(k)
for k in skip_keys:
del src_state_dict[k]
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
return model
def resnet14(pretrained=True, progress=True, **kwargs):
r"""ResNet-14 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 1, 1, 2], pretrained, progress,
**kwargs)
def resnet18(pretrained=True, progress=True, **kwargs):
r"""ResNet-18 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
**kwargs)
def resnet34(pretrained=False, progress=True, **kwargs):
r"""ResNet-34 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet50(pretrained=False, progress=True, **kwargs):
r"""ResNet-50 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet101(pretrained=False, progress=True, **kwargs):
r"""ResNet-101 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
**kwargs)
def resnet152(pretrained=False, progress=True, **kwargs):
r"""ResNet-152 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
**kwargs)
def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-50 32x4d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 4
return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-101 32x8d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 8
return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)
def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-50-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-101-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)

View File

@@ -0,0 +1,215 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 3 12:05:19 2024
@author: ym
"""
import os
import time
# import torch
import pickle
# import json
import numpy as np
from PIL import Image
from feat_extract.config import config as conf
# from model import resnet18 as resnet18
from feat_extract.inference import FeatsInterface #, inference_image
IMG_FORMAT = ['.bmp', '.jpg', '.jpeg', '.png']
# def model_init(conf, mpath=None):
#     '''======= 0. configure the path of the feature-extraction model ======='''
#     if mpath is None:
#         model_path = conf.test_model
#     else:
#         model_path = mpath
#
#     ##============ load resnet model
#     model = resnet18().to(conf.device)
#     # model = nn.DataParallel(model).to(conf.device)
#     model.load_state_dict(torch.load(model_path, map_location=conf.device))
#     model.eval()
#     print('load model {} '.format(conf.testbackbone))
#     return model
def get_std_barcodeDict(bcdpath, savepath, bcdSet):
    '''
    inputs:
        bcdpath: cleaned barcode sample images; if a barcode folder contains a
                 'base' subfolder, only images under that subfolder are used
                 (default = r'\\192.168.1.28\share\已标注数据备份\对比数据\barcode\barcode_1771')
        savepath: storage path of the dictionaries; file name format: barcode.pickle
    what it does:
        build and save single-key dictionaries {barcode: [imgpath1, imgpath2, ...]}
    '''
    # savepath = r'\\192.168.1.28\share\测试_202406\contrast\std_barcodes'

    '''read the barcode list of the dataset'''
    stdBarcodeList = []
    for filename in os.listdir(bcdpath):
        filepath = os.path.join(bcdpath, filename)
        if not os.path.isdir(filepath) or not filename.isdigit() or len(filename)<8:
            continue
        if bcdSet is None:
            stdBarcodeList.append(filename)
        elif filename in bcdSet:
            stdBarcodeList.append(filename)

    bcdPaths = [(barcode, os.path.join(bcdpath, barcode)) for barcode in stdBarcodeList]

    '''walk the dataset and, for each barcode, build and save the dictionary
       {barcode: [imgpath1, imgpath2, ...]}'''
    k = 0
    errbarcodes = []
    for barcode, bpath in bcdPaths:
        pickpath = os.path.join(savepath, f"{barcode}.pickle")
        if os.path.isfile(pickpath):
            continue
        stdBarcodeDict = {}
        stdBarcodeDict[barcode] = []
        for root, dirs, files in os.walk(bpath):
            imgpaths = []
            if "base" in dirs:
                broot = os.path.join(root, "base")
                for imgname in os.listdir(broot):
                    imgpath = os.path.join(broot, imgname)
                    file, ext = os.path.splitext(imgpath)
                    if ext not in IMG_FORMAT:
                        continue
                    imgpaths.append(imgpath)
                stdBarcodeDict[barcode].extend(imgpaths)
                break
            else:
                for imgname in files:
                    imgpath = os.path.join(root, imgname)
                    _, ext = os.path.splitext(imgpath)
                    if ext not in IMG_FORMAT: continue
                    imgpaths.append(imgpath)
                stdBarcodeDict[barcode].extend(imgpaths)

        pickpath = os.path.join(savepath, f"{barcode}.pickle")
        with open(pickpath, 'wb') as f:
            pickle.dump(stdBarcodeDict, f)
        print(f"Barcode: {barcode}")
        # k += 1
        # if k == 10:
        #     break
    print(f"Len of errbarcodes: {len(errbarcodes)}")
    return
def stdfeat_infer(imgPath, featPath, bcdSet=None):
    '''
    inputs:
        imgPath: folder of pickle files, each formatted as {barcode: [imgpath1, imgpath2, ...]}
        featPath: storage path for the features of the images under imgPath
    what it does:
        extract features for the images under imgPath, build single-key dictionaries
        {barcode: features} with features.shape=(nsample, 256), and save them to featPath
    '''
    # imgPath = r"\\192.168.1.28\share\测试_202406\contrast\std_barcodes"
    # featPath = r"\\192.168.1.28\share\测试_202406\contrast\std_features"

    stdBarcodeDict = {}
    stdBarcodeDict_ft16 = {}
    Encoder = FeatsInterface(conf)

    '''The same name appears in 4 places: (1) the folder of raw barcode images;
       (2) the .pickle file name under imgPath; (3) the dict key inside that
       pickle file; (4) a key of the feature-vector dictionary.'''
    k = 0
    for filename in os.listdir(imgPath):
        bcd, ext = os.path.splitext(filename)
        filepath = os.path.join(imgPath, filename)
        if ext != ".pickle": continue
        if bcdSet is not None and bcd not in bcdSet:
            continue

        featpath = os.path.join(featPath, f"{bcd}.pickle")
        if os.path.isfile(featpath):
            continue
        stdbDict = {}
        t1 = time.time()
        try:
            with open(filepath, 'rb') as f:
                bpDict = pickle.load(f)
            for barcode, imgpaths in bpDict.items():
                # feature = batch_inference(imgpaths, 8)  # from the vit distilled model of LiChen
                # feature = inference_image(imgpaths, conf.test_transform, model, conf.device)
                imgs = []
                for d, imgpath in enumerate(imgpaths):
                    img = Image.open(imgpath)
                    imgs.append(img)
                feature = Encoder.inference(imgs)
                feature /= np.linalg.norm(feature, axis=1)[:, None]

                # float16
                feature_ft16 = feature.astype(np.float16)
                feature_ft16 /= np.linalg.norm(feature_ft16, axis=1)[:, None]

                # int8; two possible strategies: 1) small precision loss, 2) low compute cost
                # feature_uint8, _ = ft16_to_uint8(feature_ft16)
                feature_uint8 = (feature_ft16*128).astype(np.int8)
        except Exception as e:
            print(f"Error occurred at: {filename}, with Exception is: {e}")

        '''================ save the features of a single barcode ================'''
        ##================== float32
        stdbDict["barcode"] = barcode
        stdbDict["imgpaths"] = imgpaths
        stdbDict["feats_ft32"] = feature
        stdbDict["feats_ft16"] = feature_ft16
        stdbDict["feats_uint8"] = feature_uint8

        with open(featpath, 'wb') as f:
            pickle.dump(stdbDict, f)

        stdBarcodeDict[barcode] = feature
        stdBarcodeDict_ft16[barcode] = feature_ft16
        t2 = time.time()
        print(f"Barcode: {barcode}, need time: {t2-t1:.1f} secs")
        # k += 1
        # if k == 10:
        #     break
    return
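
# Layout of each saved <barcode>.pickle (keys taken from the code above; shapes
# assume nsample images and a 256-dim embedding; note "feats_uint8" is actually
# stored as int8 via (feature_ft16*128).astype(np.int8)):
# {"barcode": str, "imgpaths": [str, ...],
#  "feats_ft32": (nsample, 256) float32, "feats_ft16": float16, "feats_uint8": int8}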
def gen_bcd_features(imgpath, bcdpath, featpath, bcdSet=None):
    ''' Build the standard feature set. '''
    '''1. Collect the sample paths under imgpath into dictionaries
          {barcode: [imgpath1, imgpath2, ...]}, stored under bcdpath as barcode.pickle'''
    get_std_barcodeDict(imgpath, bcdpath, bcdSet)

    '''2. Extract features and save them under featpath, also restricted to the bcdSet intersection'''
    stdfeat_infer(bcdpath, featpath, bcdSet)

def main():
    imgpath = r"\\192.168.1.28\share\数据\已完成数据\展厅数据\v1.0\比对数据\整理\zhantingBase"
    bcdpath = r"D:\exhibition\dataset\bcdpath"
    featpath = r"D:\exhibition\dataset\feats"
    gen_bcd_features(imgpath, bcdpath, featpath)

if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,277 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 18 11:49:01 2024
@author: ym
"""
import os
import pickle
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from utils.event import ShoppingEvent
def init_eventdict(sourcePath, stype="data"):
    '''stype: str,
        'source': pickle files generated from videos or images
        'data': on-site run data read from the data files
    '''
k, errEvents = 0, []
for bname in os.listdir(sourcePath):
# bname = r"20241126-135911-bdf91cf9-3e9a-426d-94e8-ddf92238e175_6923555210479"
source_path = os.path.join(sourcePath, bname)
if stype=="data":
pickpath = os.path.join(eventDataPath, f"{bname}.pickle")
if not os.path.isdir(source_path) or os.path.isfile(pickpath):
continue
if stype=="source":
pickpath = os.path.join(eventDataPath, bname)
if not os.path.isfile(source_path) or os.path.isfile(pickpath):
continue
try:
event = ShoppingEvent(source_path, stype)
with open(pickpath, 'wb') as f:
pickle.dump(event, f)
print(bname)
except Exception as e:
errEvents.append(source_path)
print(e)
# k += 1
# if k==1:
# break
errfile = os.path.join(resultPath, 'error_events.txt')
with open(errfile, 'a', encoding='utf-8') as f:
for line in errEvents:
f.write(line + '\n')
def read_eventdict(eventDataPath):
evtDict = {}
for filename in os.listdir(eventDataPath):
evtname, ext = os.path.splitext(filename)
if ext != ".pickle": continue
evtpath = os.path.join(eventDataPath, filename)
with open(evtpath, 'rb') as f:
evtdata = pickle.load(f)
evtDict[evtname] = evtdata
return evtDict
def simi_calc(event, o2nevt, typee=None):
if typee == "11":
boxes1 = event.front_boxes
boxes2 = o2nevt.front_boxes
feat1 = event.front_feats
feat2 = o2nevt.front_feats
if typee == "10":
boxes1 = event.front_boxes
boxes2 = o2nevt.back_boxes
feat1 = event.front_feats
feat2 = o2nevt.back_feats
if typee == "00":
boxes1 = event.back_boxes
boxes2 = o2nevt.back_boxes
feat1 = event.back_feats
feat2 = o2nevt.back_feats
if typee == "01":
boxes1 = event.back_boxes
boxes2 = o2nevt.front_boxes
feat1 = event.back_feats
feat2 = o2nevt.front_feats
    '''custom feature selection for the event pair'''
if typee==3:
feat1 = event.feats_compose
feat2 = o2nevt.feats_compose
if len(feat1) and len(feat2):
matrix = 1 - cdist(feat1[0], feat2[0], 'cosine')
simi = np.mean(matrix)
else:
simi = None
return simi
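# Minimal self-contained sketch of the similarity used above: the mean of the
# (1 - cosine distance) matrix between two L2-normalized feature sets.
# Random data only, for illustration.
def _demo_mean_cosine():
    rng = np.random.default_rng(0)
    f1 = rng.normal(size=(5, 256)); f1 /= np.linalg.norm(f1, axis=1)[:, None]
    f2 = rng.normal(size=(8, 256)); f2 /= np.linalg.norm(f2, axis=1)[:, None]
    simi = np.mean(1 - cdist(f1, f2, 'cosine'))
    print(f"mean similarity: {simi:.4f}")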
def one2n_pr(evtDicts, pattern=1):
    '''
    pattern:
        1: use the similarity recorded in process.data
        2: select features according to the type tagged in process.data
        3: select features in some other way
    '''
tpevents, fnevents, fpevents, tnevents = [], [], [], []
tpsimi, fnsimi, tnsimi, fpsimi = [], [], [], []
errorFile_one2n = []
for evtname, event in evtDicts.items():
evt_names, evt_barcodes, evt_similars, evt_types = [], [], [], []
for ndict in event.one2n:
nname = ndict["event"]
barcode = ndict["barcode"]
similar = ndict["similar"]
typee = ndict["type"].strip()
evt_names.append(nname)
evt_barcodes.append(barcode)
evt_types.append(typee)
if pattern==1:
evt_similars.append(similar)
if pattern==2 or pattern==3:
o2n_evt = [evt for name, evt in evtDicts.items() if name.find(nname[:15])==0]
if len(o2n_evt)==1:
o2nevt = o2n_evt[0]
else:
continue
if pattern==2:
simival = simi_calc(event, o2nevt, typee)
if pattern==3:
simival = simi_calc(event, o2nevt, typee=pattern)
                if simival is None:
                    continue
evt_similars.append(simival)
if len(evt_names)==len(evt_barcodes) and len(evt_barcodes)==len(evt_similars) \
and len(evt_similars)==len(evt_types) and len(evt_names)>0:
# maxsim = evt_similars[evt_similars.index(max(evt_similars))]
maxsim = max(evt_similars)
for i in range(len(evt_names)):
bcd, simi = evt_barcodes[i], evt_similars[i]
if bcd==event.barcode and simi==maxsim:
tpsimi.append(simi)
tpevents.append(evtname)
elif bcd==event.barcode and simi!=maxsim:
fnsimi.append(simi)
fnevents.append(evtname)
elif bcd!=event.barcode and simi!=maxsim:
tnsimi.append(simi)
tnevents.append(evtname)
elif bcd!=event.barcode and simi==maxsim and event.barcode in evt_barcodes:
fpsimi.append(simi)
fpevents.append(evtname)
else:
errorFile_one2n.append(evtname)
    ''' 1:n data storage; needs ranking by similarity'''
PPrecise, PRecall = [], []
NPrecise, NRecall = [], []
Thresh = np.linspace(-0.2, 1, 100)
for th in Thresh:
        '''============================= 1:n computation'''
TP = sum(np.array(tpsimi) >= th)
FP = sum(np.array(fpsimi) >= th)
FN = sum(np.array(fnsimi) < th)
TN = sum(np.array(tnsimi) < th)
PPrecise.append(TP/(TP+FP+1e-6))
PRecall.append(TP/(len(tpsimi)+len(fnsimi)+1e-6))
NPrecise.append(TN/(TN+FN+1e-6))
NRecall.append(TN/(len(tnsimi)+len(fpsimi)+1e-6))
    '''4. ============================= 1:n curves'''
fig, ax = plt.subplots()
    ax.plot(Thresh, PPrecise, 'r', label='Precise_Pos: TP/(TP+FP)')
    ax.plot(Thresh, PRecall, 'b', label='Recall_Pos: TP/(TP+FN)')
    ax.plot(Thresh, NPrecise, 'g', label='Precise_Neg: TN/(TN+FN)')
    ax.plot(Thresh, NRecall, 'c', label='Recall_Neg: TN/(TN+FP)')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid(True)
ax.set_title('1:n Precise & Recall')
ax.set_xlabel(f"Event Num: {len(tpsimi)+len(fnsimi)}")
ax.legend()
plt.show()
    ## ============================= 1:n histograms
fig, axes = plt.subplots(2, 2)
axes[0, 0].hist(tpsimi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[0, 0].set_xlim([-0.2, 1])
axes[0, 0].set_title('TP')
axes[0, 1].hist(fpsimi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[0, 1].set_xlim([-0.2, 1])
axes[0, 1].set_title('FP')
axes[1, 0].hist(tnsimi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[1, 0].set_xlim([-0.2, 1])
axes[1, 0].set_title('TN')
axes[1, 1].hist(fnsimi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[1, 1].set_xlim([-0.2, 1])
axes[1, 1].set_title('FN')
plt.show()
return fpevents
def main():
    '''1. Build the event dict and save it to eventDataPath; only needs to run once '''
init_eventdict(eventSourcePath, stype="source")
    '''2. Load the event dict '''
evtDicts = read_eventdict(eventDataPath)
    '''3. Evaluate the 1:n comparison events '''
fpevents = one2n_pr(evtDicts, pattern=3)
fpErrFile = str(Path(resultPath).joinpath("one2n_fp_Error.txt"))
with open(fpErrFile, "w") as file:
for item in fpevents:
file.write(item + "\n")
if __name__ == '__main__':
eventSourcePath = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\result\ShoppingDict_pkfile"
resultPath = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\result\contrast"
eventDataPath = os.path.join(resultPath, "evtobjs")
similPath = os.path.join(resultPath, "simidata")
if not os.path.exists(eventDataPath):
os.makedirs(eventDataPath)
if not os.path.exists(similPath):
os.makedirs(similPath)
main()

View File

@ -0,0 +1,520 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 11 11:57:30 2024
1:1 performance evaluation on data exported from the Yonghui on-site trial.
For the data-saving format used before 2024-10; requires OneToOneCompare.txt.
@author: ym
"""
import os
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import sys
sys.path.append(r"D:\DetectTracking")
from tracking.utils.read_data import read_similar
def read_one2one_data(filepath):
simiList = []
with open(filepath, 'r', encoding='utf-8') as f:
lines = f.readlines()
split_flag = False
simi_dict = {}
for i, line in enumerate(lines):
line = line.strip()
if not line:
if len(simi_dict): simiList.append(simi_dict)
simi_dict = {}
continue
label = line.split(':')[0].strip()
value = line.split(':')[1].strip()
if label.find("SeqDir") >= 0:
simi_dict["SeqDir"] = value
if label.isdigit() and len(label) >= 8:
simi_max, simi_min = value.strip(',').split('.')
simi_dict["barcode"] = label
simi_dict["simi_max"] = float(simi_max) / 1000
simi_dict["simi_min"] = float(simi_min) / 1000
if len(simi_dict): simiList.append(simi_dict)
return simiList
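# Hedged sketch: the block format assumed by read_one2one_data (inferred from
# the parser above, not documented elsewhere). "850.300," encodes
# simi_max=0.850 and simi_min=0.300, both scaled by 1000.
def _demo_read_one2one(tmpfile="./OneToOneCompare_demo.txt"):
    with open(tmpfile, 'w', encoding='utf-8') as f:
        f.write("SeqDir: 20240913-101010\n6923555210479: 850.300,\n\n")
    print(read_one2one_data(tmpfile))  # [{'SeqDir': ..., 'barcode': ..., 'simi_max': 0.85, 'simi_min': 0.3}]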
def plot_pr_curve(matrix):
    simimax, simimin = [], []
    need_analysis = []
    for simidict in matrix:
        simimax.append(simidict["simi_max"])
        simimin.append(simidict["simi_min"])
        if simidict["simi_max"]>0.6:
            need_analysis.append(simidict)
    simimax = np.array(simimax)
    simimin = np.array(simimin)
    TPFN_max = len(simimax)
    TPFN_min = len(simimin)
    fig, axs = plt.subplots(2, 1)
    axs[0].hist(simimax, bins=60, edgecolor='black')
    axs[0].set_xlim([-0.2, 1])
    axs[0].set_title(f'Same Barcode, Num: {TPFN_max}')
    axs[1].hist(simimin, bins=60, edgecolor='black')
    axs[1].set_xlim([-0.2, 1])
    axs[1].set_title(f'Cross Barcode, Num: {TPFN_min}')
# plt.savefig(f'./result/{file}_hist.png') # svg, png, pdf
    below_rate = []  # fraction of simi_max values below each threshold
    Thresh = np.linspace(-0.2, 1, 100)
    for th in Thresh:
        cnt = np.sum(simimax < th)
        below_rate.append(cnt/TPFN_max)
    fig, ax = plt.subplots()
    ax.plot(Thresh, below_rate, 'b', label='P(simi_max < th)')
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
    ax.grid(True)
    ax.set_title('Fraction of simi_max below threshold')
ax.set_xlabel(f"Num: {TPFN_max}")
ax.legend()
plt.show()
# plt.savefig(f'./result/{file}_pr.png') # svg, png, pdf
print("Have done!")
pass
def test_compare():
filepaths = [r"\\192.168.1.28\share\测试_202406\0913_扫A放B\0913_1\OneToOneCompare.txt",
r"\\192.168.1.28\share\测试_202406\0913_扫A放B\0913_2\OneToOneCompare.txt",
r"\\192.168.1.28\share\测试_202406\0914_扫A放B\0914_1\OneToOneCompare.txt",
r"\\192.168.1.28\share\测试_202406\0914_扫A放B\0914_2\OneToOneCompare.txt"
]
simiList = []
for fp in filepaths:
slist = read_one2one_data(fp)
simiList.extend(slist)
plot_pr_curve(simiList)
def contrast_pr(paths):
    '''
    1:1 / 1:SN / 1:n PR evaluation over the event folders under `paths`
    '''
paths = Path(paths)
evtpaths = []
for p in paths.iterdir():
condt1 = p.is_dir()
condt2 = len(p.name.split('_'))>=2
condt3 = len(p.name.split('_')[-1])>8
condt4 = p.name.split('_')[-1].isdigit()
if condt1 and condt2 and condt3 and condt4:
evtpaths.append(p)
# evtpaths = [p for p in paths.iterdir() if p.is_dir() and len(p.name.split('_'))>=2 and len(p.name.split('_')[-1])>8]
# evtpaths = [p for p in paths.iterdir() if p.is_dir()]
events, similars = [], []
    ##===================================== scan-A-put-A and scan-A-put-B scenarios
one2oneAA, one2oneAB = [], []
one2SNAA, one2SNAB = [], []
    ##===================================== used for 1:1
_tp_events, _fn_events, _fp_events, _tn_events = [], [], [], []
_tp_simi, _fn_simi, _tn_simi, _fp_simi = [], [], [], []
    ##===================================== used for 1:SN
tp_events, fn_events, fp_events, tn_events = [], [], [], []
tp_simi, fn_simi, tn_simi, fp_simi = [], [], [], []
    ##===================================== used for 1:n
tpevents, fnevents, fpevents, tnevents = [], [], [], []
tpsimi, fnsimi, tnsimi, fpsimi = [], [], [], []
    ##===================================== total barcodes, mis-compared events
bcdList = []
one2onePath, one2onePath1 = [], []
one2SNPath, one2SNPath1 = [], []
one2nPath = []
errorFile_one2one, errorFile_one2SN, errorFile_one2n = [], [], []
for path in evtpaths:
barcode = path.stem.split('_')[-1]
datapath = path.joinpath('process.data')
if not barcode.isdigit() or len(barcode)<10: continue
if not datapath.is_file(): continue
bcdList.append(barcode)
try:
SimiDict = read_similar(datapath)
        except Exception as e:
            print(f"{path.stem}, Error: {e}")
            continue  # SimiDict is undefined if reading process.data failed
        '''put-in: 1:1, similarity takes the max; take-out: 1:SN, similarity takes the mean'''
one2one = SimiDict['one2one']
one2SN = SimiDict['one2SN']
one2n = SimiDict['one2n']
'''================== 0. 1:1 ==================='''
barcodes, similars = [], []
for dt in one2one:
one2onePath.append((path.stem))
if dt['similar']==0:
one2onePath1.append((path.stem))
continue
barcodes.append(dt['barcode'])
similars.append(dt['similar'])
if len(barcodes)==len(similars) and len(barcodes)!=0:
            ## scan-A-put-A and scan-A-put-B scenarios
simAA = [similars[i] for i in range(len(barcodes)) if barcodes[i]==barcode]
simAB = [similars[i] for i in range(len(barcodes)) if barcodes[i]!=barcode]
one2oneAA.extend(simAA)
one2oneAB.extend(simAB)
            ## rank by similarity: a TP requires the barcode to match and rank first (for multi-barcode comparison)
max_idx = similars.index(max(similars))
max_sim = similars[max_idx]
# max_bcd = barcodes[max_idx]
for i in range(len(one2one)):
bcd, simi = barcodes[i], similars[i]
if bcd==barcode and simi==max_sim:
_tp_simi.append(simi)
_tp_events.append(path.stem)
elif bcd==barcode and simi!=max_sim:
_fn_simi.append(simi)
_fn_events.append(path.stem)
elif bcd!=barcode and simi!=max_sim:
_tn_simi.append(simi)
_tn_events.append(path.stem)
elif bcd!=barcode and simi==max_sim and barcode in barcodes:
_fp_simi.append(simi)
_fp_events.append(path.stem)
else:
errorFile_one2one.append(path.stem)
        '''================== 2. 1 : Small N in the take-out scenario ==================='''
barcodes, similars = [], []
for dt in one2SN:
barcodes.append(dt['barcode'])
similars.append(dt['similar'])
if len(barcodes)==len(similars) and len(barcodes)!=0:
            ## scan-A-put-A and scan-A-put-B scenarios
simAA = [similars[i] for i in range(len(barcodes)) if barcodes[i]==barcode]
simAB = [similars[i] for i in range(len(barcodes)) if barcodes[i]!=barcode]
one2SNAA.extend(simAA)
one2SNAB.extend(simAB)
one2SNPath.append(path.stem)
if len(simAA)==0:
one2SNPath1.append(path.stem)
            ## rank by similarity: a TP requires the barcode to match and rank first (for multi-barcode comparison)
max_idx = similars.index(max(similars))
max_sim = similars[max_idx]
# max_bcd = barcodes[max_idx]
for i in range(len(one2SN)):
bcd, simi = barcodes[i], similars[i]
if bcd==barcode and simi==max_sim:
tp_simi.append(simi)
tp_events.append(path.stem)
elif bcd==barcode and simi!=max_sim:
fn_simi.append(simi)
fn_events.append(path.stem)
elif bcd!=barcode and simi!=max_sim:
tn_simi.append(simi)
tn_events.append(path.stem)
elif bcd!=barcode and simi==max_sim and barcode in barcodes:
fp_simi.append(simi)
fp_events.append(path.stem)
else:
errorFile_one2SN.append(path.stem)
        '''===================== 3. 1:n in the take-out scenario ========================'''
events, evt_barcodes, evt_similars, evt_types = [], [], [], []
for dt in one2n:
events.append(dt["event"])
evt_barcodes.append(dt["barcode"])
evt_similars.append(dt["similar"])
evt_types.append(dt["type"])
if len(events)==len(evt_barcodes) and len(evt_barcodes)==len(evt_similars) \
and len(evt_similars)==len(evt_types) and len(events)>0:
one2nPath.append(path.stem)
maxsim = evt_similars[evt_similars.index(max(evt_similars))]
for i in range(len(one2n)):
bcd, simi = evt_barcodes[i], evt_similars[i]
if bcd==barcode and simi==maxsim:
tpsimi.append(simi)
tpevents.append(path.stem)
elif bcd==barcode and simi!=maxsim:
fnsimi.append(simi)
fnevents.append(path.stem)
elif bcd!=barcode and simi!=maxsim:
tnsimi.append(simi)
tnevents.append(path.stem)
elif bcd!=barcode and simi==maxsim and barcode in evt_barcodes:
fpsimi.append(simi)
fpevents.append(path.stem)
else:
errorFile_one2n.append(path.stem)
    '''naming convention:
         1:1 (max)    1:SN (mean)    1:n        1:SN (ranked)
         _TP          TP_            TP         TPX
         _PPrecise    PPrecise_      PPrecise   PPreciseX
         tpsimi belongs to the 1:n family, tp_simi to the ranked 1:SN family
    '''
    ''' 1:1 data storage; similarity taken as max or mean'''
_PPrecise, _PRecall = [], []
_NPrecise, _NRecall = [], []
PPrecise_, PRecall_ = [], []
NPrecise_, NRecall_ = [], []
    ''' 1:SN data storage; needs ranking by similarity'''
PPreciseX, PRecallX = [], []
NPreciseX, NRecallX = [], []
    ''' 1:n data storage; needs ranking by similarity'''
PPrecise, PRecall = [], []
NPrecise, NRecall = [], []
Thresh = np.linspace(-0.2, 1, 100)
for th in Thresh:
        '''(Precise, Recall) computation; if 1:1 and 1:SN used the same similarity selection, the branches could be merged'''
        '''===================================== 1:1, max'''
_TP = sum(np.array(one2oneAA) >= th)
_FP = sum(np.array(one2oneAB) >= th)
_FN = sum(np.array(one2oneAA) < th)
_TN = sum(np.array(one2oneAB) < th)
_PPrecise.append(_TP/(_TP+_FP+1e-6))
_PRecall.append(_TP/(len(one2oneAA)+1e-6))
_NPrecise.append(_TN/(_TN+_FN+1e-6))
_NRecall.append(_TN/(len(one2oneAB)+1e-6))
        '''===================================== 1:SN, mean'''
TP_ = sum(np.array(one2SNAA) >= th)
FP_ = sum(np.array(one2SNAB) >= th)
FN_ = sum(np.array(one2SNAA) < th)
TN_ = sum(np.array(one2SNAB) < th)
PPrecise_.append(TP_/(TP_+FP_+1e-6))
PRecall_.append(TP_/(len(one2SNAA)+1e-6))
NPrecise_.append(TN_/(TN_+FN_+1e-6))
NRecall_.append(TN_/(len(one2SNAB)+1e-6))
        '''(Precise, Recall) when multiple similarities are ranked: a TP requires the barcode to match and rank first'''
        '''===================================== 1:SN '''
TPX = sum(np.array(tp_simi) >= th)
FPX = sum(np.array(fp_simi) >= th)
FNX = sum(np.array(fn_simi) < th)
TNX = sum(np.array(tn_simi) < th)
PPreciseX.append(TPX/(TPX+FPX+1e-6))
PRecallX.append(TPX/(len(tp_simi)+len(fn_simi)+1e-6))
NPreciseX.append(TNX/(TNX+FNX+1e-6))
NRecallX.append(TNX/(len(tn_simi)+len(fp_simi)+1e-6))
'''===================================== 1:n'''
TP = sum(np.array(tpsimi) >= th)
FP = sum(np.array(fpsimi) >= th)
FN = sum(np.array(fnsimi) < th)
TN = sum(np.array(tnsimi) < th)
PPrecise.append(TP/(TP+FP+1e-6))
PRecall.append(TP/(len(tpsimi)+len(fnsimi)+1e-6))
NPrecise.append(TN/(TN+FN+1e-6))
NRecall.append(TN/(len(tnsimi)+len(fpsimi)+1e-6))
    '''1. ============================= 1:1 max-value scheme, curves'''
fig, ax = plt.subplots()
    ax.plot(Thresh, _PPrecise, 'r', label='Precise_Pos: TP/(TP+FP)')
    ax.plot(Thresh, _PRecall, 'b', label='Recall_Pos: TP/(TP+FN)')
    ax.plot(Thresh, _NPrecise, 'g', label='Precise_Neg: TN/(TN+FN)')
    ax.plot(Thresh, _NRecall, 'c', label='Recall_Neg: TN/(TN+FP)')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid(True)
ax.set_title('1:1 Precise & Recall')
ax.set_xlabel(f"Event Num: {len(one2oneAA)+len(one2oneAB)}")
ax.legend()
plt.show()
    ## ============================= 1:1 max-value scheme, histograms
fig, axes = plt.subplots(2, 1)
axes[0].hist(np.array(one2oneAA), bins=60, edgecolor='black')
axes[0].set_xlim([-0.2, 1])
axes[0].set_title('AA')
axes[1].hist(np.array(one2oneAB), bins=60, edgecolor='black')
axes[1].set_xlim([-0.2, 1])
    axes[1].set_title('AB')
plt.show()
    '''2. ============================= 1:1 mean-value scheme, curves'''
fig, ax = plt.subplots()
    ax.plot(Thresh, PPrecise_, 'r', label='Precise_Pos: TP/(TP+FP)')
    ax.plot(Thresh, PRecall_, 'b', label='Recall_Pos: TP/(TP+FN)')
    ax.plot(Thresh, NPrecise_, 'g', label='Precise_Neg: TN/(TN+FN)')
    ax.plot(Thresh, NRecall_, 'c', label='Recall_Neg: TN/(TN+FP)')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid(True)
ax.set_title('1:1 Precise & Recall')
ax.set_xlabel(f"Event Num: {len(one2SNAA)}")
ax.legend()
plt.show()
    ## ============================= 1:1 mean-value scheme, histograms
fig, axes = plt.subplots(2, 1)
axes[0].hist(np.array(one2SNAA), bins=60, edgecolor='black')
axes[0].set_xlim([-0.2, 1])
axes[0].set_title('AA')
axes[0].set_xlabel(f"Event Num: {len(one2SNAA)}")
axes[1].hist(np.array(one2SNAB), bins=60, edgecolor='black')
axes[1].set_xlim([-0.2, 1])
    axes[1].set_title('AB')
axes[1].set_xlabel(f"Event Num: {len(one2SNAB)}")
plt.show()
    '''3. ============================= 1:SN curves'''
fig, ax = plt.subplots()
    ax.plot(Thresh, PPreciseX, 'r', label='Precise_Pos: TP/(TP+FP)')
    ax.plot(Thresh, PRecallX, 'b', label='Recall_Pos: TP/(TP+FN)')
    ax.plot(Thresh, NPreciseX, 'g', label='Precise_Neg: TN/(TN+FN)')
    ax.plot(Thresh, NRecallX, 'c', label='Recall_Neg: TN/(TN+FP)')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid(True)
ax.set_title('1:SN Precise & Recall')
ax.set_xlabel(f"Event Num: {len(one2SNAA)}")
ax.legend()
plt.show()
    ## ============================= 1:SN histograms
fig, axes = plt.subplots(2, 2)
axes[0, 0].hist(tp_simi, bins=60, edgecolor='black')
axes[0, 0].set_xlim([-0.2, 1])
axes[0, 0].set_title(f'TP({len(tp_simi)})')
axes[0, 1].hist(fp_simi, bins=60, edgecolor='black')
axes[0, 1].set_xlim([-0.2, 1])
axes[0, 1].set_title(f'FP({len(fp_simi)})')
axes[1, 0].hist(tn_simi, bins=60, edgecolor='black')
axes[1, 0].set_xlim([-0.2, 1])
axes[1, 0].set_title(f'TN({len(tn_simi)})')
axes[1, 1].hist(fn_simi, bins=60, edgecolor='black')
axes[1, 1].set_xlim([-0.2, 1])
axes[1, 1].set_title(f'FN({len(fn_simi)})')
plt.show()
    '''4. ============================= 1:n curves'''
fig, ax = plt.subplots()
    ax.plot(Thresh, PPrecise, 'r', label='Precise_Pos: TP/(TP+FP)')
    ax.plot(Thresh, PRecall, 'b', label='Recall_Pos: TP/(TP+FN)')
    ax.plot(Thresh, NPrecise, 'g', label='Precise_Neg: TN/(TN+FN)')
    ax.plot(Thresh, NRecall, 'c', label='Recall_Neg: TN/(TN+FP)')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid(True)
ax.set_title('1:n Precise & Recall')
ax.set_xlabel(f"Event Num: {len(tpsimi)+len(fnsimi)}")
ax.legend()
plt.show()
    ## ============================= 1:n histograms
fig, axes = plt.subplots(2, 2)
axes[0, 0].hist(tpsimi, bins=60, edgecolor='black')
axes[0, 0].set_xlim([-0.2, 1])
axes[0, 0].set_title(f'TP({len(tpsimi)})')
axes[0, 1].hist(fpsimi, bins=60, edgecolor='black')
axes[0, 1].set_xlim([-0.2, 1])
axes[0, 1].set_title(f'FP({len(fpsimi)})')
axes[1, 0].hist(tnsimi, bins=60, edgecolor='black')
axes[1, 0].set_xlim([-0.2, 1])
axes[1, 0].set_title(f'TN({len(tnsimi)})')
axes[1, 1].hist(fnsimi, bins=60, edgecolor='black')
axes[1, 1].set_xlim([-0.2, 1])
axes[1, 1].set_title(f'FN({len(fnsimi)})')
plt.show()
fpsnErrFile = str(paths.joinpath("one2SN_Error.txt"))
with open(fpsnErrFile, "w") as file:
for item in fp_events:
file.write(item + "\n")
fpErrFile = str(paths.joinpath("one2n_Error.txt"))
with open(fpErrFile, "w") as file:
for item in fpevents:
file.write(item + "\n")
# bcdSet = set(bcdList)
# one2nErrFile = str(paths.joinpath("one_2_Small_n_Error.txt"))
# with open(one2nErrFile, "w") as file:
# for item in fnevents:
# file.write(item + "\n")
# one2NErrFile = str(paths.joinpath("one_2_Big_N_Error.txt"))
# with open(one2NErrFile, "w") as file:
# for item in fn_events:
# file.write(item + "\n")
print('Done!')
if __name__ == "__main__":
evtpaths = r"D:\全实时\source_data\2024122416"
contrast_pr(evtpaths)

View File

@ -0,0 +1,109 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 23 13:58:13 2024
written for selecting std subimgs for Wuhuaqi
@author: ym
"""
import os
import time
# import torch
import pickle
# import json
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from feat_extract.config import config as conf
# from model import resnet18 as resnet18
from feat_extract.inference import FeatsInterface #, inference_image
IMG_FORMAT = ['.bmp', '.jpg', '.jpeg', '.png']
def gen_features(imgpath):
Encoder = FeatsInterface(conf)
imgs, imgnames = [], []
for filename in os.listdir(imgpath):
file, ext = os.path.splitext(filename)
if ext not in IMG_FORMAT: continue
fpath = os.path.join(imgpath, filename)
img = Image.open(fpath)
imgs.append(img)
filelist = file.split("_")
newname = "_".join([filelist[0],filelist[1], filelist[2], filelist[-3], filelist[-2], filelist[-1]])
# imgnames.append(newname)
imgnames.append(file)
features = Encoder.inference(imgs)
features /= np.linalg.norm(features, axis=1)[:, None]
return features, imgnames
def top_p_percent_indices(matrix, p):
"""
Finds the indices of the top p% largest elements in a 2D matrix.
Args:
matrix (np.ndarray): A 2D NumPy array.
p: int, 0-100
Returns:
        List[Tuple[int, int]]: (row, column) indices of the top p% largest elements.
"""
# Flatten the matrix
flat_matrix = matrix.flatten()
    # Calculate the threshold for the top p%
    num_elements = len(flat_matrix)
    threshold_index = int(num_elements * 0.01 * p)  # top p%
    threshold_index = max(1, threshold_index)  # Ensure at least one element is considered
threshold_value = np.partition(flat_matrix, -threshold_index)[-threshold_index]
# Create a mask for elements >= threshold
mask = matrix >= threshold_value
# Get the indices of elements that satisfy the mask
indices = np.argwhere(mask)
return list(map(tuple, indices))
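# Quick sanity check on a toy matrix (illustration only): with p=25, just the
# single largest of the four entries should be returned.
def _demo_top_p():
    demo = np.array([[0.1, 0.9], [0.8, 0.2]])
    print(top_p_percent_indices(demo, 25))  # -> [(0, 1)]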
def main():
imgpath = r"\\192.168.1.28\share\数据\已完成数据\展厅数据\v1.0\比对数据\整理\zhantingBase\6923555210479"
feats, imgnames = gen_features(imgpath)
n = len(feats)
matrix = 1 - cdist(feats, feats, 'cosine')
nmatrix = np.array([[matrix[i][j] for j in range(n) if i != j] for i in range(n)])
top_p_large_index = top_p_percent_indices(nmatrix, 1)
top_p_small_index = top_p_percent_indices(-1*nmatrix, 1)
simi_mean = np.mean(nmatrix, axis=1)
max_simi = np.max(nmatrix)
max_index = np.where(nmatrix==max_simi)
min_simi = np.min(nmatrix)
min_index = np.where(nmatrix==min_simi)
fig, ax = plt.subplots()
simils = [matrix[i][j] for j in range(n) for i in range(n) if j>i]
ax.hist(simils, bins=60, range=(-0.2, 1), edgecolor='black')
ax.set_xlim([-0.2, 1])
ax.set_title("Similarity")
print("done!")
if __name__ == '__main__':
main()

View File

@ -0,0 +1,200 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 9 10:36:45 2024
Analyze the similarity between image pairs
@author: ym
"""
import os
import cv2
import numpy as np
import torch
import sys
from scipy.spatial.distance import cdist
''' Load the model in the form defined by LC '''
from config import config as conf
from model import resnet18 as resnet18
from test_ori import inference_image
##============ load resnet model
model = resnet18().to(conf.device)
# model = nn.DataParallel(model).to(conf.device)
model.load_state_dict(torch.load(conf.test_model, map_location=conf.device))
model.eval()
print('load model {} '.format(conf.testbackbone))
IMG_FORMAT = ['.bmp', '.jpg', '.JPG', '.jpeg', '.png']
# =============================================================================
# ''' 加载REID中定义的模型形式'''
# sys.path.append(r"D:\DetectTracking")
# from tracking.trackers.reid.reid_interface import ReIDInterface
# from tracking.trackers.reid.config import config as ReIDConfig
# ReIDEncoder = ReIDInterface(ReIDConfig)
#
# def inference_image_ReID(images):
# batch_patches = []
# patches = []
# for d, img1 in enumerate(images):
#
#
# img = img1[:, :, ::-1].copy() # the model expects RGB inputs
# patch = ReIDEncoder.transform(img)
#
# # patch = patch.to(device=self.device).half()
# if str(ReIDEncoder.device) != "cpu":
# patch = patch.to(device=ReIDEncoder.device).half()
# else:
# patch = patch.to(device=ReIDEncoder.device)
#
# patches.append(patch)
# if (d + 1) % ReIDEncoder.batch_size == 0:
# patches = torch.stack(patches, dim=0)
# batch_patches.append(patches)
# patches = []
#
# if len(patches):
# patches = torch.stack(patches, dim=0)
# batch_patches.append(patches)
#
# features = np.zeros((0, ReIDEncoder.embedding_size))
# for patches in batch_patches:
# pred = ReIDEncoder.model(patches)
# pred[torch.isinf(pred)] = 1.0
# feat = pred.cpu().data.numpy()
# features = np.vstack((features, feat))
#
# return features
# =============================================================================
def similarity_compare():
imgpaths = r"D:\DetectTracking\contrast\images\2"
filepaths = []
for root, dirs, filenames in os.walk(imgpaths):
for filename in filenames:
file, ext = os.path.splitext(filename)
if ext not in IMG_FORMAT: continue
file_path = os.path.join(root, filename)
filepaths.append(file_path)
feature = inference_image(filepaths, conf.test_transform, model, conf.device)
feature /= np.linalg.norm(feature, axis=1)[:, None]
similar = 1 - np.maximum(0.0, cdist(feature, feature, metric='cosine'))
print("Done!")
def similarity_compare_sequence(root_dir):
    '''
    root_dir: folders whose names contain "subimgs" hold the cropped subimages
    function: compare the similarity between subimages of adjacent frames
    '''
all_files = []
extensions = ['.png', '.jpg']
for dirpath, dirnames, filenames in os.walk(root_dir):
filepaths = []
for filename in filenames:
if os.path.basename(dirpath).find('subimgs') < 0:
continue
file, ext = os.path.splitext(filename)
if ext in extensions:
imgpath = os.path.join(dirpath, filename)
filepaths.append(imgpath)
nf = len(filepaths)
if nf==0:
continue
fnma = os.path.basename(filepaths[0]).split('.')[0]
imga = cv2.imread(filepaths[0])
ha, wa = imga.shape[:2]
for i in range(1, nf):
fnmb = os.path.basename(filepaths[i]).split('.')[0]
imgb = cv2.imread(filepaths[i])
hb, wb = imgb.shape[:2]
            feats = inference_image_ReID((imga, imgb))  # requires the ReID block commented out above
similar = 1 - np.maximum(0.0, cdist(feats, feats, metric='cosine'))
            h, w = max((ha, hb)), max((wa, wb))
            img = np.zeros((h, 2*w, 3), np.uint8)
            img[0:ha, 0:wa], img[0:hb, w:(w+wb)] = imga, imgb
            linewidth = max(round((h + 2*w)/2 * 0.001), 2)
cv2.putText(img,
text=f'{similar[0,1]:.2f}', # Text string to be drawn
org=(max(w-20, 10), h-10), # Bottom-left corner of the text string
fontFace=0, # Font type
fontScale=linewidth/3, # Font scale factor
color=(0, 0, 255), # Text color
thickness=linewidth, # Thickness of the lines used to draw a text
lineType=cv2.LINE_AA, # Line type
)
spath = os.path.join(dirpath, 's'+fnma+'-vs-'+fnmb+'.png')
cv2.imwrite(spath, img)
fnma = os.path.basename(filepaths[i]).split('.')[0]
imga = imgb.copy()
ha, wa = imga.shape[:2]
return
def main():
root_dir = r"D:\contrast\dataset\result\20240723-112242_6923790709882"
try:
similarity_compare_sequence(root_dir)
except Exception as e:
print(f'Error: {e}')
if __name__ == '__main__':
# main()
    similarity_compare()

View File

@ -0,0 +1,160 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 20 11:17:29 2024
@author: ym
"""
import os
import cv2
import pickle
import numpy as np
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
def save_imgpairs(barcode, imgpaths, matrix, savepath, thresh=(0.4, 0.6), ctype="intra"):
if ctype=="intra":
        rows, cols = np.triu_indices(matrix.shape[0], k=1)  # k=1 excludes the diagonal
mask = matrix[rows, cols] < thresh[1]
indices = list(zip(rows[mask], cols[mask]))
else:
rows, cols = np.where(matrix > thresh[0])
indices = list(zip(rows, cols))
if len(indices):
savepath = os.path.join(savepath, barcode)
        if not os.path.exists(savepath):
            os.makedirs(savepath)
for idx1, idx2 in indices:
if len(imgpaths) == 1:
img1 = cv2.imread(imgpaths[0][idx1])
img2 = cv2.imread(imgpaths[0][idx2])
elif len(imgpaths) == 2:
img1 = cv2.imread(imgpaths[0][idx1])
img2 = cv2.imread(imgpaths[1][idx2])
simi = matrix[idx1, idx2]
H1, W1 = img1.shape[:2]
H2, W2 = img2.shape[:2]
H, W = max((H1, H2)), max((W1, W2))
            img = (np.ones((H, 2*W, 3)) * np.array([255, 128, 128])).astype(np.uint8)
img[0:H1, 0:W1, :] = img1
img[0:H2, (2*W-W2):, :] = img2
text = f"sim: {simi:.2f}"
org = (10, H-10)
cv2.putText(img, text, org, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.75,
color=(0, 0, 255), thickness=2, lineType=cv2.LINE_AA)
imgpath = os.path.join(savepath, f"{simi:.2f}_{barcode}_{idx1}_{idx2}.png")
cv2.imwrite(imgpath, img)
def feat_analysis(featpath):
savepath = r"D:\exhibition\result\stdfeat"
InterThresh = (0.4, 0.6)
featDict, features= [], []
for filename in os.listdir(featpath):
file, ext = os.path.splitext(filename)
if ext != ".pickle": continue
filepath = os.path.join(featpath, filename)
with open(filepath, 'rb') as f:
bpDict = pickle.load(f)
feat = bpDict["feats_ft32"]
featDict.append(bpDict)
features.append(feat)
N = len(features)
simMatrix = []
intra_simi = np.empty(0)
low_simi_index = {}
for i, feats in enumerate(features):
matrix = 1 - cdist(feats, feats, 'cosine')
simMatrix.append(matrix)
        '''extract the upper-triangular elements of the similarity matrix'''
        rows, cols = np.triu_indices(matrix.shape[0], k=1)  # k=1 excludes the diagonal
upper_tri= matrix[rows, cols]
intra_simi = np.concatenate((intra_simi, upper_tri))
        '''save the image pairs whose similarity falls below the threshold'''
barcode = featDict[i]["barcode"]
imgpaths = featDict[i]["imgpaths"]
# save_imgpairs(barcode, [imgpaths], matrix, savepath, InterThresh, "intra")
print(f"{barcode} have done!")
Matrix = np.zeros((N, N))
inter_bcds = []
inter_simi = np.empty(0)
for i, feati in enumerate(features):
bcdi = featDict[i]["barcode"]
imgpathi = featDict[i]["imgpaths"]
for j, featj in enumerate(features):
bcdj = featDict[j]["barcode"]
imgpathj = featDict[j]["imgpaths"]
matrix = 1 - cdist(feati, featj, 'cosine')
inter_bcds.append((i, j, bcdi, bcdj))
Matrix[i, j] = np.mean(matrix)
if j>i:
bcd_ij = bcdi+'_'+bcdj
# save_imgpairs(bcd_ij, [imgpathi, imgpathj], matrix, savepath, InterThresh, "inter")
inter_simi = np.concatenate((inter_simi, matrix.ravel()))
print(f"{bcd_ij} have done!")
fig, axs = plt.subplots(2, 1)
axs[0].hist(intra_simi, bins=100, color='blue', edgecolor='black', alpha=0.7)
axs[0].set_xlim(0, 1)
    axs[0].set_xlabel('Similarity')
    axs[0].set_title("intra similarity")
    axs[1].hist(inter_simi, bins=100, color='green', edgecolor='black', alpha=0.7)
    axs[1].set_xlim(0, 1)
    axs[1].set_xlabel('Similarity')
    axs[1].set_title("inter similarity")
print("Done")
def main():
stdpath = r"D:\exhibition\dataset\feats"
feat_analysis(stdpath)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 26 08:53:58 2024
@author: ym
"""

View File

@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 13 16:49:05 2024
Compare the barcode lists under stdBcdpath and filepath, producing their
intersection and the list of barcodes not contained in stdBcdpath
@author: ym
"""
import os
from openpyxl import load_workbook, Workbook
def read_xlsx():
stdBcdpath = r"\\192.168.1.28\share\已标注数据备份\对比数据\barcode\total_barcode_6588"
filepath = r"\\192.168.1.28\share\联华中环店\中环店商品信息.xlsx"
existingPath = r'\\192.168.1.28\share\联华中环店\中环店商品信息_已有商品.xlsx'
lackingPath = r'\\192.168.1.28\share\联华中环店\中环店商品信息_未包含商品.xlsx'
workbook = load_workbook(filename=filepath)
sheet = workbook['Sheet1']
barcodeCol = [sheet.cell(row=r, column=1).value for r in range(1, sheet.max_row+1)]
zhBarcodeList = [barcodeCol[i] for i in range(1, len(barcodeCol))]
stdBarcodeList = []
for filename in os.listdir(stdBcdpath):
filepath = os.path.join(stdBcdpath, filename)
if not os.path.isdir(filepath) or not filename.isdigit():
continue
stdBarcodeList.append(int(filename))
stdBarcodeSet = set(stdBarcodeList)
zhBarcodeSet = set(zhBarcodeList)
interBarcodes = list(zhBarcodeSet.intersection(stdBarcodeSet))
print(len(interBarcodes))
dest_wb1 = Workbook()
dest_sheet1 = dest_wb1.active
for row in sheet.iter_rows(min_row=1, max_col=sheet.max_column, values_only=True):
if str(row[0]).find("商品条码")>=0:
dest_sheet1.append(row)
if row[0] in interBarcodes:
dest_sheet1.append(row)
dest_wb1.save(filename=existingPath)
dest_wb1.close()
diffBarcodes = list(zhBarcodeSet.difference(stdBarcodeSet))
dest_wb2 = Workbook()
dest_sheet2 = dest_wb2.active
for row in sheet.iter_rows(min_row=1, max_col=sheet.max_column, values_only=True):
if str(row[0]).find("商品条码")>=0:
dest_sheet2.append(row)
if row[0] in diffBarcodes:
dest_sheet2.append(row)
dest_wb2.save(filename=lackingPath)
dest_wb2.close()
workbook.close()
if __name__ == '__main__':
# main()
read_xlsx()

View File

@ -0,0 +1,155 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 10 14:30:16 2024
@author: ym
"""
import os
import sys
import numpy as np
sys.path.append(r"D:\DetectTracking")
from tracking.utils.read_data import read_tracking_output, read_similar #, extract_data, read_deletedBarcode_file
IMG_FORMAT = ['.bmp', '.jpg', '.jpeg', '.png']
def creat_shopping_event(eventPath):
    '''Build the dict for a put-in shopping event. An event must satisfy:
        1) at least one output track from the front or back camera
        2) frame images saved, so that box subimages can be cropped
    '''
    '''evtName denotes one shopping event'''
evtName = os.path.basename(eventPath)
evtList = evtName.split('_')
    '''================ 0. check the validity of evtName and eventPath ================'''
if evtName.find('2024')<0 and len(evtList[0])!=15:
return
if not os.path.isdir(eventPath):
return
if len(evtList)==1 or (len(evtList)==2 and len(evtList[1])==0):
barcode = ''
else:
barcode = evtList[-1]
if len(evtList)==3 and evtList[-1]== evtList[-2]:
evtType = 'input'
else:
evtType = 'other'
    '''================ 1. build the event-description dict, tentatively 9 items ==============='''
event = {}
event['barcode'] = barcode
event['type'] = evtType
event['filepath'] = eventPath
event['back_imgpaths'] = []
event['front_imgpaths'] = []
event['back_boxes'] = np.empty((0, 9), dtype=np.float64)
event['front_boxes'] = np.empty((0, 9), dtype=np.float64)
event['back_feats'] = np.empty((0, 256), dtype=np.float64)
event['front_feats'] = np.empty((0, 256), dtype=np.float64)
event['feats_compose'] = np.empty((0, 256), dtype=np.float64)
event['one2one'] = None
event['one2n'] = None
event['feats_select'] = np.empty((0, 256), dtype=np.float64)
    '''================= 2. read the data files ============================='''
for dataname in os.listdir(eventPath):
# filename = '1_track.data'
datapath = os.path.join(eventPath, dataname)
if not os.path.isfile(datapath): continue
CamerType = dataname.split('_')[0]
        ''' 2.1 read the data in 0/1_track.data (not considered for now)'''
# if dataname.find("_track.data")>0:
# bboxes, ffeats, trackerboxes, tracker_feat_dict, trackingboxes, tracking_feat_dict = extract_data(datapath)
        ''' 2.2 read the data in 0/1_tracking_output.data'''
if dataname.find("_tracking_output.data")>0:
tracking_output_boxes, tracking_output_feats = read_tracking_output(datapath)
if len(tracking_output_boxes) != len(tracking_output_feats): continue
if CamerType == '0':
event['back_boxes'] = tracking_output_boxes
event['back_feats'] = tracking_output_feats
elif CamerType == '1':
event['front_boxes'] = tracking_output_boxes
event['front_feats'] = tracking_output_feats
if dataname.find("process.data")==0:
simiDict = read_similar(datapath)
event['one2one'] = simiDict['one2one']
event['one2n'] = simiDict['one2n']
if len(event['back_boxes'])==0 or len(event['front_boxes'])==0:
return None
    '''2.3 feature representation of the event: feature selection and composition'''
bk_feats = event['back_feats']
ft_feats = event['front_feats']
    '''2.3.1 feature composition'''
feats_compose = np.empty((0, 256), dtype=np.float64)
if len(ft_feats):
feats_compose = np.concatenate((feats_compose, ft_feats), axis=0)
if len(bk_feats):
feats_compose = np.concatenate((feats_compose, bk_feats), axis=0)
event['feats_compose'] = feats_compose
    '''2.3.2 feature selection'''
if len(ft_feats):
event['feats_select'] = ft_feats
    '''================ 3. read the image paths and sort them by frame ID ============='''
frontImgs, frontFid = [], []
backImgs, backFid = [], []
for imgname in os.listdir(eventPath):
name, ext = os.path.splitext(imgname)
if ext not in IMG_FORMAT or name.find('frameId')<0: continue
CamerType = name.split('_')[0]
frameId = int(name.split('_')[3])
imgpath = os.path.join(eventPath, imgname)
if CamerType == '0':
backImgs.append(imgpath)
backFid.append(frameId)
if CamerType == '1':
frontImgs.append(imgpath)
frontFid.append(frameId)
frontIdx = np.argsort(np.array(frontFid))
backIdx = np.argsort(np.array(backFid))
    '''3.1 front/back image path lists sorted by frame ID'''
frontImgs = [frontImgs[i] for i in frontIdx]
backImgs = [backImgs[i] for i in backIdx]
    '''3.2 add the front/back image paths to the event dict'''
bfid = event['back_boxes'][:, 7].astype(np.int64)
ffid = event['front_boxes'][:, 7].astype(np.int64)
if len(bfid) and max(bfid) <= len(backImgs):
event['back_imgpaths'] = [backImgs[i-1] for i in bfid]
if len(ffid) and max(ffid) <= len(frontImgs):
event['front_imgpaths'] = [frontImgs[i-1] for i in ffid]
    '''================ 4. check the validity of the current event and add it to the event list =========='''
condt1 = len(event['back_imgpaths'])==0 or len(event['front_imgpaths'])==0
condt2 = len(event['front_feats'])==0 and len(event['back_feats'])==0
if condt1 or condt2:
print(f"Event: {evtName}, Error, condt1: {condt1}, condt2: {condt2}")
return None
return event
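# Hedged usage sketch: build an event dict from an event folder and inspect the
# composed features. The folder path below is hypothetical.
def _demo_creat_shopping_event():
    evtpath = r"D:\events\20241209-160248-xxxx_6973226721445"  # hypothetical path
    evt = creat_shopping_event(evtpath)
    if evt is not None:
        print(evt['barcode'], evt['feats_compose'].shape)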

View File

@ -0,0 +1,533 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 26 17:35:05 2024
@author: ym
"""
import os
import cv2
import pickle
import numpy as np
from pathlib import Path
import sys
sys.path.append(r"D:\DetectTracking")
from tracking.utils.plotting import Annotator, colors
from tracking.utils.drawtracks import drawTrack
from tracking.utils.read_data import extract_data, read_tracking_output, read_similar
from tracking.utils.read_data import extract_data_realtime, read_tracking_output_realtime
IMG_FORMAT = ['.bmp', '.jpg', '.jpeg', '.png']
VID_FORMAT = ['.mp4', '.avi']
def save_data(event, resultPath=None):
    '''save the track subimages of an event'''
if resultPath is None:
resultPath = os.path.dirname(os.path.abspath(__file__))
subimgpath = os.path.join(resultPath, f"{event.evtname}", "subimg")
imgspath = os.path.join(resultPath, f"{event.evtname}", "imgs")
if not os.path.exists(subimgpath):
os.makedirs(subimgpath)
if not os.path.exists(imgspath):
os.makedirs(imgspath)
    ##(2) save the subimages along the tracks
subimgpairs = event.save_event_subimg(subimgpath)
for subimgName, subimg in subimgpairs:
spath = os.path.join(subimgpath, subimgName)
cv2.imwrite(spath, subimg)
    ##(3) save the sequence images
imgpairs = event.plot_save_image(imgspath)
for imgname, img in imgpairs:
spath = os.path.join(imgspath, imgname)
cv2.imwrite(spath, img)
    ##(4) save the track scatter plots
img_cat = event.draw_tracks()
trajpath = os.path.join(resultPath, "trajectory")
if not os.path.exists(trajpath):
os.makedirs(trajpath)
traj_imgpath = os.path.join(trajpath, event.evtname+".png")
cv2.imwrite(traj_imgpath, img_cat)
def array2list(bboxes):
    '''
    Convert bboxes into a list of per-track arrays
    bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
    Return:
        lboxes: list whose elements each share one track_id, rows formatted as
        [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
    '''
lboxes = []
if len(bboxes)==0:
return []
trackID = np.unique(bboxes[:, 4].astype(int))
track_ids = bboxes[:, 4].astype(int)
for t_id in trackID:
idx = np.where(track_ids == t_id)[0]
box = bboxes[idx, :]
lboxes.append(box)
return lboxes
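# Toy check for array2list (hedged, illustration only): two boxes sharing
# track_id 1 and one box with track_id 2 should group into two arrays.
def _demo_array2list():
    boxes = np.array([[0, 0, 10, 10, 1, 0.9, 0, 1, 0],
                      [1, 1, 11, 11, 1, 0.8, 0, 2, 0],
                      [5, 5, 20, 20, 2, 0.7, 0, 1, 1]], dtype=np.float64)
    lboxes = array2list(boxes)
    print(len(lboxes), [b.shape[0] for b in lboxes])  # 2 [2, 1]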
class ShoppingEvent:
def __init__(self, eventpath, stype="data"):
        '''stype: str, one of 'source', 'data', 'realtime' '''
self.eventpath = eventpath
self.evtname = str(Path(eventpath).stem)
self.barcode = ''
self.evtType = ''
'''=========== path of image and video =========== '''
self.back_videopath = ''
self.front_videopath = ''
self.back_imgpaths = []
self.front_imgpaths = []
'''=========== process.data ==============================='''
self.one2one = None
self.one2n = None
self.one2SN = None
'''=========== 0/1_track.data ============================='''
self.back_yolobboxes = []
self.back_yolofeats = []
        self.back_trackerboxes = np.empty((0, 9), dtype=np.float64)  ## compatible with class doTracks
self.back_trackerfeats = {}
self.back_trackingboxes = []
self.back_trackingfeats = []
self.front_yolobboxes = []
self.front_yolofeats = []
        self.front_trackerboxes = np.empty((0, 9), dtype=np.float64)  ## compatible with class doTracks
self.front_trackerfeats = {}
self.front_trackingboxes = []
self.front_trackingfeats = []
'''=========== 0/1_tracking_output.data ==================='''
self.back_boxes = []
self.back_feats = []
self.front_boxes = []
self.front_feats = []
if stype=="data":
self.from_datafile(eventpath)
if stype=="realtime":
self.from_realtime_datafile(eventpath)
if stype=="source":
self.from_source_pkl(eventpath)
self.feats_select = np.empty((0, 256), dtype=np.float64)
self.feats_compose = np.empty((0, 256), dtype=np.float64)
self.select_feats()
self.compose_feats()
# if stype=="image":
# self.from_image(eventpath)
def kerndata(self, ShoppingDict, camtype="backCamera"):
'''
camtype: str, "backCamera" or "frontCamera"
'''
yoloboxes, resfeats = [], []
trackerboxes = np.empty((0, 9), dtype=np.float64)
trackefeats = {}
trackingboxes, trackingfeats = [], []
frameDictList = ShoppingDict[camtype]["yoloResnetTracker"]
for frameDict in frameDictList:
yoloboxes.append(frameDict["bboxes"])
tboxes = frameDict["tboxes"]
trackefeats.update(frameDict["feats"])
trackerboxes = np.concatenate((trackerboxes, np.array(tboxes)), axis=0)
Residual = ShoppingDict[camtype]["tracking"].Residual
for track in Residual:
trackingboxes.append(track.boxes)
trackingfeats.append(track.features)
kdata = (yoloboxes, resfeats, trackerboxes, trackefeats, trackingboxes, trackingfeats)
tracking_out_boxes, tracking_out_feats = [], []
Confirmed = ShoppingDict[camtype]["tracking"].Confirmed
for track in Confirmed:
tracking_out_boxes.append(track.boxes)
tracking_out_feats.append(track.features)
outdata = (tracking_out_boxes, tracking_out_feats)
return kdata, outdata
def from_source_pkl(self, eventpath):
with open(eventpath, 'rb') as f:
ShoppingDict = pickle.load(f)
self.eventpath = ShoppingDict["eventPath"]
self.evtname = ShoppingDict["eventName"]
self.barcode = ShoppingDict["barcode"]
if len(ShoppingDict["one2n"]):
self.one2n = ShoppingDict["one2n"]
'''=========== path of image and video =========== '''
self.back_videopath = ShoppingDict["backCamera"]["videoPath"]
self.front_videopath = ShoppingDict["frontCamera"]["videoPath"]
self.back_imgpaths = ShoppingDict["backCamera"]["imagePaths"]
self.front_imgpaths = ShoppingDict["frontCamera"]["imagePaths"]
        '''=========== corresponds to 0/1_track.data ============================='''
backdata, back_outdata = self.kerndata(ShoppingDict, "backCamera")
frontdata, front_outdata = self.kerndata(ShoppingDict, "frontCamera")
self.back_yolobboxes = backdata[0]
self.back_yolofeats = backdata[1]
self.back_trackerboxes = backdata[2]
        self.back_trackerfeats = backdata[3]
        self.back_trackingboxes = backdata[4]
        self.back_trackingfeats = backdata[5]
self.front_yolobboxes = frontdata[0]
self.front_yolofeats = frontdata[1]
self.front_trackerboxes = frontdata[2]
self.front_trackerfeats = frontdata[3]
self.front_trackingboxes = frontdata[4]
self.front_trackingfeats = frontdata[5]
        '''=========== corresponds to 0/1_tracking_output.data ============================='''
self.back_boxes = back_outdata[0]
self.back_feats = back_outdata[1]
self.front_boxes = front_outdata[0]
self.front_feats = front_outdata[1]
def from_datafile(self, eventpath):
evtList = self.evtname.split('_')
if len(evtList)>=2 and len(evtList[-1])>=10 and evtList[-1].isdigit():
self.barcode = evtList[-1]
if len(evtList)==3 and evtList[-1]== evtList[-2]:
self.evtType = 'input'
else:
self.evtType = 'other'
'''================ path of image ============='''
frontImgs, frontFid = [], []
backImgs, backFid = [], []
for imgname in os.listdir(eventpath):
name, ext = os.path.splitext(imgname)
if ext not in IMG_FORMAT or name.find('frameId') < 0: continue
            if len(name.split('_')) < 4 or not name.split('_')[3].isdigit(): continue
CamerType = name.split('_')[0]
frameId = int(name.split('_')[3])
imgpath = os.path.join(eventpath, imgname)
if CamerType == '0':
backImgs.append(imgpath)
backFid.append(frameId)
if CamerType == '1':
frontImgs.append(imgpath)
frontFid.append(frameId)
        ## front/back image path lists sorted by frame ID
frontIdx = np.argsort(np.array(frontFid))
backIdx = np.argsort(np.array(backFid))
self.front_imgpaths = [frontImgs[i] for i in frontIdx]
self.back_imgpaths = [backImgs[i] for i in backIdx]
'''================ path of video ============='''
for vidname in os.listdir(eventpath):
name, ext = os.path.splitext(vidname)
if ext not in VID_FORMAT: continue
vidpath = os.path.join(eventpath, vidname)
CamerType = name.split('_')[0]
if CamerType == '0':
self.back_videopath = vidpath
if CamerType == '1':
self.front_videopath = vidpath
'''================ process.data ============='''
procpath = Path(eventpath).joinpath('process.data')
if procpath.is_file():
SimiDict = read_similar(procpath)
self.one2one = SimiDict['one2one']
self.one2n = SimiDict['one2n']
self.one2SN = SimiDict['one2SN']
'''=========== 0/1_track.data & 0/1_tracking_output.data ======='''
for dataname in os.listdir(eventpath):
datapath = os.path.join(eventpath, dataname)
if not os.path.isfile(datapath): continue
CamerType = dataname.split('_')[0]
'''========== 0/1_track.data =========='''
if dataname.find("_track.data")>0:
bboxes, ffeats, trackerboxes, trackerfeats, trackingboxes, trackingfeats = extract_data(datapath)
if CamerType == '0':
self.back_yolobboxes = bboxes
self.back_yolofeats = ffeats
self.back_trackerboxes = trackerboxes
self.back_trackerfeats = trackerfeats
self.back_trackingboxes = trackingboxes
self.back_trackingfeats = trackingfeats
if CamerType == '1':
self.front_yolobboxes = bboxes
self.front_yolofeats = ffeats
self.front_trackerboxes = trackerboxes
self.front_trackerfeats = trackerfeats
self.front_trackingboxes = trackingboxes
self.front_trackingfeats = trackingfeats
'''========== 0/1_tracking_output.data =========='''
if dataname.find("_tracking_output.data")>0:
tracking_output_boxes, tracking_output_feats = read_tracking_output(datapath)
if CamerType == '0':
self.back_boxes = tracking_output_boxes
self.back_feats = tracking_output_feats
elif CamerType == '1':
self.front_boxes = tracking_output_boxes
self.front_feats = tracking_output_feats
def from_realtime_datafile(self, eventpath):
# evtList = self.evtname.split('_')
# if len(evtList)>=2 and len(evtList[-1])>=10 and evtList[-1].isdigit():
# self.barcode = evtList[-1]
# if len(evtList)==3 and evtList[-1]== evtList[-2]:
# self.evtType = 'input'
# else:
# self.evtType = 'other'
'''================ path of video ============='''
for vidname in os.listdir(eventpath):
name, ext = os.path.splitext(vidname)
if ext not in VID_FORMAT: continue
vidpath = os.path.join(eventpath, vidname)
CamerType = name.split('_')[0]
if CamerType == '0':
self.back_videopath = vidpath
if CamerType == '1':
self.front_videopath = vidpath
'''================ process.data ============='''
procpath = Path(eventpath).joinpath('process.data')
if procpath.is_file():
SimiDict = read_similar(procpath)
self.one2one = SimiDict['one2one']
self.one2n = SimiDict['one2n']
self.one2SN = SimiDict['one2SN']
'''=========== 0/1_track.data & 0/1_tracking_output.data ======='''
for dataname in os.listdir(eventpath):
datapath = os.path.join(eventpath, dataname)
if not os.path.isfile(datapath): continue
CamerType = dataname.split('_')[0]
'''========== 0/1_track.data =========='''
if dataname.find("_track.data")>0:
trackerboxes, trackerfeats = extract_data_realtime(datapath)
if CamerType == '0':
self.back_trackerboxes = trackerboxes
self.back_trackerfeats = trackerfeats
if CamerType == '1':
self.front_trackerboxes = trackerboxes
self.front_trackerfeats = trackerfeats
'''========== 0/1_tracking_output.data =========='''
if dataname.find("_tracking_output.data")>0:
trackingboxes, trackingfeats, tracking_outboxes, tracking_outfeats = read_tracking_output_realtime(datapath)
if CamerType == '0':
self.back_trackingboxes = trackingboxes
self.back_trackingfeats = trackingfeats
self.back_boxes = tracking_outboxes
self.back_feats = tracking_outfeats
elif CamerType == '1':
self.front_trackingboxes = trackingboxes
self.front_trackingfeats = trackingfeats
self.front_boxes = tracking_outboxes
self.front_feats = tracking_outfeats
def compose_feats(self):
        '''compose the features of the event'''
feats_compose = np.empty((0, 256), dtype=np.float64)
if len(self.front_feats):
for feat in self.front_feats:
feats_compose = np.concatenate((feats_compose, feat), axis=0)
if len(self.back_feats):
for feat in self.back_feats:
feats_compose = np.concatenate((feats_compose, feat), axis=0)
self.feats_compose = feats_compose
def select_feats(self):
        '''select the features of the event'''
if len(self.front_feats):
self.feats_select = self.front_feats[0]
elif len(self.back_feats):
self.feats_select = self.back_feats[0]
def plot_save_image(self, savepath):
def array2list(bboxes):
'''[x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]'''
frame_ids = bboxes[:, 7].astype(int)
fID = np.unique(bboxes[:, 7].astype(int))
fboxes = []
for f_id in fID:
idx = np.where(frame_ids==f_id)[0]
box = bboxes[idx, :]
fboxes.append((f_id, box))
return fboxes
imgpairs = []
cameras = ('front', 'back')
for camera in cameras:
if camera == 'front':
boxes = self.front_trackerboxes
imgpaths = self.front_imgpaths
else:
boxes = self.back_trackerboxes
imgpaths = self.back_imgpaths
fboxes = array2list(boxes)
for fid, fbox in fboxes:
imgpath = imgpaths[int(fid-1)]
image = cv2.imread(imgpath)
annotator = Annotator(image.copy(), line_width=2)
for i, box in enumerate(fbox):
x1, y1, x2, y2, tid, score, cls, fid, bid = box
label = f'{int(tid), int(cls)}'
if tid >=0 and cls==0:
color = colors(int(cls), True)
elif tid >=0 and cls!=0:
color = colors(int(tid), True)
else:
                        color = colors(19, True)  # 19 is the last entry of the palette
xyxy = (x1/2, y1/2, x2/2, y2/2)
annotator.box_label(xyxy, label, color=color)
im0 = annotator.result()
imgpairs.append((Path(imgpath).name, im0))
# spath = os.path.join(savepath, Path(imgpath).name)
# cv2.imwrite(spath, im0)
return imgpairs
def save_event_subimg(self, savepath):
        '''
        function: save the track subimages of one shopping event
        event items: barcode, type, filepath, back_imgpaths, front_imgpaths,
              back_boxes, front_boxes, back_feats, front_feats,
              feats_compose, feats_select
        subimage order: front camera first, then back camera, indexed by i,
        matching the order in "feats_compose"
        '''
imgpairs = []
cameras = ('front', 'back')
for camera in cameras:
            boxes = np.empty((0, 9), dtype=np.float64)  ## compatible with class doTracks
if camera == 'front':
for b in self.front_boxes:
boxes = np.concatenate((boxes, b), axis=0)
imgpaths = self.front_imgpaths
else:
for b in self.back_boxes:
boxes = np.concatenate((boxes, b), axis=0)
imgpaths = self.back_imgpaths
for i, box in enumerate(boxes):
x1, y1, x2, y2, tid, score, cls, fid, bid = box
imgpath = imgpaths[int(fid-1)]
image = cv2.imread(imgpath)
subimg = image[int(y1/2):int(y2/2), int(x1/2):int(x2/2), :]
camerType, timeTamp, _, frameID = os.path.basename(imgpath).split('.')[0].split('_')
subimgName = f"cam{camerType}_{i}_tid{int(tid)}_fid({int(fid)}, {frameID}).png"
imgpairs.append((subimgName, subimg))
# spath = os.path.join(savepath, subimgName)
# cv2.imwrite(spath, subimg)
return imgpairs
        # basename = os.path.basename(event['filepath'])
        # print(f"Image saved: {os.path.basename(self.eventpath)}")  # unreachable: placed after return
def draw_tracks(self):
front_edge = cv2.imread(r"D:\DetectTracking\tracking\shopcart\cart_tempt\board_ftmp_line.png")
back_edge = cv2.imread(r"D:\DetectTracking\tracking\shopcart\cart_tempt\edgeline.png")
front_trackerboxes = array2list(self.front_trackerboxes)
back_trackerboxes = array2list(self.back_trackerboxes)
# img1, img2 = edgeline.copy(), edgeline.copy()
img1 = drawTrack(front_trackerboxes, front_edge.copy())
img2 = drawTrack(self.front_trackingboxes, front_edge.copy())
img3 = drawTrack(back_trackerboxes, back_edge.copy())
img4 = drawTrack(self.back_trackingboxes, back_edge.copy())
imgcat1 = np.concatenate((img1, img2), axis = 1)
H, W = imgcat1.shape[:2]
cv2.line(imgcat1, (int(W/2), 0), (int(W/2), H), (128, 255, 128), 2)
imgcat2 = np.concatenate((img3, img4), axis = 1)
H, W = imgcat2.shape[:2]
cv2.line(imgcat2, (int(W/2), 0), (int(W/2), H), (128, 255, 128), 2)
illus = [imgcat1, imgcat2]
if len(illus):
img_cat = np.concatenate(illus, axis = 1)
if len(illus)==2:
H, W = img_cat.shape[:2]
cv2.line(img_cat, (int(W/2), 0), (int(W/2), int(H)), (128, 128, 255), 3)
return img_cat
def main():
# pklpath = r"D:\DetectTracking\evtresult\images2\ShoppingDict.pkl"
# evt = ShoppingEvent(pklpath, stype='source')
evtpath = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\images\20241209-160248-08edd5f6-1806-45ad-babf-7a4dd11cea60_6973226721445"
evt = ShoppingEvent(evtpath, stype='data')
img_cat = evt.draw_tracks()
cv2.imwrite("a.png", img_cat)
if __name__ == "__main__":
main()
# main1()

View File

@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 31 15:17:01 2024
@author: ym
"""
import numpy as np
import matplotlib.pyplot as plt
def showHist(err, correct):
err = np.array(err)
correct = np.array(correct)
fig, axs = plt.subplots(2, 1)
axs[0].hist(err, bins=50, edgecolor='black')
axs[0].set_xlim([0, 1])
axs[0].set_title('err')
axs[1].hist(correct, bins=50, edgecolor='black')
axs[1].set_xlim([0, 1])
axs[1].set_title('correct')
# plt.show()
return plt
def show_recall_prec(recall, prec, ths):
# x = np.linspace(start=-0, stop=1, num=11, endpoint=True).tolist()
fig = plt.figure(figsize=(10, 6))
plt.plot(ths, recall, color='red', label='recall')
plt.plot(ths, prec, color='blue', label='PrecisePos')
plt.legend()
    plt.xlabel('threshold')
# plt.ylabel('Similarity')
plt.grid(True, linestyle='--', alpha=0.5)
# plt.savefig('accuracy_recall_grid.png')
# plt.show()
# plt.close()
return plt
def compute_recall_precision(err_similarity, correct_similarity):
ths = np.linspace(0, 1, 51)
recall, prec = [], []
for th in ths:
TP = len([num for num in correct_similarity if num >= th])
FP = len([num for num in err_similarity if num >= th])
if (TP+FP) == 0:
prec.append(1)
recall.append(0)
else:
prec.append(TP / (TP + FP))
            recall.append(TP / len(correct_similarity))  # recall = TP/(TP+FN); positives are the correct pairs
return recall, prec, ths
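# Hedged end-to-end sketch with synthetic scores: correct pairs score higher
# than erroneous ones, then recall/precision are plotted over the thresholds.
def _demo_recall_precision():
    rng = np.random.default_rng(0)
    correct = rng.uniform(0.6, 0.95, 50).tolist()
    err = rng.uniform(0.1, 0.5, 50).tolist()
    recall, prec, ths = compute_recall_precision(err, correct)
    show_recall_prec(recall, prec, ths).show()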

View File

@ -0,0 +1,182 @@
# -*- coding: utf-8 -*-
"""
@author: LiChen
"""
import json
import os
import pickle
import numpy as np
import sys
sys.path.append(r"D:\DetectTracking\contrast")
from config import config as conf
# from img_data import library_imgs, temp_imgs, main_library_imgs, main_imgs_2
# from test_logic import initModel,getFeatureList
from model import resnet18
import torch
from PIL import Image
device = conf.device
def initModel():
model = resnet18().to(device)
model.load_state_dict(torch.load(conf.test_model, map_location=conf.device))
model.eval()
return model
def convert_rgba_to_rgb(image_path, output_path=None):
"""
将给定路径的4通道PNG图像转换为3通道并保存到指定输出路径。
:param image_path: 输入图像的路径
:param output_path: 转换后的图像保存路径
"""
    # open the image
    img = Image.open(image_path)
    # convert from RGBA to RGB; .convert('RGB') drops the alpha channel
    if img.mode == 'RGBA':
        # convert to RGB mode
        img_rgb = img.convert('RGB')
        # save the converted image in place
        img_rgb.save(image_path)
        print(f"Image converted from RGBA to RGB and saved to {image_path}")
    # else:
    #     # if the image is already RGB or another mode, save it directly
    #     img.save(image_path)
    #     print(f"Image already in {img.mode} mode, saved to {image_path}")
def test_preprocess(images: list, actionModel=False) -> torch.Tensor:
res = []
for img in images:
        try:
            print(img)
            im = conf.test_transform(img) if actionModel else conf.test_transform(Image.open(img))
            res.append(im)
        except Exception:
            continue
data = torch.stack(res)
return data
def inference(images, model, actionModel=False):
data = test_preprocess(images, actionModel)
if torch.cuda.is_available():
data = data.to(conf.device)
features = model(data)
return features
def group_image(images, batch=64) -> list:
"""Group image paths by batch size"""
size = len(images)
res = []
for i in range(0, size, batch):
end = min(batch + i, size)
res.append(images[i:end])
return res
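# Simple check of the batching behaviour (illustration only): 5 paths with
# batch=2 should yield groups of sizes [2, 2, 1].
def _demo_group_image():
    paths = [f"img_{i}.jpg" for i in range(5)]  # hypothetical paths
    print([len(g) for g in group_image(paths, batch=2)])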
def getFeatureList(barList, imgList, model):
featList = [[] for i in range(len(barList))]
for index, feat in enumerate(imgList):
groups = group_image(feat)
for group in groups:
feat_tensor = inference(group, model)
for fe in feat_tensor:
                fe_np = fe.squeeze().detach().cpu().numpy()  # .cpu() is a no-op on CPU tensors; comparing fe.device to 'cpu' never matched
featList[index].append(fe_np)
return featList
def get_files(folder):
file_dict = {}
cnt = 0
# barcode_list = ['6944649700065', '6924743915848', '6920459905012', '6901285991219', '6924882406269']
for root, dirs, files in os.walk(folder):
        folder_name = os.path.basename(root)  # name of the current folder
print(folder_name)
# with open('main_barcode.txt','a') as f:
# f.write(folder_name + '\n')
        # if len(dirs) == 0 and len(files) > 0 and folder_name in barcode_list:
        if len(dirs) == 0 and len(files) > 0:  # a leaf folder that contains files
            file_names = [os.path.join(root, file) for file in files]  # all file paths
            kept = []
            for file_name in file_names:
                try:
                    convert_rgba_to_rgb(file_name)
                    kept.append(file_name)
                except Exception:
                    pass  # skip unreadable files; removing from a list while iterating skips entries
            cnt += len(kept)
            file_dict[folder_name] = kept
print(cnt)
return file_dict
def normalize(queFeatList):
for num1 in range(len(queFeatList)):
for num2 in range(len(queFeatList[num1])):
queFeatList[num1][num2] = queFeatList[num1][num2] / np.linalg.norm(queFeatList[num1][num2])
return queFeatList
def img2feature(imgs_dict, model, barcode_flag):  # barcode_flag is currently unused
    if len(imgs_dict) == 0:
        raise ValueError("No imgs files provided")
    queBarIdList = list(imgs_dict.keys())
    queImgsList = list(imgs_dict.values())
    queFeatList = getFeatureList(queBarIdList, queImgsList, model)
    queFeatList = normalize(queFeatList)
    return queBarIdList, queFeatList
def createFeatureDict(imgs_dict, model,
                      barcode_flag=False):  ##imgs->{barcode1:[img1_1...img1_n], barcode2:[img2_1...img2_n]}
    dicts_all = {}
    value_list = []
    barcode_list, imgs_list = img2feature(imgs_dict, model, barcode_flag=False)
    for i in range(len(barcode_list)):
        dicts = {}
        imgs_list_ = []
        for j in range(len(imgs_list[i])):
            imgs_list_.append(imgs_list[i][j].tolist())
            # with open('feature.txt', 'a') as f:
            #     f.write(str(imgs_list[i][j].tolist()) + '\n')
        dicts['key'] = barcode_list[i]
        dicts['value'] = imgs_list_
        value_list.append(dicts)
    dicts_all['total'] = value_list
    print('dicts_all', dicts_all)
    # write with 'w', not 'a': appending a second JSON document to the same
    # file would make it unparseable
    with open('data_0909.json', 'w') as json_file:
        json.dump(dicts_all, json_file)
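    # Resulting JSON schema (read back with json.load; derived from the code above):
    #   {"total": [{"key": "<barcode>", "value": [[f1, ..., fD], ...]}, ...]}
    # where each "value" row is one L2-normalised feature vector.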
def read_pkl_file(file_path):
with open(file_path, 'rb') as file:
data = pickle.load(file)
return data
if __name__ == "__main__":
    ### Dump {barcode: per-image feature vectors} to a JSON file
    img_path = 'data/2000_train/base'
    imgs_dict = get_files(img_path)
    # print('imgs_dict', imgs_dict)
    model = initModel()
    createFeatureDict(imgs_dict, model, barcode_flag=False)
    ### =======================================================
    # ## ========= pkl -> json ================
    # contents = read_pkl_file('dicts_list_1887.pkl')
    # print(contents)
    # with open('data_1887.json', 'w') as json_file:
    #     json.dump(contents, json_file)

140
detecttracking/export.py Normal file
View File

@ -0,0 +1,140 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Export a YOLOv5 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit
Format | `export.py --include` | Model
--- | --- | ---
PyTorch | - | yolov5s.pt
TorchScript | `torchscript` | yolov5s.torchscript
ONNX | `onnx` | yolov5s.onnx
OpenVINO | `openvino` | yolov5s_openvino_model/
TensorRT | `engine` | yolov5s.engine
CoreML | `coreml` | yolov5s.mlmodel
TensorFlow SavedModel | `saved_model` | yolov5s_saved_model/
TensorFlow GraphDef | `pb` | yolov5s.pb
TensorFlow Lite | `tflite` | yolov5s.tflite
TensorFlow Edge TPU | `edgetpu` | yolov5s_edgetpu.tflite
TensorFlow.js | `tfjs` | yolov5s_web_model/
PaddlePaddle | `paddle` | yolov5s_paddle_model/
Requirements:
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPU
Usage:
$ python export.py --weights yolov5s.pt --include torchscript onnx openvino engine coreml tflite ...
Inference:
$ python detect.py --weights yolov5s.pt # PyTorch
yolov5s.torchscript # TorchScript
yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s_openvino_model # OpenVINO
yolov5s.engine # TensorRT
yolov5s.mlmodel # CoreML (macOS-only)
yolov5s_saved_model # TensorFlow SavedModel
yolov5s.pb # TensorFlow GraphDef
yolov5s.tflite # TensorFlow Lite
yolov5s_edgetpu.tflite # TensorFlow Edge TPU
yolov5s_paddle_model # PaddlePaddle
TensorFlow.js:
$ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
$ npm install
$ ln -s ../../yolov5/yolov5s_web_model public/yolov5s_web_model
$ npm start
"""
import json
import os
import platform
import sys
from pathlib import Path
import pandas as pd
import torch
from torch.utils.mobile_optimizer import optimize_for_mobile
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
if platform.system() != 'Windows':
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from detecttracking.utils.general import (LOGGER, Profile, colorstr, file_size, get_default_args)
MACOS = platform.system() == 'Darwin' # macOS environment
class iOSModel(torch.nn.Module):
def __init__(self, model, im):
super().__init__()
b, c, h, w = im.shape # batch, channel, height, width
self.model = model
self.nc = model.nc # number of classes
if w == h:
self.normalize = 1. / w
else:
self.normalize = torch.tensor([1. / w, 1. / h, 1. / w, 1. / h]) # broadcast (slower, smaller)
# np = model(im)[0].shape[1] # number of points
# self.normalize = torch.tensor([1. / w, 1. / h, 1. / w, 1. / h]).expand(np, 4) # explicit (faster, larger)
def forward(self, x):
xywh, conf, cls = self.model(x)[0].squeeze().split((4, 1, self.nc), 1)
return cls * conf, xywh * self.normalize # confidence (3780, 80), coordinates (3780, 4)
def export_formats():
# YOLOv5 export formats
x = [
['PyTorch', '-', '.pt', True, True],
['TorchScript', 'torchscript', '.torchscript', True, True],
['ONNX', 'onnx', '.onnx', True, True],
['OpenVINO', 'openvino', '_openvino_model', True, False],
['TensorRT', 'engine', '.engine', False, True],
['CoreML', 'coreml', '.mlmodel', True, False],
['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True],
['TensorFlow GraphDef', 'pb', '.pb', True, True],
['TensorFlow Lite', 'tflite', '.tflite', True, False],
['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False],
['TensorFlow.js', 'tfjs', '_web_model', False, False],
['PaddlePaddle', 'paddle', '_paddle_model', True, True], ]
return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])
def try_export(inner_func):
    # YOLOv5 export decorator, i.e. @try_export
inner_args = get_default_args(inner_func)
def outer_func(*args, **kwargs):
prefix = inner_args['prefix']
try:
with Profile() as dt:
f, model = inner_func(*args, **kwargs)
LOGGER.info(f'{prefix} export success ✅ {dt.t:.1f}s, saved as {f} ({file_size(f):.1f} MB)')
return f, model
except Exception as e:
LOGGER.info(f'{prefix} export failure ❌ {dt.t:.1f}s: {e}')
return None, None
return outer_func
@try_export
def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
# YOLOv5 TorchScript model export
LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
f = file.with_suffix('.torchscript')
ts = torch.jit.trace(model, im, strict=False)
d = {'shape': im.shape, 'stride': int(max(model.stride)), 'names': model.names}
extra_files = {'config.txt': json.dumps(d)} # torch._C.ExtraFilesMap()
if optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
else:
ts.save(str(f), _extra_files=extra_files)
return f, None
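# Minimal usage sketch (assumes an already-loaded YOLOv5 model `model`, a dummy
# input tensor and a weights Path; the names below are illustrative only):
#
#   from pathlib import Path
#   im = torch.zeros(1, 3, 640, 640)  # dummy BCHW input used for tracing
#   f, _ = export_torchscript(model, im, Path('yolov5s.pt'), optimize=False)
#   # the @try_export wrapper returns (path, model) on success, (None, None) on failure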

View File

@ -0,0 +1,285 @@
# -*- coding: utf-8 -*-
"""
Created on Fri May 31 14:50:21 2024
@author: ym
"""
import cv2
import numpy as np
import torch
from scipy.spatial.distance import cdist
from tracking.trackers.reid.config import config as ReIDConfig
from tracking.trackers.reid.reid_interface import ReIDInterface
ReIDEncoder = ReIDInterface(ReIDConfig)
def read_data_file(datapath):
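    '''Parse a tracker dump file: each "CameraId" line starts a new frame and
    carries a millisecond timestamp; the following "box"/"feat" lines hold that
    frame's detections and features. A gap of more than 1e4 ms between frames
    starts a new video segment.'''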
with open(datapath, 'r') as file:
lines = file.readlines()
Videos = []
FrameBoxes, FrameFeats = [], []
boxes, feats = [], []
bboxes, ffeats = [], []
timestamp = []
t1 = None
for line in lines:
if line.find('CameraId') >= 0:
t = int(line.split(',')[1].split(':')[1])
timestamp.append(t)
if len(boxes) and len(feats):
FrameBoxes.append(np.array(boxes, dtype = np.float32))
FrameFeats.append(np.array(feats, dtype = np.float32))
boxes, feats = [], []
if t1 and t - t1 > 1e4:
Videos.append((FrameBoxes, FrameFeats))
FrameBoxes, FrameFeats = [], []
t1 = int(line.split(',')[1].split(':')[1])
        if line.find('box') >= 0:
            box = line.split(':')[1].split(',')[:-1]
            boxes.append(box)
            bboxes.append(boxes)
        if line.find('feat') >= 0:
            feat = line.split(':')[1].split(',')[:-1]
            feats.append(feat)
            ffeats.append(feat)
FrameBoxes.append(np.array(boxes, dtype = np.float32))
FrameFeats.append(np.array(feats, dtype = np.float32))
Videos.append((FrameBoxes, FrameFeats))
    TimeStamp = np.array(timestamp, dtype=np.float32)
    TimesDiff = np.diff(timestamp)  # gaps between consecutive frame timestamps (currently unused)
return Videos
def inference_image(image, detections):
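    '''Crop each detection (tlbr boxes in detections[:, :4]) out of `image`,
    push the crops through the ReID encoder in batches of ReIDEncoder.batch_size,
    and return the crops together with an (N, embedding_size) feature matrix.'''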
H, W, _ = np.shape(image)
imgs = []
batch_patches = []
patches = []
for d in range(np.size(detections, 0)):
tlbr = detections[d, :4].astype(np.int_)
tlbr[0] = max(0, tlbr[0])
tlbr[1] = max(0, tlbr[1])
tlbr[2] = min(W - 1, tlbr[2])
tlbr[3] = min(H - 1, tlbr[3])
img1 = image[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2], :]
img = img1[:, :, ::-1].copy() # the model expects RGB inputs
patch = ReIDEncoder.transform(img)
imgs.append(img1)
# patch = patch.to(device=self.device).half()
if str(ReIDEncoder.device) != "cpu":
patch = patch.to(device=ReIDEncoder.device).half()
else:
patch = patch.to(device=ReIDEncoder.device)
patches.append(patch)
if (d + 1) % ReIDEncoder.batch_size == 0:
patches = torch.stack(patches, dim=0)
batch_patches.append(patches)
patches = []
if len(patches):
patches = torch.stack(patches, dim=0)
batch_patches.append(patches)
features = np.zeros((0, ReIDEncoder.embedding_size))
for patches in batch_patches:
pred = ReIDEncoder.model(patches)
pred[torch.isinf(pred)] = 1.0
feat = pred.cpu().data.numpy()
features = np.vstack((features, feat))
return imgs, features
def readimg():
imgpath = r"D:\datasets\ym\Img_ResnetData\result\0.png"
image = cv2.imread(imgpath)
img = cv2.resize(image, (224, 224))
cv2.imwrite('0_224x224.jpg', img)
def readdata(datapath):
with open(datapath, 'r') as file:
lines = file.readlines()
dlist = lines[0].split(',')
dfloat = [float(d) for d in dlist]
afeat = np.array(dfloat).reshape(1, -1)
return afeat
def readrawimg(datapath):
with open(datapath, 'r') as file:
llines = file.readlines()
imgs = []
    row = 224  # each dumped image occupies 224 text lines (one per pixel row)
    for i in range(8):  # the dump holds 8 concatenated 224x224 images
        lines = llines[i * row: (i + 1) * row]
img = np.empty((224, 224, 0), dtype=np.float32)
imgr = np.empty((0, 224), dtype=np.float32)
imgg = np.empty((0, 224), dtype=np.float32)
imgb = np.empty((0, 224), dtype=np.float32)
for line in lines:
dlist = line.split(' ')[0:224]
img_r = np.array([float(s.split(',')[0]) for s in dlist], dtype=np.float32).reshape(1, -1)
img_g = np.array([float(s.split(',')[1]) for s in dlist], dtype=np.float32).reshape(1, -1)
img_b = np.array([float(s.split(',')[2]) for s in dlist], dtype=np.float32).reshape(1, -1)
# img_r = [float(s.split(',')[0]) for s in dlist if len(s.split(',')[0].encode('utf-8')) == 4]
# img_g = [float(s.split(',')[1]) for s in dlist if len(s.split(',')[1].encode('utf-8')) == 4]
# img_b = [float(s.split(',')[2]) for s in dlist if len(s.split(',')[2].encode('utf-8')) == 4]
imgr = np.concatenate((imgr, img_r), axis=0)
imgg = np.concatenate((imgg, img_g), axis=0)
imgb = np.concatenate((imgb, img_b), axis=0)
imgr = imgr[:, :, None]
imgg = imgg[:, :, None]
imgb = imgb[:, :, None]
img = np.concatenate((imgb, imgg, imgr), axis=2).astype(np.uint8)
imgs.append(img)
return imgs
def inference(image):
patches = []
image = image[:, :, ::-1].copy() # the model expects RGB inputs
patch = ReIDEncoder.transform(image)
patch = patch.to(device=ReIDEncoder.device)
patches.append(patch)
patches = torch.stack(patches, dim=0)
pred = ReIDEncoder.model(patches)
pred[torch.isinf(pred)] = 1.0
bfeat = pred.cpu().data.numpy()
return bfeat
def test_img_feat():
# datapath = r"D:\datasets\ym\Img_ResnetData\aa\aa.txt"
# afeat = readdata(datapath)
imgpath = r"D:\datasets\ym\Img_ResnetData\aa\aa.jpg"
img = cv2.imread(imgpath)
bfeat = inference(img)
datapath = r"D:\datasets\ym\Img_ResnetData\rawimg\7.txt"
afeat = readdata(datapath)
rawpath = r"D:\datasets\ym\Img_ResnetData\rawimg\28950640607_mat_rgb"
imgx = readrawimg(rawpath)
cv2.imwrite("rawimg.png", imgx[7])
bfeatx = inference(imgx[7])
cost_matrix = 1 - np.maximum(0.0, cdist(afeat, bfeatx, 'cosine'))
imgpath1 = r"D:\datasets\ym\Img_ResnetData\result\0_224x224.png"
img1 = cv2.imread(imgpath1)
bfeat1 = inference(img1)
aafeat = afeat / np.linalg.norm(afeat, ord=2, axis=1, keepdims=True)
bbfeat = bfeat / np.linalg.norm(bfeat, ord=2, axis=1, keepdims=True)
cost_matrix = 1 - np.maximum(0.0, cdist(aafeat, bbfeat, 'cosine'))
print("Done!!!")
def main():
imgpath = r"D:\datasets\ym\Img_ResnetData\20240531-103547_0354b1cb-53fa-48de-86cd-ac3c5b127ada_6921168593576\3568800050000_0.jpeg"
datapath = r"D:\datasets\ym\Img_ResnetData\20240531-103547_0354b1cb-53fa-48de-86cd-ac3c5b127ada_6921168593576\0_tracker_inout.data"
savepath = r"D:\datasets\ym\Img_ResnetData\result"
image = cv2.imread(imgpath)
Videos = read_data_file(datapath)
bboxes, afeats = Videos[0][0][0], Videos[0][1][0]
imgs, bfeats = inference_image(image, bboxes)
aafeats = afeats / np.linalg.norm(afeats, ord=2, axis=1, keepdims=True)
bbfeats = bfeats / np.linalg.norm(bfeats, ord=2, axis=1, keepdims=True)
cost_matrix = 1 - np.maximum(0.0, cdist(aafeats, bbfeats, 'cosine'))
    for i, img in enumerate(imgs):
        cv2.imwrite(savepath + f"/{i}.png", img)  # forward slash also works on Windows; f"\{i}" was fragile
print("Done!!!!")
if __name__ == '__main__':
# main()
# readimg()
test_img_feat()

View File

@ -0,0 +1,107 @@
import cv2
import os
import numpy as np
import mediapipe as mp
class hand_pose:
def __init__(self, min_dc=0.45, min_tc=0.45, max_nh=1):
self.mp_drawing = mp.solutions.drawing_utils
self.mp_hands = mp.solutions.hands
self.hands = self.mp_hands.Hands(
static_image_mode=False,
max_num_hands=max_nh,
min_detection_confidence=min_dc,
min_tracking_confidence=min_tc
)
@staticmethod
def img_show(img):
cv2.imshow('MediaPipe Hands', img)
cv2.waitKey(1)
def draw(self, img):
hand_locals = []
h_re = self.hands.process(img)
if h_re.multi_hand_landmarks:
for hand in h_re.multi_handedness:
                hand_position = 'Left' if hand.classification[0].label == "Left" else 'Right'  # currently unused
hand_local = []
for hand_landmarks in h_re.multi_hand_landmarks:
self.mp_drawing.draw_landmarks(img,
hand_landmarks,
self.mp_hands.HAND_CONNECTIONS)
                imgshow = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                # self.img_show(imgshow)
                cv2.imwrite(f"./images/{k}.png", imgshow)  # relies on the module-level frame counter k set in __main__
                '''collect the 21 hand keypoint coordinates in pixel units'''
                for i in range(21):
                    x = hand_landmarks.landmark[i].x * img.shape[1]
                    y = hand_landmarks.landmark[i].y * img.shape[0]
                    hand_local.append((x, y))
            hand_locals.append(hand_local)
return hand_locals
def get_hand_local(self, track, image):
'''tracks: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]'''
H, W, _ = np.shape(image)
tlbr = track[:4].astype(np.int_)
x1 = max(0, tlbr[0]-50)
y1 = max(0, tlbr[1]-50)
x2 = min(W - 1, tlbr[2]+50)
y2 = min(H - 1, tlbr[3]+50)
img = image[y1:y2, x1:x2, :]
imgshow = img.copy()
# hand_local = np.empty((0,2), dtype = np.int_)
min_x, max_x = W-1, 0
min_y, max_y = H-1, 0
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
h_re = self.hands.process(img)
if h_re.multi_hand_landmarks:
for hand_landmarks in h_re.multi_hand_landmarks:
for i in range(21):
x = hand_landmarks.landmark[i].x * img.shape[1]
y = hand_landmarks.landmark[i].y * img.shape[0]
# hand_local = np.concatenate([hand_local, np.array([[x1+x, y1+y]]).astype(np.int_)], axis=0)
if min_x>x1+x: min_x=x1+x
if max_x<x1+x: max_x=x1+x
if min_y>y1+y: min_y=y1+y
if max_y<y1+y: max_y=y1+y
self.mp_drawing.draw_landmarks(img, hand_landmarks, self.mp_hands.HAND_CONNECTIONS)
imgshow = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
if min_x < max_x and min_y < max_y:
hand_local = (min_x, min_y, max_x, max_y)
else:
hand_local = None
return hand_local, imgshow
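# Usage sketch (hypothetical track row laid out as
# [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]):
#
#   track = np.array([120, 80, 360, 420, 1, 0.9, 0, 0, 0], dtype=np.float32)
#   hand_box, vis = handpose.get_hand_local(track, frame)
#   # hand_box is (min_x, min_y, max_x, max_y) in full-image coords, or None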
if __name__ == "__main__":
handpose = hand_pose()
file = r"D:\datasets\ym\videos\标记视频\加购_双手放入_1.mp4"
cap = cv2.VideoCapture(file)
k = 0
while cap.isOpened():
ret, frame = cap.read()
        if not ret:
            break
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
hand_local = handpose.draw(frame)
k += 1
cap.release()
# cv2.destroyAllWindows()

View File

@ -0,0 +1,44 @@
import cv2
import os
import mediapipe as mp
class hand_pose:
def __init__(self, min_dc=0.45, min_tc=0.45, max_nh=1):
self.mp_drawing = mp.solutions.drawing_utils
self.mp_hands = mp.solutions.hands
self.hands = self.mp_hands.Hands(
static_image_mode=False,
max_num_hands=max_nh,
min_detection_confidence=min_dc,
min_tracking_confidence=min_tc
)
@staticmethod
def img_show(img):
cv2.imshow('MediaPipe Hands', img)
cv2.waitKey(1)
def draw(self, img):
hand_local = []
h_re = self.hands.process(img)
if h_re.multi_hand_landmarks:
for hand_landmarks in h_re.multi_hand_landmarks:
self.mp_drawing.draw_landmarks(img,
hand_landmarks,
self.mp_hands.HAND_CONNECTIONS)
self.img_show(img)
            '''collect the hand keypoint coordinates'''
for i in range(21):
x = hand_landmarks.landmark[i].x * img.shape[1]
y = hand_landmarks.landmark[i].y * img.shape[0]
hand_local.append((x, y))
return hand_local
if __name__ == "__main__":
    handpose = hand_pose()
    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:  # stop cleanly if the camera yields no frame
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        handpose.draw(frame)
    cap.release()

File diff suppressed because it is too large

View File

View File

@ -0,0 +1,883 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Common modules
"""
import ast
import contextlib
import json
import math
import platform
import warnings
import zipfile
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path
from urllib.parse import urlparse
import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp
# Import 'ultralytics' package or install if missing
try:
import ultralytics
assert hasattr(ultralytics, '__version__') # verify package is not directory
except (ImportError, AssertionError):
import os
os.system('pip install -U ultralytics')
import ultralytics
from ultralytics.utils.plotting import Annotator, colors, save_one_box
from detecttracking.utils import TryExcept
from detecttracking.utils.dataloaders import exif_transpose, letterbox
from detecttracking.utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
increment_path, is_jupyter, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy,
xyxy2xywh, yaml_load)
from detecttracking.utils.torch_utils import copy_attr, smart_inference_mode
def autopad(k, p=None, d=1): # kernel, padding, dilation
# Pad to 'same' shape outputs
if d > 1:
k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
return p
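# Worked examples: autopad(3) -> 1, so a 3x3 conv at stride 1 keeps spatial size;
# autopad(3, d=2) -> 2, since the dilated 3x3 kernel effectively spans 5 pixels.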
class Conv(nn.Module):
# Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
default_act = nn.SiLU() # default activation
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
super().__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
def forward(self, x):
return self.act(self.bn(self.conv(x)))
def forward_fuse(self, x):
return self.act(self.conv(x))
class DWConv(Conv):
# Depth-wise convolution
def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation
super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
class DWConvTranspose2d(nn.ConvTranspose2d):
# Depth-wise transpose convolution
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
class TransformerLayer(nn.Module):
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
def __init__(self, c, num_heads):
super().__init__()
self.q = nn.Linear(c, c, bias=False)
self.k = nn.Linear(c, c, bias=False)
self.v = nn.Linear(c, c, bias=False)
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
self.fc1 = nn.Linear(c, c, bias=False)
self.fc2 = nn.Linear(c, c, bias=False)
def forward(self, x):
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
x = self.fc2(self.fc1(x)) + x
return x
class TransformerBlock(nn.Module):
# Vision Transformer https://arxiv.org/abs/2010.11929
def __init__(self, c1, c2, num_heads, num_layers):
super().__init__()
self.conv = None
if c1 != c2:
self.conv = Conv(c1, c2)
self.linear = nn.Linear(c2, c2) # learnable position embedding
self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
self.c2 = c2
def forward(self, x):
if self.conv is not None:
x = self.conv(x)
b, _, w, h = x.shape
p = x.flatten(2).permute(2, 0, 1)
return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
class Bottleneck(nn.Module):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_, c2, 3, 1, g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
self.cv4 = Conv(2 * c_, c2, 1, 1)
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
self.act = nn.SiLU()
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x):
y1 = self.cv3(self.m(self.cv1(x)))
y2 = self.cv2(x)
return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
class CrossConv(nn.Module):
# Cross Convolution Downsample
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, (1, k), (1, s))
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
self.add = shortcut and c1 == c2
def forward(self, x):
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class C3(nn.Module):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c1, c_, 1, 1)
self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
def forward(self, x):
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
class C3x(C3):
# C3 module with cross-convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
class C3TR(C3):
# C3 module with TransformerBlock()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = TransformerBlock(c_, c_, 4, n)
class C3SPP(C3):
# C3 module with SPP()
def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e)
self.m = SPP(c_, c_, k)
class C3Ghost(C3):
# C3 module with GhostBottleneck()
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
super().__init__(c1, c2, n, shortcut, g, e)
c_ = int(c2 * e) # hidden channels
self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
class SPP(nn.Module):
# Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
def __init__(self, c1, c2, k=(5, 9, 13)):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
class SPPF(nn.Module):
# Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = Conv(c1, c_, 1, 1)
self.cv2 = Conv(c_ * 4, c2, 1, 1)
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
def forward(self, x):
x = self.cv1(x)
with warnings.catch_warnings():
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
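# Note: chaining the same 5x5 max-pool three times gives effective receptive
# fields of 5, 9 and 13, which is why SPPF matches SPP(k=(5, 9, 13)) at lower cost.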
class Focus(nn.Module):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
# self.contract = Contract(gain=2)
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
# return self.conv(self.contract(x))
class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
super().__init__()
c_ = c2 // 2 # hidden channels
self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
def forward(self, x):
y = self.cv1(x)
return torch.cat((y, self.cv2(y)), 1)
class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
super().__init__()
c_ = c2 // 2
self.conv = nn.Sequential(
GhostConv(c1, c_, 1, 1), # pw
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
act=False)) if s == 2 else nn.Identity()
def forward(self, x):
return self.conv(x) + self.shortcut(x)
class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
s = self.gain
x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
def __init__(self, gain=2):
super().__init__()
self.gain = gain
def forward(self, x):
b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
s = self.gain
x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
class Concat(nn.Module):
# Concatenate a list of tensors along dimension
def __init__(self, dimension=1):
super().__init__()
self.d = dimension
def forward(self, x):
return torch.cat(x, self.d)
class DetectMultiBackend(nn.Module):
# YOLOv5 MultiBackend class for python inference on various backends
def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
# Usage:
# PyTorch: weights = *.pt
# TorchScript: *.torchscript
# ONNX Runtime: *.onnx
# ONNX OpenCV DNN: *.onnx --dnn
# OpenVINO: *_openvino_model
# CoreML: *.mlmodel
# TensorRT: *.engine
# TensorFlow SavedModel: *_saved_model
# TensorFlow GraphDef: *.pb
# TensorFlow Lite: *.tflite
# TensorFlow Edge TPU: *_edgetpu.tflite
# PaddlePaddle: *_paddle_model
from detecttracking.models.experimental import attempt_download, attempt_load # scoped to avoid circular import
super().__init__()
w = str(weights[0] if isinstance(weights, list) else weights)
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
fp16 &= pt or jit or onnx or engine or triton # FP16
        nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCHW)
stride = 32 # default stride
cuda = torch.cuda.is_available() and device.type != 'cpu' # use CUDA
if not (pt or triton):
w = attempt_download(w) # download if not local
if pt: # PyTorch
model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
stride = max(int(model.stride.max()), 32) # model stride
names = model.module.names if hasattr(model, 'module') else model.names # get class names
model.half() if fp16 else model.float()
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
elif jit: # TorchScript
LOGGER.info(f'Loading {w} for TorchScript inference...')
extra_files = {'config.txt': ''} # model metadata
model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
model.half() if fp16 else model.float()
if extra_files['config.txt']: # load metadata dict
d = json.loads(extra_files['config.txt'],
object_hook=lambda d: {
int(k) if k.isdigit() else k: v
for k, v in d.items()})
stride, names = int(d['stride']), d['names']
elif dnn: # ONNX OpenCV DNN
LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
check_requirements('opencv-python>=4.5.4')
net = cv2.dnn.readNetFromONNX(w)
elif onnx: # ONNX Runtime
LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
import onnxruntime
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
session = onnxruntime.InferenceSession(w, providers=providers)
output_names = [x.name for x in session.get_outputs()]
meta = session.get_modelmeta().custom_metadata_map # metadata
if 'stride' in meta:
stride, names = int(meta['stride']), eval(meta['names'])
elif xml: # OpenVINO
LOGGER.info(f'Loading {w} for OpenVINO inference...')
check_requirements('openvino>=2023.0') # requires openvino-dev: https://pypi.org/project/openvino-dev/
from openvino.runtime import Core, Layout, get_batch
core = Core()
if not Path(w).is_file(): # if not *.xml
w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir
ov_model = core.read_model(model=w, weights=Path(w).with_suffix('.bin'))
if ov_model.get_parameters()[0].get_layout().empty:
ov_model.get_parameters()[0].set_layout(Layout('NCHW'))
batch_dim = get_batch(ov_model)
if batch_dim.is_static:
batch_size = batch_dim.get_length()
ov_compiled_model = core.compile_model(ov_model, device_name='AUTO') # AUTO selects best available device
stride, names = self._load_metadata(Path(w).with_suffix('.yaml')) # load metadata
elif engine: # TensorRT
LOGGER.info(f'Loading {w} for TensorRT inference...')
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
if device.type == 'cpu':
device = torch.device('cuda:0')
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
logger = trt.Logger(trt.Logger.INFO)
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
model = runtime.deserialize_cuda_engine(f.read())
context = model.create_execution_context()
bindings = OrderedDict()
output_names = []
fp16 = False # default updated below
dynamic = False
for i in range(model.num_bindings):
name = model.get_binding_name(i)
dtype = trt.nptype(model.get_binding_dtype(i))
if model.binding_is_input(i):
if -1 in tuple(model.get_binding_shape(i)): # dynamic
dynamic = True
context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
if dtype == np.float16:
fp16 = True
else: # output
output_names.append(name)
shape = tuple(context.get_binding_shape(i))
im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size
elif coreml: # CoreML
LOGGER.info(f'Loading {w} for CoreML inference...')
import coremltools as ct
model = ct.models.MLModel(w)
elif saved_model: # TF SavedModel
LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
import tensorflow as tf
keras = False # assume TF1 saved_model
model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
import tensorflow as tf
def wrap_frozen_graph(gd, inputs, outputs):
x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), []) # wrapped
ge = x.graph.as_graph_element
return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
def gd_outputs(gd):
name_list, input_list = [], []
for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
name_list.append(node.name)
input_list.extend(node.input)
return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))
gd = tf.Graph().as_graph_def() # TF GraphDef
with open(w, 'rb') as f:
gd.ParseFromString(f.read())
frozen_func = wrap_frozen_graph(gd, inputs='x:0', outputs=gd_outputs(gd))
elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
from tflite_runtime.interpreter import Interpreter, load_delegate
except ImportError:
import tensorflow as tf
Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
delegate = {
'Linux': 'libedgetpu.so.1',
'Darwin': 'libedgetpu.1.dylib',
'Windows': 'edgetpu.dll'}[platform.system()]
interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
else: # TFLite
LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
interpreter = Interpreter(model_path=w) # load TFLite model
interpreter.allocate_tensors() # allocate
input_details = interpreter.get_input_details() # inputs
output_details = interpreter.get_output_details() # outputs
# load metadata
with contextlib.suppress(zipfile.BadZipFile):
with zipfile.ZipFile(w, 'r') as model:
meta_file = model.namelist()[0]
meta = ast.literal_eval(model.read(meta_file).decode('utf-8'))
stride, names = int(meta['stride']), meta['names']
elif tfjs: # TF.js
raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported')
elif paddle: # PaddlePaddle
LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
import paddle.inference as pdi
if not Path(w).is_file(): # if not *.pdmodel
w = next(Path(w).rglob('*.pdmodel')) # get *.pdmodel file from *_paddle_model dir
weights = Path(w).with_suffix('.pdiparams')
config = pdi.Config(str(w), str(weights))
if cuda:
config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
predictor = pdi.create_predictor(config)
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
output_names = predictor.get_output_names()
elif triton: # NVIDIA Triton Inference Server
LOGGER.info(f'Using {w} as Triton Inference Server...')
check_requirements('tritonclient[all]')
from detecttracking.utils.triton import TritonRemoteModel
model = TritonRemoteModel(url=w)
nhwc = model.runtime.startswith('tensorflow')
else:
raise NotImplementedError(f'ERROR: {w} is not a supported format')
# class names
if 'names' not in locals():
names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
if names[0] == 'n01440764' and len(names) == 1000: # ImageNet
names = yaml_load(ROOT / 'data/ImageNet.yaml')['names'] # human-readable names
self.__dict__.update(locals()) # assign all variables to self
def forward(self, im, augment=False, visualize=False):
# YOLOv5 MultiBackend inference
b, ch, h, w = im.shape # batch, channel, height, width
if self.fp16 and im.dtype != torch.float16:
im = im.half() # to FP16
if self.nhwc:
im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3)
if self.pt: # PyTorch
y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
elif self.jit: # TorchScript
y = self.model(im)
elif self.dnn: # ONNX OpenCV DNN
im = im.cpu().numpy() # torch to numpy
self.net.setInput(im)
y = self.net.forward()
elif self.onnx: # ONNX Runtime
im = im.cpu().numpy() # torch to numpy
y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
elif self.xml: # OpenVINO
im = im.cpu().numpy() # FP32
y = list(self.ov_compiled_model(im).values())
elif self.engine: # TensorRT
if self.dynamic and im.shape != self.bindings['images'].shape:
i = self.model.get_binding_index('images')
self.context.set_binding_shape(i, im.shape) # reshape if dynamic
self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
for name in self.output_names:
i = self.model.get_binding_index(name)
self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
s = self.bindings['images'].shape
assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
self.binding_addrs['images'] = int(im.data_ptr())
self.context.execute_v2(list(self.binding_addrs.values()))
y = [self.bindings[x].data for x in sorted(self.output_names)]
elif self.coreml: # CoreML
im = im.cpu().numpy()
im = Image.fromarray((im[0] * 255).astype('uint8'))
# im = im.resize((192, 320), Image.BILINEAR)
y = self.model.predict({'image': im}) # coordinates are xywh normalized
if 'confidence' in y:
box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)  # np.float was removed in NumPy 1.24
y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
else:
y = list(reversed(y.values())) # reversed for segmentation models (pred, proto)
elif self.paddle: # PaddlePaddle
im = im.cpu().numpy().astype(np.float32)
self.input_handle.copy_from_cpu(im)
self.predictor.run()
y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
elif self.triton: # NVIDIA Triton Inference Server
y = self.model(im)
else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
im = im.cpu().numpy()
if self.saved_model: # SavedModel
y = self.model(im, training=False) if self.keras else self.model(im)
elif self.pb: # GraphDef
y = self.frozen_func(x=self.tf.constant(im))
else: # Lite or Edge TPU
input = self.input_details[0]
int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model
if int8:
scale, zero_point = input['quantization']
im = (im / scale + zero_point).astype(np.uint8) # de-scale
self.interpreter.set_tensor(input['index'], im)
self.interpreter.invoke()
y = []
for output in self.output_details:
x = self.interpreter.get_tensor(output['index'])
if int8:
scale, zero_point = output['quantization']
x = (x.astype(np.float32) - zero_point) * scale # re-scale
y.append(x)
y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels
if isinstance(y, (list, tuple)):
return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
else:
return self.from_numpy(y)
def from_numpy(self, x):
return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
def warmup(self, imgsz=(1, 3, 640, 640)):
# Warmup model by running inference once
warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
            for _ in range(2 if self.jit else 1):
self.forward(im) # warmup
@staticmethod
def _model_type(p='path/to/model.pt'):
# Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
# types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
from detecttracking.export import export_formats
from detecttracking.utils.downloads import is_url
sf = list(export_formats().Suffix) # export suffixes
if not is_url(p, check=False):
check_suffix(p, sf) # checks
url = urlparse(p) # if url may be Triton inference server
types = [s in Path(p).name for s in sf]
types[8] &= not types[9] # tflite &= not edgetpu
triton = not any(types) and all([any(s in url.scheme for s in ['http', 'grpc']), url.netloc])
return types + [triton]
@staticmethod
def _load_metadata(f=Path('path/to/meta.yaml')):
# Load metadata from meta.yaml if it exists
if f.exists():
d = yaml_load(f)
return d['stride'], d['names'] # assign stride, names
return None, None
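# Minimal usage sketch for DetectMultiBackend (weights path and input are
# illustrative; the backend is selected from the weights suffix):
#
#   model = DetectMultiBackend('yolov5s.pt', device=torch.device('cpu'))
#   model.warmup(imgsz=(1, 3, 640, 640))
#   im = torch.zeros(1, 3, 640, 640)  # BCHW float input in [0, 1]
#   y = model(im)                     # raw predictions, before NMS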
class AutoShape(nn.Module):
# YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
agnostic = False # NMS class-agnostic
multi_label = False # NMS multiple labels per box
classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
max_det = 1000 # maximum number of detections per image
amp = False # Automatic Mixed Precision (AMP) inference
def __init__(self, model, verbose=True):
super().__init__()
if verbose:
LOGGER.info('Adding AutoShape... ')
copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes
self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance
self.pt = not self.dmb or model.pt # PyTorch model
self.model = model.eval()
if self.pt:
m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
m.inplace = False # Detect.inplace=False for safe multithread inference
m.export = True # do not output loss values
def _apply(self, fn):
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
self = super()._apply(fn)
if self.pt:
m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
@smart_inference_mode()
def forward(self, ims, size=640, augment=False, profile=False):
# Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
# file: ims = 'data/images/zidane.jpg' # str or PosixPath
# URI: = 'https://ultralytics.com/images/zidane.jpg'
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
# PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
# numpy: = np.zeros((640,1280,3)) # HWC
# torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
# multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
dt = (Profile(), Profile(), Profile())
with dt[0]:
if isinstance(size, int): # expand
size = (size, size)
p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param
autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference
if isinstance(ims, torch.Tensor): # torch
with amp.autocast(autocast):
return self.model(ims.to(p.device).type_as(p), augment=augment) # inference
# Pre-process
n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images
shape0, shape1, files = [], [], [] # image and inference shapes, filenames
for i, im in enumerate(ims):
f = f'image{i}' # filename
if isinstance(im, (str, Path)): # filename or uri
im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
im = np.asarray(exif_transpose(im))
elif isinstance(im, Image.Image): # PIL Image
im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
files.append(Path(f).with_suffix('.jpg').name)
if im.shape[0] < 5: # image in CHW
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input
s = im.shape[:2] # HWC
shape0.append(s) # image shape
g = max(size) / max(s) # gain
shape1.append([int(y * g) for y in s])
ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] # inf shape
x = [letterbox(im, shape1, auto=False)[0] for im in ims] # pad
x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
with amp.autocast(autocast):
# Inference
with dt[1]:
y = self.model(x, augment=augment) # forward
# Post-process
with dt[2]:
y = non_max_suppression(y if self.dmb else y[0],
self.conf,
self.iou,
self.classes,
self.agnostic,
self.multi_label,
max_det=self.max_det) # NMS
for i in range(n):
scale_boxes(shape1, y[i][:, :4], shape0[i])
return Detections(ims, y, files, dt, self.names, x.shape)
class Detections:
# YOLOv5 detections class for inference results
def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
super().__init__()
d = pred[0].device # device
gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations
self.ims = ims # list of images as numpy arrays
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
self.names = names # class names
self.files = files # image filenames
self.times = times # profiling times
self.xyxy = pred # xyxy pixels
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred) # number of images (batch size)
self.t = tuple(x.t / self.n * 1E3 for x in times) # timestamps (ms)
self.s = tuple(shape) # inference BCHW shape
def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
s, crops = '', []
for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
if pred.shape[0]:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
s = s.rstrip(', ')
if show or save or render or crop:
annotator = Annotator(im, example=str(self.names))
for *box, conf, cls in reversed(pred): # xyxy, confidence, class
label = f'{self.names[int(cls)]} {conf:.2f}'
if crop:
file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
crops.append({
'box': box,
'conf': conf,
'cls': cls,
'label': label,
'im': save_one_box(box, im, file=file, save=save)})
else: # all others
annotator.box_label(box, label if labels else '', color=colors(cls))
im = annotator.im
else:
s += '(no detections)'
im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
if show:
if is_jupyter():
from IPython.display import display
display(im)
else:
im.show(self.files[i])
if save:
f = self.files[i]
im.save(save_dir / f) # save
if i == self.n - 1:
LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
if render:
self.ims[i] = np.asarray(im)
if pprint:
s = s.lstrip('\n')
return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
if crop:
if save:
LOGGER.info(f'Saved results to {save_dir}\n')
return crops
@TryExcept('Showing images is not supported in this environment')
def show(self, labels=True):
self._run(show=True, labels=labels) # show results
def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir
self._run(save=True, labels=labels, save_dir=save_dir) # save results
def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
return self._run(crop=True, save=save, save_dir=save_dir) # crop results
def render(self, labels=True):
self._run(render=True, labels=labels) # render results
return self.ims
def pandas(self):
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
new = copy(self) # return copy
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
return new
def tolist(self):
# return a list of Detections objects, i.e. 'for result in results.tolist():'
r = range(self.n) # iterable
x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
# for d in x:
# for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
# setattr(d, k, getattr(d, k)[0]) # pop out of list
return x
def print(self):
LOGGER.info(self.__str__())
def __len__(self): # override len(results)
return self.n
def __str__(self): # override print(results)
return self._run(pprint=True) # print results
def __repr__(self):
return f'YOLOv5 {self.__class__} instance\n' + self.__str__()
class Proto(nn.Module):
# YOLOv5 mask Proto module for segmentation models
def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks
super().__init__()
self.cv1 = Conv(c1, c_, k=3)
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
self.cv2 = Conv(c_, c_, k=3)
self.cv3 = Conv(c_, c2)
def forward(self, x):
return self.cv3(self.cv2(self.upsample(self.cv1(x))))
class Classify(nn.Module):
# YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self,
c1,
c2,
k=1,
s=1,
p=None,
g=1,
dropout_p=0.0): # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
super().__init__()
c_ = 1280 # efficientnet_b0 size
self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1)
self.drop = nn.Dropout(p=dropout_p, inplace=True)
self.linear = nn.Linear(c_, c2) # to x(b,c2)
def forward(self, x):
if isinstance(x, list):
x = torch.cat(x, 1)
return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))

View File

@ -0,0 +1,111 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Experimental modules
"""
import math
import numpy as np
import torch
import torch.nn as nn
from detecttracking.utils.downloads import attempt_download
class Sum(nn.Module):
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
def __init__(self, n, weight=False): # n: number of inputs
super().__init__()
self.weight = weight # apply weights boolean
self.iter = range(n - 1) # iter object
if weight:
self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights
def forward(self, x):
y = x[0] # no weight
if self.weight:
w = torch.sigmoid(self.w) * 2
for i in self.iter:
y = y + x[i + 1] * w[i]
else:
for i in self.iter:
y = y + x[i + 1]
return y
class MixConv2d(nn.Module):
# Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy
super().__init__()
n = len(k) # number of convolutions
if equal_ch: # equal c_ per group
i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(n)] # intermediate channels
else: # equal weight.numel() per group
b = [c2] + [0] * n
a = np.eye(n + 1, n, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
self.m = nn.ModuleList([
nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)])
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU()
def forward(self, x):
return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
# Ensemble of models
def __init__(self):
super().__init__()
def forward(self, x, augment=False, profile=False, visualize=False):
y = [module(x, augment, profile, visualize)[0] for module in self]
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.stack(y).mean(0) # mean ensemble
y = torch.cat(y, 1) # nms ensemble
return y, None # inference, train output
def attempt_load(weights, device=None, inplace=True, fuse=True):
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    from detecttracking.models.yolo import Detect, Model  # package-qualified, matching the other detecttracking.* imports
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
ckpt = torch.load(attempt_download(w), map_location=device) # load
ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model
# Model compatibility updates
if not hasattr(ckpt, 'stride'):
ckpt.stride = torch.tensor([32.])
if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)):
ckpt.names = dict(enumerate(ckpt.names)) # convert to dict
model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode
# Module updates
for m in model.modules():
t = type(m)
if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
m.inplace = inplace
if t is Detect and not isinstance(m.anchor_grid, list):
delattr(m, 'anchor_grid')
setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
m.recompute_scale_factor = None # torch 1.11.0 compatibility
# Return model
if len(model) == 1:
return model[-1]
# Return detection ensemble
print(f'Ensemble created with {weights}\n')
for k in 'names', 'nc', 'yaml':
setattr(model, k, getattr(model[0], k))
model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
return model
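# Usage sketch (illustrative weight paths):
#   model = attempt_load('yolov5s.pt', device=torch.device('cpu'))  # single model
#   ens = attempt_load(['a.pt', 'b.pt'])                            # NMS ensemble of models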

View File

@ -0,0 +1,59 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Default anchors for COCO data
# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
- [9,11, 21,19, 17,41] # P3/8
- [43,32, 39,70, 86,64] # P4/16
- [65,131, 134,130, 120,265] # P5/32
- [282,180, 247,354, 512,387] # P6/64
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
- [28,41, 67,59, 57,141] # P3/8
- [144,103, 129,227, 270,205] # P4/16
- [209,452, 455,396, 358,812] # P5/32
- [653,922, 1109,570, 1387,1187] # P6/64
# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
- [11,11, 13,30, 29,20] # P3/8
- [30,46, 61,38, 39,92] # P4/16
- [78,80, 146,66, 79,163] # P5/32
- [149,150, 321,143, 157,303] # P6/64
- [257,402, 359,290, 524,372] # P7/128
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
- [19,22, 54,36, 32,77] # P3/8
- [70,83, 138,71, 75,173] # P4/16
- [165,159, 148,334, 375,151] # P5/32
- [334,317, 251,626, 499,474] # P6/64
- [750,326, 534,814, 1079,818] # P7/128
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
- [29,34, 81,55, 47,115] # P3/8
- [105,124, 207,107, 113,259] # P4/16
- [247,238, 222,500, 563,227] # P5/32
- [501,476, 376,939, 749,711] # P6/64
- [1126,489, 801,1222, 1618,1227] # P7/128
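
Note: each anchor row above is a flat list of (width, height) pairs in pixels at the stated img_size, one row per output layer. A minimal decoding sketch, matching na = len(anchors[0]) // 2 used by parse_model and Detect below:
row = [10, 13, 16, 30, 33, 23] # P3/8 row of anchors_p5_640
pairs = list(zip(row[::2], row[1::2])) # [(10, 13), (16, 30), (33, 23)]
na = len(row) // 2 # 3 anchors per detection layer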

View File

@ -0,0 +1,51 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3-SPP head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, SPP, [512, [5, 9, 13]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,41 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,14, 23,27, 37,58] # P4/16
- [81,82, 135,169, 344,319] # P5/32
# YOLOv3-tiny backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [16, 3, 1]], # 0
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
[-1, 1, Conv, [32, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
[-1, 1, Conv, [512, 3, 1]],
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
]
# YOLOv3-tiny head
head:
[[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
]
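
Note: backbone layers 11-12 above keep the P5/32 resolution: an asymmetric zero-pad followed by a stride-1 max-pool leaves H and W unchanged. A quick check:
import torch, torch.nn as nn
x = torch.zeros(1, 512, 13, 13)
y = nn.MaxPool2d(2, 1, 0)(nn.ZeroPad2d([0, 1, 0, 1])(x))
print(y.shape) # torch.Size([1, 512, 13, 13])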

View File

@ -0,0 +1,51 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3 head
head:
[[-1, 1, Bottleneck, [1024, False]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 BiFPN head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
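
Note: the "BiFPN change" above concatenates three sources instead of the usual two. A simplified channel-bookkeeping sketch (per-source widths are assumed values):
f = [-1, 14, 6] # sources: previous layer, head P4, backbone P4
ch = [256, 512, 512] # assumed channel widths of those sources
c2 = sum(ch) # 1280 channels fed into the following C3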

View File

@ -0,0 +1,42 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 FPN head
head:
[[-1, 3, C3, [1024, False]], # 10 (P5/32-large)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, C3, [512, False]], # 14 (P4/16-medium)
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, C3, [256, False]], # 18 (P3/8-small)
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,54 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 2], 1, Concat, [1]], # cat backbone P2
[-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
[-1, 1, Conv, [128, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P3
[-1, 3, C3, [256, False]], # 24 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 27 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 30 (P5/32-large)
[[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
]
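
Note: with the integer form "anchors: 3", parse_model installs placeholder anchors that AutoAnchor later evolves. A sketch mirroring that expansion (see the isinstance(args[1], int) branch of parse_model in models/tf.py below):
anchors, nl = 3, 4 # 3 anchors per layer, 4 output layers (P2..P5)
placeholder = [list(range(anchors * 2))] * nl
print(placeholder[0]) # [0, 1, 2, 3, 4, 5]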

View File

@ -0,0 +1,41 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Conv, [ 64, 6, 2, 2 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 6, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 1024 ] ],
[ -1, 1, SPPF, [ 1024, 5 ] ], # 9
]
# YOLOv5 v6.0 head with (P3, P4) outputs
head:
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 13
[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)
[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 20 (P4/16-medium)
[ [ 17, 20 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4)
]

View File

@ -0,0 +1,56 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,67 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
[-1, 3, C3, [1280]],
[-1, 1, SPPF, [1280, 5]], # 13
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
head:
[[-1, 1, Conv, [1024, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 10], 1, Concat, [1]], # cat backbone P6
[-1, 3, C3, [1024, False]], # 17
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 21
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 25
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 29 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 26], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 32 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 22], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 35 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
[-1, 1, Conv, [1024, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P7
[-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
[[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
]
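
Note: a quick check of the output grid sizes implied by strides P3/8 through P7/128 at 640 px, mirroring the ny/nx computation in TFDetect below:
imgsz = 640
print([imgsz // s for s in (8, 16, 32, 64, 128)]) # [80, 40, 20, 10, 5]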

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 PANet head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]
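
Note: depth_multiple and width_multiple scale repeat counts and channel widths. A sketch of the scaling rules used by parse_model, with make_divisible reproduced here for illustration:
import math
def make_divisible(x, divisor=8):
    return math.ceil(x / divisor) * divisor
gd, gw = 0.67, 0.75 # this file's multiples
print(max(round(9 * gd), 1)) # a 9-repeat C3 becomes 6
print(make_divisible(1024 * gw)) # 1024 channels become 768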

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,49 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
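
Note: the activation key above swaps the Conv() activation model-wide. A hedged check (assumes torch and tensorflow installed) that the TF mapping for nn.LeakyReLU(0.1) in activations() of models/tf.py matches PyTorch:
import numpy as np, torch, tensorflow as tf
x = np.array([-2.0, -0.5, 0.0, 3.0], dtype=np.float32)
pt = torch.nn.LeakyReLU(0.1)(torch.from_numpy(x)).numpy()
tfy = tf.keras.activations.relu(tf.constant(x), alpha=0.1).numpy()
assert np.allclose(pt, tfy)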

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3Ghost, [128]],
[-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3Ghost, [256]],
[-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3Ghost, [512]],
[-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3Ghost, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, GhostConv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3Ghost, [512, False]], # 13
[-1, 1, GhostConv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
[-1, 1, GhostConv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
[-1, 1, GhostConv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,60 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [19,27, 44,40, 38,94] # P3/8
- [96,68, 86,152, 180,137] # P4/16
- [140,301, 303,264, 238,542] # P5/32
- [436,615, 739,380, 925,792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.5 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
]
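
Note: in the Segment rows above, [nc, anchors, 32, 256] supplies nm=32 mask coefficients and npr=256 proto channels. A sketch of the per-anchor output width, following Segment.__init__ in models/yolo.py below:
nc, nm = 80, 32
no = 5 + nc + nm # box(4) + objectness(1) + classes + mask coefficients
print(no) # 117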

608
detecttracking/models/tf.py Normal file
View File

@ -0,0 +1,608 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
TensorFlow, Keras and TFLite versions of YOLOv5
Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
Usage:
$ python models/tf.py --weights yolov5s.pt
Export:
$ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
"""
import argparse
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd()) # relative
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
from tensorflow import keras
from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv,
DWConvTranspose2d, Focus, autopad)
from models.experimental import MixConv2d, attempt_load
from models.yolo import Detect, Segment
from utils.activations import SiLU
from utils.general import LOGGER, make_divisible, print_args
class TFBN(keras.layers.Layer):
# TensorFlow BatchNormalization wrapper
def __init__(self, w=None):
super().__init__()
self.bn = keras.layers.BatchNormalization(
beta_initializer=keras.initializers.Constant(w.bias.numpy()),
gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
epsilon=w.eps)
def call(self, inputs):
return self.bn(inputs)
class TFPad(keras.layers.Layer):
# Pad inputs in spatial dimensions 1 and 2
def __init__(self, pad):
super().__init__()
if isinstance(pad, int):
self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
else: # tuple/list
self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
def call(self, inputs):
return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
class TFConv(keras.layers.Layer):
# Standard convolution
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
# ch_in, ch_out, weights, kernel, stride, padding, groups
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
# TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
# see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
conv = keras.layers.Conv2D(
filters=c2,
kernel_size=k,
strides=s,
padding='SAME' if s == 1 else 'VALID',
use_bias=not hasattr(w, 'bn'),
kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
self.act = activations(w.act) if act else tf.identity
def call(self, inputs):
return self.act(self.bn(self.conv(inputs)))
class TFDWConv(keras.layers.Layer):
# Depthwise convolution
def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
# ch_in, ch_out, weights, kernel, stride, padding, groups
super().__init__()
assert c2 % c1 == 0, f'TFDWConv() output={c2} must be a multiple of input={c1} channels'
conv = keras.layers.DepthwiseConv2D(
kernel_size=k,
depth_multiplier=c2 // c1,
strides=s,
padding='SAME' if s == 1 else 'VALID',
use_bias=not hasattr(w, 'bn'),
depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
self.act = activations(w.act) if act else tf.identity
def call(self, inputs):
return self.act(self.bn(self.conv(inputs)))
class TFDWConvTranspose2d(keras.layers.Layer):
# Depthwise ConvTranspose2d
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
# ch_in, ch_out, weights, kernel, stride, padding, groups
super().__init__()
assert c1 == c2, f'TFDWConvTranspose2d() output={c2} must be equal to input={c1} channels'
assert k == 4 and p1 == 1, 'TFDWConvTranspose2d() only valid for k=4 and p1=1'
weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
self.c1 = c1
self.conv = [
keras.layers.Conv2DTranspose(filters=1,
kernel_size=k,
strides=s,
padding='VALID',
output_padding=p2,
use_bias=True,
kernel_initializer=keras.initializers.Constant(weight[..., i:i + 1]),
bias_initializer=keras.initializers.Constant(bias[i])) for i in range(c1)]
def call(self, inputs):
return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
class TFFocus(keras.layers.Layer):
# Focus wh information into c-space
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
# ch_in, ch_out, kernel, stride, padding, groups
super().__init__()
self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
# inputs = inputs / 255 # normalize 0-255 to 0-1
inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
return self.conv(tf.concat(inputs, 3))
class TFBottleneck(keras.layers.Layer):
# Standard bottleneck
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
self.add = shortcut and c1 == c2
def call(self, inputs):
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFCrossConv(keras.layers.Layer):
# Cross Convolution
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
self.add = shortcut and c1 == c2
def call(self, inputs):
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFConv2d(keras.layers.Layer):
# Substitution for PyTorch nn.Conv2D
def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
self.conv = keras.layers.Conv2D(filters=c2,
kernel_size=k,
strides=s,
padding='VALID',
use_bias=bias,
kernel_initializer=keras.initializers.Constant(
w.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None)
def call(self, inputs):
return self.conv(inputs)
class TFBottleneckCSP(keras.layers.Layer):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
self.bn = TFBN(w.bn)
self.act = lambda x: keras.activations.swish(x)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
y1 = self.cv3(self.m(self.cv1(inputs)))
y2 = self.cv2(inputs)
return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
class TFC3(keras.layers.Layer):
# CSP Bottleneck with 3 convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
class TFC3x(keras.layers.Layer):
# C3 module with cross-convolutions
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
# ch_in, ch_out, number, shortcut, groups, expansion
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
self.m = keras.Sequential([
TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)])
def call(self, inputs):
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
class TFSPP(keras.layers.Layer):
# Spatial pyramid pooling layer used in YOLOv3-SPP
def __init__(self, c1, c2, k=(5, 9, 13), w=None):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k]
def call(self, inputs):
x = self.cv1(inputs)
return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
class TFSPPF(keras.layers.Layer):
# Spatial pyramid pooling-Fast layer
def __init__(self, c1, c2, k=5, w=None):
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')
def call(self, inputs):
x = self.cv1(inputs)
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
class TFDetect(keras.layers.Layer):
# TF YOLOv5 Detect layer
def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer
super().__init__()
self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [tf.zeros(1)] * self.nl # init grid
self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
self.training = False # set to False after building model
self.imgsz = imgsz
for i in range(self.nl):
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
self.grid[i] = self._make_grid(nx, ny)
def call(self, inputs):
z = [] # inference output
x = []
for i in range(self.nl):
x.append(self.m[i](inputs[i]))
# x(bs,20,20,255) to x(bs,3,20,20,85)
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
if not self.training: # inference
y = x[i]
grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy
wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
# Normalize xywh to 0-1 to reduce calibration error
xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1)
z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1), )
@staticmethod
def _make_grid(nx=20, ny=20):
# yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
# return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
class TFSegment(TFDetect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
super().__init__(nc, anchors, ch, imgsz, w)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv
self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos
self.detect = TFDetect.call
def call(self, x):
p = self.proto(x[0])
# p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0])) # (optional) full-size protos
p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160)
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p)
class TFProto(keras.layers.Layer):
def __init__(self, c1, c_=256, c2=32, w=None):
super().__init__()
self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
self.upsample = TFUpsample(None, scale_factor=2, mode='nearest')
self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
self.cv3 = TFConv(c_, c2, w=w.cv3)
def call(self, inputs):
return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
class TFUpsample(keras.layers.Layer):
# TF version of torch.nn.Upsample()
def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w'
super().__init__()
assert scale_factor % 2 == 0, 'scale_factor must be multiple of 2'
self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode)
# self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
# with default arguments: align_corners=False, half_pixel_centers=False
# self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
# size=(x.shape[1] * 2, x.shape[2] * 2))
def call(self, inputs):
return self.upsample(inputs)
class TFConcat(keras.layers.Layer):
# TF version of torch.concat()
def __init__(self, dimension=1, w=None):
super().__init__()
assert dimension == 1, 'convert only NCHW to NHWC concat'
self.d = 3
def call(self, inputs):
return tf.concat(inputs, self.d)
def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m_str = m
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
except NameError:
pass
n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [
nn.Conv2d, Conv, DWConv, DWConvTranspose2d, Bottleneck, SPP, SPPF, MixConv2d, Focus, CrossConv,
BottleneckCSP, C3, C3x]:
c1, c2 = ch[f], args[0]
c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3, C3x]:
args.insert(2, n)
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
elif m in [Detect, Segment]:
args.append([ch[x + 1] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, 8)
args.append(imgsz)
else:
c2 = ch[f]
tf_m = eval('TF' + m_str.replace('nn.', ''))
m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
else tf_m(*args, w=model.model[i]) # module
torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum(x.numel() for x in torch_m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
ch.append(c2)
return keras.Sequential(layers), sorted(save)
class TFModel:
# TF YOLOv5 model
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg) as f:
self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
# Define model
if nc and nc != self.yaml['nc']:
LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
self.yaml['nc'] = nc # override yaml value
self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
def predict(self,
inputs,
tf_nms=False,
agnostic_nms=False,
topk_per_class=100,
topk_all=100,
iou_thres=0.45,
conf_thres=0.25):
y = [] # outputs
x = inputs
for m in self.model.layers:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
x = m(x) # run
y.append(x if m.i in self.savelist else None) # save output
# Add TensorFlow NMS
if tf_nms:
boxes = self._xywh2xyxy(x[0][..., :4])
probs = x[0][:, :, 4:5]
classes = x[0][:, :, 5:]
scores = probs * classes
if agnostic_nms:
nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
else:
boxes = tf.expand_dims(boxes, 2)
nms = tf.image.combined_non_max_suppression(boxes,
scores,
topk_per_class,
topk_all,
iou_thres,
conf_thres,
clip_boxes=False)
return (nms, )
return x # output [1,6300,85] = [xywh, conf, class0, class1, ...]
# x = x[0] # [x(1,6300,85), ...] to x(6300,85)
# xywh = x[..., :4] # x(6300,4) boxes
# conf = x[..., 4:5] # x(6300,1) confidences
# cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
# return tf.concat([conf, cls, xywh], 1)
@staticmethod
def _xywh2xyxy(xywh):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
class AgnosticNMS(keras.layers.Layer):
# TF Agnostic NMS
def call(self, input, topk_all, iou_thres, conf_thres):
# wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
input,
fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
name='agnostic_nms')
@staticmethod
def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS
boxes, classes, scores = x
class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
scores_inp = tf.reduce_max(scores, -1)
selected_inds = tf.image.non_max_suppression(boxes,
scores_inp,
max_output_size=topk_all,
iou_threshold=iou_thres,
score_threshold=conf_thres)
selected_boxes = tf.gather(boxes, selected_inds)
padded_boxes = tf.pad(selected_boxes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
mode='CONSTANT',
constant_values=0.0)
selected_scores = tf.gather(scores_inp, selected_inds)
padded_scores = tf.pad(selected_scores,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode='CONSTANT',
constant_values=-1.0)
selected_classes = tf.gather(class_inds, selected_inds)
padded_classes = tf.pad(selected_classes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode='CONSTANT',
constant_values=-1.0)
valid_detections = tf.shape(selected_inds)[0]
return padded_boxes, padded_scores, padded_classes, valid_detections
def activations(act=nn.SiLU):
# Returns TF activation from input PyTorch activation
if isinstance(act, nn.LeakyReLU):
return lambda x: keras.activations.relu(x, alpha=0.1)
elif isinstance(act, nn.Hardswish):
return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
elif isinstance(act, (nn.SiLU, SiLU)):
return lambda x: keras.activations.swish(x)
else:
raise Exception(f'no matching TensorFlow activation found for PyTorch activation {act}')
def representative_dataset_gen(dataset, ncalib=100):
# Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
im = np.transpose(img, [1, 2, 0])
im = np.expand_dims(im, axis=0).astype(np.float32)
im /= 255
yield [im]
if n >= ncalib:
break
def run(
weights=ROOT / 'yolov5s.pt', # weights path
imgsz=(640, 640), # inference size h,w
batch_size=1, # batch size
dynamic=False, # dynamic batch size
):
# PyTorch model
im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
model = attempt_load(weights, device=torch.device('cpu'), inplace=True, fuse=False)
_ = model(im) # inference
model.info()
# TensorFlow model
im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
_ = tf_model.predict(im) # inference
# Keras model
im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
keras_model.summary()
LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')
def parse_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--dynamic', action='store_true', help='dynamic batch size')
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
run(**vars(opt))
if __name__ == '__main__':
opt = parse_opt()
main(opt)
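
Note: a hedged usage sketch for representative_dataset_gen above, assuming a YOLOv5-style dataloader yielding (path, img, im0s, vid_cap, string) tuples and the keras_model built in run():
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100)
tflite_model = converter.convert()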

View File

@ -0,0 +1,391 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
YOLO-specific modules
Usage:
$ python models/yolo.py --cfg yolov5s.yaml
"""
import argparse
import contextlib
import os
import platform
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
if platform.system() != 'Windows':
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import * # noqa
from models.experimental import * # noqa
from detecttracking.utils.autoanchor import check_anchor_order
from detecttracking.utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
from detecttracking.utils.plots import feature_visualization
from detecttracking.utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device,
time_sync)
try:
import thop # for FLOPs computation
except ImportError:
thop = None
class Detect(nn.Module):
# YOLOv5 Detect head for detection models
stride = None # strides computed during build
dynamic = False # force grid reconstruction
export = False # export mode
def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
super().__init__()
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid
self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid
self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.inplace = inplace # use inplace ops (e.g. slice assignment)
def forward(self, x):
z = [] # inference output
for i in range(self.nl):
x[i] = self.m[i](x[i]) # conv
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
if not self.training: # inference
if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
if isinstance(self, Segment): # (boxes + masks)
xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
else: # Detect (boxes only)
xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf), 4)
z.append(y.view(bs, self.na * nx * ny, self.no))
return x if self.training else (torch.cat(z, 1), ) if self.export else (torch.cat(z, 1), x)
def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, '1.10.0')):
d = self.anchors[i].device
t = self.anchors[i].dtype
shape = 1, self.na, ny, nx, 2 # grid shape
y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
yv, xv = torch.meshgrid(y, x, indexing='ij') if torch_1_10 else torch.meshgrid(y, x) # torch>=0.7 compatibility
grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
return grid, anchor_grid
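# Shape sketch: with a 640x640 input and stride 8 (P3), nx = ny = 80, so grid and
# anchor_grid expand to (1, na, 80, 80, 2); the -0.5 offset pairs with the decode
# xy = (xy * 2 + grid) * stride used in forward() above.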
class Segment(Detect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
super().__init__(nc, anchors, ch, inplace)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.proto = Proto(ch[0], self.npr, self.nm) # protos
self.detect = Detect.forward
def forward(self, x):
p = self.proto(x[0])
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
class BaseModel(nn.Module):
# YOLOv5 base model
def forward(self, x, profile=False, visualize=False):
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_once(self, x, profile=False, visualize=False):
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
self._profile_one_layer(m, x, dt)
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
return x
def _profile_one_layer(self, m, x, dt):
c = m == self.model[-1] # is final layer, copy input as inplace fix
o = thop.profile(m, inputs=(x.copy() if c else x, ), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs
t = time_sync()
for _ in range(10):
m(x.copy() if c else x)
dt.append((time_sync() - t) * 100)
if m == self.model[0]:
LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
if c:
LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
LOGGER.info('Fusing layers... ')
for m in self.model.modules():
if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, 'bn') # remove batchnorm
m.forward = m.forward_fuse # update forward
self.info()
return self
def info(self, verbose=False, img_size=640): # print model information
model_info(self, verbose, img_size)
def _apply(self, fn):
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
self = super()._apply(fn)
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
class DetectionModel(BaseModel):
# YOLOv5 detection model
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg, encoding='ascii', errors='ignore') as f:
self.yaml = yaml.safe_load(f) # model dict
# Define model
ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
if nc and nc != self.yaml['nc']:
LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
self.yaml['nc'] = nc # override yaml value
if anchors:
LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
self.yaml['anchors'] = round(anchors) # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
self.names = [str(i) for i in range(self.yaml['nc'])] # default names
self.inplace = self.yaml.get('inplace', True)
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
s = 256 # 2x min stride
m.inplace = self.inplace
forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward
check_anchor_order(m)
m.anchors /= m.stride.view(-1, 1, 1)
self.stride = m.stride
self._initialize_biases() # only run once
# Init weights, biases
initialize_weights(self)
self.info()
LOGGER.info('')
def forward(self, x, augment=False, profile=False, visualize=False):
if augment:
return self._forward_augment(x) # augmented inference, None
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_augment(self, x):
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self._forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi = self._descale_pred(yi, fi, si, img_size)
y.append(yi)
y = self._clip_augmented(y) # clip augmented tails
return torch.cat(y, 1), None # augmented inference, train
def _descale_pred(self, p, flips, scale, img_size):
# de-scale predictions following augmented inference (inverse operation)
if self.inplace:
p[..., :4] /= scale # de-scale
if flips == 2:
p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
elif flips == 3:
p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
else:
x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
if flips == 2:
y = img_size[0] - y # de-flip ud
elif flips == 3:
x = img_size[1] - x # de-flip lr
p = torch.cat((x, y, wh, p[..., 4:]), -1)
return p
def _clip_augmented(self, y):
# Clip YOLOv5 augmented inference tails
nl = self.model[-1].nl # number of detection layers (P3-P5)
g = sum(4 ** x for x in range(nl)) # grid points
e = 1 # exclude layer count
i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices
y[0] = y[0][:, :-i] # large
i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
y[-1] = y[-1][:, i:] # small
return y
def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
# https://arxiv.org/abs/1708.02002 section 3.3
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5:5 + m.nc] += math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum()) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
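# Worked example: for stride s = 8 on a 640 input, the objectness bias shifts by
# math.log(8 / (640 / 8) ** 2) = log(8 / 6400) ≈ -6.68, i.e. a prior of roughly
# 8 objects per 80x80 grid (~1/800 per cell).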
Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
class SegmentationModel(DetectionModel):
# YOLOv5 segmentation model
def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
super().__init__(cfg, ch, nc, anchors)
class ClassificationModel(BaseModel):
# YOLOv5 classification model
def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index
super().__init__()
self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
def _from_detection_model(self, model, nc=1000, cutoff=10):
# Create a YOLOv5 classification model from a YOLOv5 detection model
if isinstance(model, DetectMultiBackend):
model = model.model # unwrap DetectMultiBackend
model.model = model.model[:cutoff] # backbone
m = model.model[-1] # last layer
ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels # ch into module
c = Classify(ch, nc) # Classify()
c.i, c.f, c.type = m.i, m.f, 'models.common.Classify' # index, from, type
model.model[-1] = c # replace
self.model = model.model
self.stride = model.stride
self.save = []
self.nc = nc
def _from_yaml(self, cfg):
# Create a YOLOv5 classification model from a *.yaml file
self.model = None
def parse_model(d, ch): # model_dict, input_channels(3)
# Parse a YOLOv5 model.yaml dictionary
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
if act:
Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
LOGGER.info(f"{colorstr('activation:')} {act}") # print
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
with contextlib.suppress(NameError):
args[j] = eval(a) if isinstance(a, str) else a # eval strings
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in {
Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
c1, c2 = ch[f], args[0]
if c2 != no: # if not output
c2 = make_divisible(c2 * gw, 8)
args = [c1, c2, *args[1:]]
if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
args.insert(2, n) # number of repeats
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[x] for x in f)
# TODO: channel, gw, gd
elif m in {Detect, Segment}:
args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, 8)
elif m is Contract:
c2 = ch[f] * args[0] ** 2
elif m is Expand:
c2 = ch[f] // args[0] ** 2
else:
c2 = ch[f]
m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum(x.numel() for x in m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
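# Worked example: for yolov5s (gd=0.33, gw=0.50) the backbone row [-1, 3, C3, [128]]
# parses to n = max(round(3 * 0.33), 1) = 1 repeat and
# c2 = make_divisible(128 * 0.50, 8) = 64, so the layer is built as C3(c1, 64).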
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--profile', action='store_true', help='profile model speed')
parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
opt = parser.parse_args()
opt.cfg = check_yaml(opt.cfg) # check YAML
print_args(vars(opt))
device = select_device(opt.device)
# Create model
im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
model = Model(opt.cfg).to(device)
# Options
if opt.line_profile: # profile layer by layer
model(im, profile=True)
elif opt.profile: # profile forward-backward
results = profile(input=im, ops=[model], n=3)
elif opt.test: # test all models
for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
try:
_ = Model(cfg)
except Exception as e:
print(f'Error in {cfg}: {e}')
else: # report fused model summary
model.fuse()

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
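# Worked example of the multiples above (0.33 depth, 0.50 width, i.e. the 's' variant):
# the backbone entry [-1, 9, C3, [512]] becomes n = max(round(9 * 0.33), 1) = 3 repeats
# with make_divisible(512 * 0.50, 8) = 256 output channels (see parse_model in models/yolo.py).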

View File

@ -0,0 +1,48 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -0,0 +1,249 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 14 10:01:24 2024
@author: ym
"""
import numpy as np
import cv2
from scipy.spatial.distance import cdist
class TrackFrag:
def __init__(self, boxes, imgshape=(1280, 1024)):
self.boxes = boxes
self.cls = int(boxes[0, 6])
self.tid = int(boxes[0, 4])
self.imgshape = imgshape
'''Track duration, expressed in frame IDs (timestamps could be used instead)'''
self.during = (np.min(boxes[:, 7]), np.max(boxes[:, 7]))
self.groups = [set(np.unique(boxes[:, 7].astype(int)))]
# '''Coordinates (x, y) of 5 key points: center, top-left, top-right, bottom-left, bottom-right'''
self.isCornpoint = self.is_cornpoint(10)
self.compute_cornpoints()
def is_cornpoint(self, edge=10):
isleft = min(self.boxes[:, 0]) < edge
istop = min(self.boxes[:, 1]) < edge
isright = max(self.boxes[:, 2]) > self.imgshape[0] - edge
isbottom = max(self.boxes[:, 3]) > self.imgshape[1] - edge  # y-extent compares against imgshape[1], matching isright above
isCornpoint = isbottom or istop or isleft or isright
return isCornpoint
def compute_cornpoints(self):
'''
cornpoints has 10 entries: the (x, y) coordinates of 5 points
(center, top_left, top_right, bottom_left, bottom_right)
'''
boxes = self.boxes
cornpoints = np.zeros((len(boxes), 10))
cornpoints[:,0] = (boxes[:, 0] + boxes[:, 2]) / 2
cornpoints[:,1] = (boxes[:, 1] + boxes[:, 3]) / 2
cornpoints[:,2], cornpoints[:,3] = boxes[:, 0], boxes[:, 1]
cornpoints[:,4], cornpoints[:,5] = boxes[:, 2], boxes[:, 1]
cornpoints[:,6], cornpoints[:,7] = boxes[:, 0], boxes[:, 3]
cornpoints[:,8], cornpoints[:,9] = boxes[:, 2], boxes[:, 3]
trajdist = []
for k in range(5):
X = cornpoints[:, 2*k:2*(k+1)]
trajdist.append(np.max(cdist(X, X)))
idx = trajdist.index(min(trajdist))
self.trajdist_min = trajdist[idx]
self.cornpoints = cornpoints
def update_groups(self, THRESH=18):
'''
Reassign self.groups by clustering box centers with a seed-growing merge
(pixel distance threshold THRESH)
'''
boxes = self.boxes
nbox = len(boxes)
X = np.zeros((len(boxes), 2))
X[:,0] = (boxes[:, 0] + boxes[:, 2]) / 2
X[:,1] = (boxes[:, 1] + boxes[:, 3]) / 2
dist2 = cdist(X, X)
# label = np.zeros(nbox, dtype=np.int)
marked, groups = set(), []
for k in range(nbox):
if k in marked:
continue
group = set()
dt = dist2[k, :]
idx = np.where(dt < THRESH)[0]
if len(idx) == 1:
groups.append({k})
marked.add(k)
continue
'''Initial neighbor seed set; remove the current point'''
seeds = set(idx)
seeds.remove(k)
group.add(k)
marked.add(k)
while len(seeds) !=0:
pt = seeds.pop()
dt = dist2[pt, :]
seed = set(np.where(dt < THRESH)[0])
seed.remove(pt)
seed.difference_update(marked)
seeds.update(seed)
group.add(pt)
marked.add(pt)
groups.append(group)
self.groups = groups
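# Worked example: with THRESH=18 and box centers at (0, 0), (5, 0) and (100, 100),
# the first two points seed each other (distance 5 < 18) and merge into one group
# while the third stays alone, giving self.groups == [{0, 1}, {2}].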
def jump_boxes(self):
gpboxes = []
for group in self.groups:
box = self.boxes[list(group), :]
gpboxes.append(box)
return gpboxes
def is_moving(self):
if len(self.groups)>=3:
return True
return False
def is_static(self, THRESH=50):
box1 = self.boxes[0, :4]
box2 = self.boxes[-1, :4]
''' Displacements of the four box corners between the first and last frames '''
ptd = box2 - box1
ptd1 = np.linalg.norm((ptd[0], ptd[1]))
ptd2 = np.linalg.norm((ptd[2], ptd[1]))
ptd3 = np.linalg.norm((ptd[0], ptd[3]))
ptd4 = np.linalg.norm((ptd[2], ptd[3]))
condt1 = ptd1 < THRESH and ptd2 < THRESH and ptd3 < THRESH and ptd4 < THRESH
if not self.isCornpoint:
# the original bare expression `self.trajdist_min < 120` had no effect; fold it
# into the result, as the commented-out lines below suggest was intended
condt1 = condt1 or self.trajdist_min < 120
# condt2 = self.TrajFeat[3] < 50
# condt = condt1 or condt2
return condt1
class MoveDetect:
def __init__(self, bboxes, imgshape=(1280, 1024)):
self.bboxes = bboxes
self.shape = imgshape
self.temp = np.zeros(imgshape, np.uint8)
self.trackIDs = np.unique(bboxes[:, 4].astype(int))
# self.frameID = np.unique(bboxes[:, 7].astype(int))
# self.fnum = len(self.frameID)
self.lboxes = self.array2list()
self.tracks = [TrackFrag(b) for b in self.lboxes]
def classify(self):
tracks = self.tracks
'''1. Extract hand tracks'''
hand_tracks = [t for t in tracks if t.cls==0]
tracks = self.sub_tracks(tracks, hand_tracks)
'''2. Extract static tracks'''
tracks_static = [t for t in tracks if t.is_static()]
tracks = self.sub_tracks(tracks, tracks_static)
'''3. Re-cluster the track points'''
for track in tracks:
track.update_groups(18)
self.hand_tracks = hand_tracks
self.track_motion = [t for t in tracks if len(t.groups)>=3]
def draw(self):
pass
def array2list(self):
'''
Convert bboxes into a list of per-track arrays
bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
Return
lboxes: a list whose elements each hold the boxes of one track_id, in the layout
[x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
'''
track_ids = self.bboxes[:, 4].astype(int)
lboxes = []
for t_id in self.trackIDs:
# print(f"The ID is: {t_id}")
idx = np.where(track_ids == t_id)[0]
box = self.bboxes[idx, :]
lboxes.append(box)
return lboxes
@staticmethod
def sub_tracks(tlista, tlistb):
track_ids_b = {t.tid for t in tlistb}
return [t for t in tlista if t.tid not in track_ids_b]
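# Minimal usage sketch (assumes `bboxes` is an (N, 9) array in the
# [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index] layout used above):
# md = MoveDetect(bboxes, imgshape=(1280, 1024))
# md.classify()
# moving_tids = [t.tid for t in md.track_motion]  # tracks spanning >= 3 spatial clusters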

270
detecttracking/pipeline.py Normal file
View File

@ -0,0 +1,270 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 29 08:59:21 2024
@author: ym
"""
import os
# import sys
import cv2
import pickle
import numpy as np
from pathlib import Path
from track_reid import yolo_resnet_tracker
from tracking.dotrack.dotracks_back import doBackTracks
from tracking.dotrack.dotracks_front import doFrontTracks
from tracking.utils.drawtracks import plot_frameID_y2, draw_all_trajectories
from utils.getsource import get_image_pairs, get_video_pairs
from tracking.utils.read_data import read_similar
def save_subimgs(imgdict, boxes, spath, ctype):
for i in range(len(boxes)):
fid, bid = int(boxes[i, 7]), int(boxes[i, 8])
if f"{fid}_{bid}" in imgdict.keys():
img = imgdict[f"{fid}_{bid}"]
imgpath = spath / f"{ctype}_{fid}_{bid}.png"
cv2.imwrite(imgpath, img)
def pipeline(
eventpath,
savepath,
SourceType,
weights
):
'''
eventpath: storage path of a single event
'''
if SourceType == "video":
vpaths = get_video_pairs(eventpath)
elif SourceType == "image":
vpaths = get_image_pairs(eventpath)
optdict = {}
optdict["weights"] = weights
event_tracks = []
## Build the shopping-event dict
evtname = Path(eventpath).stem
barcode = evtname.split('_')[-1] if len(evtname.split('_'))>=2 \
and len(evtname.split('_')[-1])>=8 \
and evtname.split('_')[-1].isdigit() else ''
'''Folder for this event's results'''
if not savepath:
savepath = Path(__file__).resolve().parents[0] / "events_result"
savepath_pipeline = Path(savepath) / Path("Yolos_Tracking") / evtname
"""ShoppingDict pickle 文件保存地址 """
savepath_spdict = Path(savepath) / "ShoppingDict_pkfile"
if not savepath_spdict.exists():
savepath_spdict.mkdir(parents=True, exist_ok=True)
pf_path = Path(savepath_spdict) / Path(str(evtname)+".pickle")
# if pf_path.exists():
# return
ShoppingDict = {"eventPath": eventpath,
"eventName": evtname,
"barcode": barcode,
"eventType": '', # "input", "output", "other"
"frontCamera": {},
"backCamera": {},
"one2n": []
}
procpath = Path(eventpath).joinpath('process.data')
if procpath.is_file():
SimiDict = read_similar(procpath)
ShoppingDict["one2n"] = SimiDict['one2n']
for vpath in vpaths:
'''Build the camera-event dict'''
CameraEvent = {"cameraType": '', # "front", "back"
"videoPath": '',
"imagePaths": [],
"yoloResnetTracker": [],
"tracking": [],
}
if isinstance(vpath, list):
CameraEvent["imagePaths"] = vpath
bname = os.path.basename(vpath[0])
if not isinstance(vpath, list):
CameraEvent["videoPath"] = vpath
bname = os.path.basename(vpath)
if bname.split('_')[0] == "0" or bname.find('back')>=0:
CameraEvent["cameraType"] = "back"
if bname.split('_')[0] == "1" or bname.find('front')>=0:
CameraEvent["cameraType"] = "front"
'''Result folders for this event'''
if isinstance(vpath, list):
savepath_pipeline_imgs = savepath_pipeline / Path("images")
else:
savepath_pipeline_imgs = savepath_pipeline / Path(str(Path(vpath).stem))
if not savepath_pipeline_imgs.exists():
savepath_pipeline_imgs.mkdir(parents=True, exist_ok=True)
savepath_pipeline_subimgs = savepath_pipeline / Path("subimgs")
if not savepath_pipeline_subimgs.exists():
savepath_pipeline_subimgs.mkdir(parents=True, exist_ok=True)
'''Yolo + Resnet + Tracker'''
optdict["source"] = vpath
optdict["save_dir"] = savepath_pipeline_imgs
yrtOut = yolo_resnet_tracker(**optdict)
CameraEvent["yoloResnetTracker"] = yrtOut
# bboxes = np.empty((0, 9), dtype = np.float32)
# for frameDict in yrtOut:
# bboxes = np.concatenate([bboxes, frameDict["tboxes"]], axis=0)
trackerboxes = np.empty((0, 9), dtype=np.float64)
trackefeats = {}
for frameDict in yrtOut:
tboxes = frameDict["tboxes"]
ffeats = frameDict["feats"]
trackerboxes = np.concatenate((trackerboxes, np.array(tboxes)), axis=0)
for i in range(len(tboxes)):
fid, bid = int(tboxes[i, 7]), int(tboxes[i, 8])
trackefeats.update({f"{fid}_{bid}": ffeats[f"{fid}_{bid}"]})
'''tracking'''
if CameraEvent["cameraType"] == "back":
vts = doBackTracks(trackerboxes, trackefeats)
vts.classify()
event_tracks.append(("back", vts))
CameraEvent["tracking"] = vts
ShoppingDict["backCamera"] = CameraEvent
if CameraEvent["cameraType"] == "front":
vts = doFrontTracks(trackerboxes, trackefeats)
vts.classify()
event_tracks.append(("front", vts))
CameraEvent["tracking"] = vts
ShoppingDict["frontCamera"] = CameraEvent
with open(str(pf_path), 'wb') as f:
pickle.dump(ShoppingDict, f)
for CamerType, vts in event_tracks:
if len(vts.tracks)==0: continue
if CamerType == 'front':
yolos = ShoppingDict["frontCamera"]["yoloResnetTracker"]
ctype = 1
if CamerType == 'back':
yolos = ShoppingDict["backCamera"]["yoloResnetTracker"]
ctype = 0
imgdict = {}
for y in yolos:
imgdict.update(y["imgs"])
for track in vts.Residual:
if isinstance(track, np.ndarray):
save_subimgs(imgdict, track, savepath_pipeline_subimgs, ctype)
else:
save_subimgs(imgdict, track.boxes, savepath_pipeline_subimgs, ctype)
'''Trajectory visualization'''
illus = [None, None]
for CamerType, vts in event_tracks:
if len(vts.tracks)==0: continue
if CamerType == 'front':
edgeline = cv2.imread("./tracking/shopcart/cart_tempt/board_ftmp_line.png")
h, w = edgeline.shape[:2]
# nh, nw = h//2, w//2
# edgeline = cv2.resize(edgeline, (nw, nh), interpolation=cv2.INTER_AREA)
img_tracking = draw_all_trajectories(vts, edgeline, savepath_pipeline, CamerType, draw5p=True)
illus[0] = img_tracking
plt = plot_frameID_y2(vts)
plt.savefig(os.path.join(savepath_pipeline, "front_y2.png"))
if CamerType == 'back':
edgeline = cv2.imread("./tracking/shopcart/cart_tempt/edgeline.png")
h, w = edgeline.shape[:2]
# nh, nw = h//2, w//2
# edgeline = cv2.resize(edgeline, (nw, nh), interpolation=cv2.INTER_AREA)
img_tracking = draw_all_trajectories(vts, edgeline, savepath_pipeline, CamerType, draw5p=True)
illus[1] = img_tracking
illus = [im for im in illus if im is not None]
if len(illus):
img_cat = np.concatenate(illus, axis = 1)
if len(illus)==2:
H, W = img_cat.shape[:2]
cv2.line(img_cat, (int(W/2), 0), (int(W/2), int(H)), (128, 128, 255), 3)
trajpath = os.path.join(savepath_pipeline, "trajectory.png")
cv2.imwrite(trajpath, img_cat)
def main():
'''
Iterate over the event folders and run pipeline() on each, with SourceType "image" or "video".
'''
parmDict = {}
evtdir = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\images"
parmDict["SourceType"] = "video" # video, image
parmDict["savepath"] = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\result"
parmDict["weights"] = r'D:\DetectTracking\ckpts\best_cls10_0906.pt'
evtdir = Path(evtdir)
k, errEvents = 0, []
for item in evtdir.iterdir():
if item.is_dir():
# item = evtdir/Path("20241209-160201-b97f7a0e-7322-4375-9f17-c475500097e9_6926265317292")
parmDict["eventpath"] = item
# pipeline(**parmDict)
try:
pipeline(**parmDict)
except Exception as e:
errEvents.append(str(item))
k+=1
if k==1:
break
errfile = os.path.join(parmDict["savepath"], f'error_events.txt')
with open(errfile, 'w', encoding='utf-8') as f:
for line in errEvents:
f.write(line + '\n')
if __name__ == "__main__":
main()

View File

@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 29 08:59:21 2024
For field-collected videos, use the algorithm pipeline to extract sub-images within motion tracks, replacing manual image screening and annotation
@author: ym
"""
import os
import cv2
import pickle
from pathlib import Path
from track_reid import parse_opt, yolo_resnet_tracker
from tracking.dotrack.dotracks_back import doBackTracks
from tracking.dotrack.dotracks_front import doFrontTracks
IMGFORMATS = '.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff', '.webp', '.pfm'
VIDFORMATS = '.avi', '.gif', '.m4v', '.mkv', '.mov', '.mp4', '.ts', '.wmv'
std_feature_path = r"\\192.168.1.28\share\测试_202406\contrast\std_features_2192_ft32vsft16"
opt = parse_opt()
optdict = vars(opt)
def pipeline(eventpath, savepath):
# eventpath = r"\\192.168.1.28\share\测试_202406\0918\images1\20240918-110822-1bc3902e-5a8e-4e23-8eca-fb3f02738551_6938314601726"
optdict["project"] = savepath
'''Yolo + Resnet + Tracker'''
optdict["source"] = eventpath
optdict["save_dir"] = savepath
optdict["nosave"] = False
tracksdict = yolo_resnet_tracker(**optdict)
bboxes = tracksdict['TrackBoxes']
basename = os.path.basename(eventpath)
base, ext = os.path.splitext(basename)
if base.find('front')>=0:
vts = doFrontTracks(bboxes, tracksdict)
vts.classify()
if base.find('back')>=0:
vts = doBackTracks(bboxes, tracksdict)
vts.classify()
tracks = [t for t in vts.tracks if t.cls>0 and not t.is_static()]
# tracks = [t for t in vts.tracks if t.cls>0]
for track in tracks:
# for track in vts.Residual:
for *xyxy, tid, conf, cls, fid, bid in track.boxes:
img = tracksdict[f'frame_{int(fid)}']["imgs"][int(bid)]
imgpath = savepath / Path(f'{base}_tid-{int(tid)}_fid-{int(fid)}_bid-{int(bid)}.jpg')
cv2.imwrite(str(imgpath), img)  # cv2.imwrite expects a str path
return len(vts.Residual)
def main():
videopath = r"\\192.168.1.28\share\上海中环店采集视频\21-25\videos\1\back"
savepath = r"D:\contrast\barcodes"
vpaths = []
for root, dirs, files in os.walk(videopath):
vpth = [os.path.join(root, f) for f in files if os.path.splitext(f)[-1] in VIDFORMATS]
vpaths.extend(vpth)
manual_txt = os.path.join(savepath, 'manual_videos.txt')
file = open(manual_txt, 'a', encoding='utf-8')
manual = []
k = 0
for vpath in vpaths:
videoname = os.path.basename(vpath)
vname, ext = os.path.splitext(videoname)
barcode = videoname.split('_')[0]
subpath = os.path.join(savepath, barcode, vname)
subpath = Path(subpath)
if not subpath.exists():
subpath.mkdir(parents=True, exist_ok=True)
ntract = pipeline(vpath, subpath)
if ntract==0:
manual.append(vpath)
file.write(vpath)
file.write("\n")
print(f"{videoname} done!!!")
k += 1
if k==10:
break
file.close()
if __name__ == "__main__":
main()

View File

@ -0,0 +1,147 @@
# -*- coding: utf-8 -*-
"""
Created on Tuesday Jan 14 2025
@author: liujiawei
@description: read images over the network, refine the tracks, and crop sub-images
"""
import os
import sys
import cv2
import numpy as np
# from pipeline import pipeline
from detecttracking.tracking import traclus as tr
# from track_reid import parse_opt
from detecttracking.track_reid import yolo_resnet_tracker
from detecttracking.tracking.dotrack.dotracks_back import doBackTracks
from PIL import Image
def save_event_subimgs(imgs, bboxes):
img_list = {}
for i, box in enumerate(bboxes):
x1, y1, x2, y2, tid, score, cls, fid, bid = box
img_list[int(fid)] = imgs[int(fid)][int(y1):int(y2), int(x1):int(x2), :]  # frame dict is keyed by the integer frame id
return img_list
def get_optimized_bboxes(event_tracks):
vts_back = event_tracks
points = []
labels = []
for track in vts_back.Residual:
for ele in track.boxes:
points.append([int(ele[2]), int(ele[3])])
labels.append(int(ele[4])) # track_id
points = np.array(points)
partitions, indices = tr.partition(points, progress_bar=False, w_perpendicular=100, w_angular=10)
bboxes_opt = []
for track in vts_back.Residual:
for i in indices:
if i >= len(track.boxes): continue
if labels[i] == track.boxes[i][4]:
bboxes_opt.append(track.boxes[i])
return bboxes_opt
def get_tracking_info(
vpath,
resnetModel,
yoloModel,
SourceType = "video", # video
stdfeat_path = None
):
optdict = {}
optdict["weights"] = './detecttracking/tracking/ckpts/best_cls10_0906.pt'
optdict["yoloModel"] = yoloModel
optdict["resnetModel"] = resnetModel
optdict["is_save_img"] = False
optdict["is_save_video"] = False
event_tracks = []
video_frames = {}
'''Yolo + Resnet + Tracker'''
optdict["source"] = vpath
optdict["video_frames"] = video_frames
optdict["is_annotate"] = False
yrtOut = yolo_resnet_tracker(**optdict)
trackerboxes = np.empty((0, 9), dtype=np.float64)
trackefeats = {}
for frameDict in yrtOut:
tboxes = frameDict["tboxes"]
ffeats = frameDict["feats"]
trackerboxes = np.concatenate((trackerboxes, np.array(tboxes)), axis=0)
for i in range(len(tboxes)):
fid, bid = int(tboxes[i, 7]), int(tboxes[i, 8])
trackefeats.update({f"{fid}_{bid}": ffeats[f"{fid}_{bid}"]})
vts = doBackTracks(trackerboxes, trackefeats)
vts.classify()
event_tracks.append(("back", vts))
return event_tracks, video_frames
def stream_pipeline(stream_dict, resnetModel, yoloModel):
parmDict = {}
parmDict["vpath"] = stream_dict["video"]
# parmDict["savepath"] = os.path.join('pipeline_output', info_dict["barcode"])
parmDict["SourceType"] = "video" # video, image
parmDict["stdfeat_path"] = None
event_tracks, video_frames = get_tracking_info(**parmDict, resnetModel=resnetModel, yoloModel=yoloModel)
bboxes_opt = get_optimized_bboxes(event_tracks[0][1])
subimg_dict = save_event_subimgs(video_frames, bboxes_opt)
sub_images = []
for fid, img in subimg_dict.items():
pil_image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
sub_images.append(pil_image)
return sub_images
def main():
'''
sample stream_dict:
'''
stream_dict = {
"goodsName" : "优诺优丝黄桃果粒风味发酵乳",
"measureProperty" : 0,
"qty" : 1,
"price" : 25.9,
"weight": 560, # 单位克
"barcode": "6931806801024",
"video" : "https://ieemoo-ai.obs.cn-east-3.myhuaweicloud.com/videos/20231009/04/04_20231009-082149_21f2ca35-f2c2-4386-8497-3e7a3b407f03_4901872831197.mp4",
"goodsPic" : "https://ieemoo-storage.obs.cn-east-3.myhuaweicloud.com/lhpic/6931806801024.jpg",
"measureUnit" : "",
"goodsSpec" : "405g"
}
resnetModel = yoloModel = None  # placeholders; the ReID and YOLO models must be loaded here before running
subimg_list = stream_pipeline(stream_dict, resnetModel, yoloModel)
save_path = os.path.join('subimg', stream_dict["barcode"])
if not os.path.exists(save_path):
os.makedirs(save_path)
else:
for filename in os.listdir(save_path):
file_path = os.path.join(save_path, filename)
if os.path.isfile(file_path):
os.unlink(file_path)
for i, img in enumerate(subimg_list):
img.save(f'{save_path}/frame_{i}.jpg')
print(f'Finish crop subimages {stream_dict["barcode"]}!')
if __name__ == "__main__":
main()

View File

@ -0,0 +1,720 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
Usage - sources:
$ python detect.py --weights yolov5s.pt --source 0 # webcam
img.jpg # image
vid.mp4 # video
screen # screenshot
path/ # directory
list.txt # list of images
list.streams # list of streams
'path/*.jpg' # glob
'https://youtu.be/Zgi9g1ksQHc' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
Usage - formats:
$ python detect.py --weights yolov5s.pt # PyTorch
yolov5s.torchscript # TorchScript
yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s_openvino_model # OpenVINO
yolov5s.engine # TensorRT
yolov5s.mlmodel # CoreML (macOS-only)
yolov5s_saved_model # TensorFlow SavedModel
yolov5s.pb # TensorFlow GraphDef
yolov5s.tflite # TensorFlow Lite
yolov5s_edgetpu.tflite # TensorFlow Edge TPU
yolov5s_paddle_model # PaddlePaddle
"""
import argparse
import csv
import os
import platform
import sys
from pathlib import Path
import glob
import numpy as np
import pickle
import torch
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import DetectMultiBackend
from detecttracking.utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from detecttracking.utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
from detecttracking.utils.torch_utils import select_device, smart_inference_mode
'''Integrated tracking module; writes tracking result files (.npy)'''
# from ultralytics.engine.results import Boxes # Results
# from ultralytics.utils import IterableSimpleNamespace, yaml_load
from tracking.utils.plotting import Annotator, colors
from tracking.utils import Boxes, IterableSimpleNamespace, yaml_load, boxes_add_fid
from tracking.trackers import BOTSORT, BYTETracker
from tracking.utils.showtrack import drawtracks
from hands.hand_inference import hand_pose
from contrast.feat_extract.config import config as conf
from contrast.feat_extract.inference import FeatsInterface
ReIDEncoder = FeatsInterface(conf)  # module-level encoder used by run() below; yolo_resnet_tracker() builds its own from resnetModel
IMG_FORMATS = '.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png', '.tif', '.tiff', '.webp', '.pfm' # include image suffixes
VID_FORMATS = '.asf', '.avi', '.gif', '.m4v', '.mkv', '.mov', '.mp4', '.mpeg', '.mpg', '.ts', '.wmv' # include video suffixes
# from tracking.trackers.reid.reid_interface import ReIDInterface
# from tracking.trackers.reid.config import config as ReIDConfig
# ReIDEncoder = ReIDInterface(ReIDConfig)
# tracker_yaml = r"./tracking/trackers/cfg/botsort.yaml"
# def inference_image(image, detections):
# H, W, _ = np.shape(image)
# imgs = []
# batch_patches = []
# patches = []
# for d in range(np.size(detections, 0)):
# tlbr = detections[d, :4].astype(np.int_)
# tlbr[0] = max(0, tlbr[0])
# tlbr[1] = max(0, tlbr[1])
# tlbr[2] = min(W - 1, tlbr[2])
# tlbr[3] = min(H - 1, tlbr[3])
# img1 = image[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2], :]
# img = img1[:, :, ::-1].copy() # the model expects RGB inputs
# patch = ReIDEncoder.transform(img)
# imgs.append(img1)
# # patch = patch.to(device=self.device).half()
# if str(ReIDEncoder.device) != "cpu":
# patch = patch.to(device=ReIDEncoder.device).half()
# else:
# patch = patch.to(device=ReIDEncoder.device)
# patches.append(patch)
# if (d + 1) % ReIDEncoder.batch_size == 0:
# patches = torch.stack(patches, dim=0)
# batch_patches.append(patches)
# patches = []
# if len(patches):
# patches = torch.stack(patches, dim=0)
# batch_patches.append(patches)
# features = np.zeros((0, ReIDEncoder.embedding_size))
# for patches in batch_patches:
# pred = ReIDEncoder.model(patches)
# pred[torch.isinf(pred)] = 1.0
# feat = pred.cpu().data.numpy()
# features = np.vstack((features, feat))
# return imgs, features
def init_trackers(tracker_yaml=None, bs=1, resnetModel=None):
"""
Initialize trackers for object tracking during prediction.
"""
# tracker_yaml = r"./tracking/trackers/cfg/botsort.yaml"
TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
cfg = IterableSimpleNamespace(**yaml_load(tracker_yaml))
trackers = []
for _ in range(bs):
tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30, resnetModel=resnetModel)
trackers.append(tracker)
return trackers
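# Hedged usage sketch, mirroring the calls further down in this file:
# tracker = init_trackers("./tracking/trackers/cfg/botsort.yaml", bs=1)[0]
# tracks = tracker.update(det_boxes, frame)  # det_boxes: per-frame detections, frame: BGR image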
'''=============== used in pipeline.py =================='''
@smart_inference_mode()
def yolo_resnet_tracker(
weights=ROOT / 'yolov5s.pt', # model path or triton URL
source=ROOT / 'data/images', # file/dir/URL/glob/screen/0(webcam)
save_dir='',
is_save_img=True,
is_save_video=True,
is_annotate=True,
tracker_yaml="./detecttracking/tracking/trackers/cfg/botsort.yaml",
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
data=ROOT / 'data/coco128.yaml', # dataset.yaml path
video_frames=None,
resnetModel=None,
yoloModel=None
):
# source = str(source)
# Load model
# device = select_device(device)
# model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
model = yoloModel
ReIDEncoder = FeatsInterface(resnetModel)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Dataloader
bs = 1 # batch_size
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup
tracker = init_trackers(tracker_yaml, bs, resnetModel)[0]
dt = (Profile(), Profile(), Profile())
# trackerBoxes = np.empty((0, 9), dtype = np.float32)
yoloResnetTracker = []
for path, im, im0s, vid_cap, s in dataset:
with dt[0]:
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
with dt[1]:
# visualize = increment_path(project / Path(path).stem, mkdir=True) if visualize else False
pred = model(im, augment=augment, visualize=False)
# NMS
with dt[2]:
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
if dataset.mode == "video":
frameId = dataset.frame
else:
frameId = dataset.count
# Process predictions
for i, det in enumerate(pred): # per image
im0 = im0s.copy()
annotator = Annotator(im0.copy(), line_width=line_thickness, example=str(names))
s += '%gx%g ' % im.shape[2:] # print string
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
det = det.cpu().numpy()
## ================================================================ written by WQG
'''tracks: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
frame_index may also be the video frame ID; box_index stays unchanged
'''
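# e.g. one illustrative row: [112., 64., 300., 420., 3., 0.91, 2., 57., 0.]
# -> track 3, class 2, confidence 0.91, frame 57, box 0 of that frame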
det_tracking = Boxes(det, im0.shape).cpu().numpy()
tracks = tracker.update(det_tracking, im0)
if len(tracks) > 0:
tracks[:, 7] = frameId
# trackerBoxes = np.concatenate([trackerBoxes, tracks], axis=0)
'''================== 1. Store the dets/subimgs/features dict ============='''
imgs, features = ReIDEncoder.inference(im0, tracks)
imgdict, featdict = {}, {}
for ii, bid in enumerate(tracks[:, 8]):
featdict.update(
{f"{int(frameId)}_{int(bid)}": features[ii, :]}) # [f"feat_{int(bid)}"] = features[i, :]
imgdict.update({f"{int(frameId)}_{int(bid)}": imgs[ii]})
frameDict = {"path": path,
"fid": int(frameId),
"bboxes": det,
"tboxes": tracks,
"imgs": imgdict,
"feats": featdict}
yoloResnetTracker.append(frameDict)
# imgs, features = inference_image(im0, tracks)
# TrackerFeats = np.concatenate([TrackerFeats, features], axis=0)
'''================== 2. Extract hand positions ==================='''
for *xyxy, id, conf, cls, fid, bid in reversed(tracks):
name = ('' if id == -1 else f'id:{int(id)} ') + names[int(cls)]
label = None if hide_labels else (name if hide_conf else f'{name} {conf:.2f}')
if id >= 0 and cls == 0:
color = colors(int(cls), True)
elif id >= 0 and cls != 0:
color = colors(int(id), True)
else:
color = colors(19, True)  # 19 is the last entry in the palette
annotator.box_label(xyxy, label, color=color)
'''====== Save results (image and video) ======'''
# save_path = str(save_dir / Path(path).name)  # includes the file extension
if is_annotate:
im0 = annotator.result()
if is_save_img:
save_path_img = str(save_dir / Path(path).stem)
if dataset.mode == 'image':
imgpath = save_path_img + ".png"
else:
imgpath = save_path_img + f"_{frameId}.png"
cv2.imwrite(imgpath, im0)  # imgpath is already a str; cv2.imwrite expects a str path
if video_frames is not None:
video_frames.update({frameId: im0})
# if dataset.mode == 'video' and is_save_video:
if is_save_video:
if dataset.mode == 'video':
video_path = str(save_dir / Path(path).stem) + '.mp4'  # includes the file extension
else:
videoname = str(Path(path).stem).split('_')[0] + '.mp4'
video_path = str(save_dir / videoname)
if vid_path[i] != video_path:  # new video
vid_path[i] = video_path
if isinstance(vid_writer[i], cv2.VideoWriter):
vid_writer[i].release()  # release previous video writer
if vid_cap:  # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else:  # stream
fps, w, h = 25, im0.shape[1], im0.shape[0]
video_path = str(Path(video_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
vid_writer[i] = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer[i].write(im0)
# Print time (inference-only)
LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
return yoloResnetTracker
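# Each element of the returned yoloResnetTracker list is a per-frame dict (see
# frameDict above): {"path": ..., "fid": int frame id, "bboxes": (N, 6) detections,
# "tboxes": (M, 9) tracks, "imgs": {"fid_bid": crop}, "feats": {"fid_bid": 256-d feature}}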
@smart_inference_mode()
def run(
weights=ROOT / 'yolov5s.pt', # model path or triton URL
source=ROOT / 'data/images', # file/dir/URL/glob/screen/0(webcam)
project=ROOT / 'runs/detect', # save results to project/name
name='exp', # save results to project/name
tracker_yaml="./tracking/trackers/cfg/botsort.yaml",
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
save_txt=False, # save results to *.txt
save_csv=False, # save results in CSV format
save_conf=False, # save confidences in --save-txt labels
save_crop=False, # save cropped prediction boxes
nosave=False, # do not save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
exist_ok=False, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
data=ROOT / 'data/coco128.yaml', # dataset.yaml path
):
'''
source: video file or list of images
'''
source = str(source)
# filename = os.path.split(source)[-1]
save_img = not nosave and not source.endswith('.txt') # save inference images
is_file = Path(source).suffix.lower() in (IMG_FORMATS + VID_FORMATS)  # the format tuples above include the leading dot, so keep the dot in the suffix
is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
webcam = source.isnumeric() or source.endswith('.streams') or (is_url and not is_file)
screenshot = source.lower().startswith('screen')
if is_url and is_file:
source = check_file(source) # download
# spth = source.split('\\')[-2] + "_" + Path(source).stem
save_dir = Path(project) / Path(source.split('\\')[-2] + "_" + str(Path(source).stem))
# save_dir = Path(project) / Path(source).stem
if save_dir.exists():
print(Path(source).stem)
# return
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
else:
save_dir.mkdir(parents=True, exist_ok=True)
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Dataloader
bs = 1 # batch_size
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup
seen, dt = 0, (Profile(), Profile(), Profile())
tracker = init_trackers(tracker_yaml, bs)[0]
handpose = hand_pose()
handlocals_dict = {}
boxes_and_imgs = []
BoxesFeats = []
track_boxes = np.empty((0, 9), dtype=np.float32)
det_boxes = np.empty((0, 9), dtype=np.float32)
DetBoxes = np.empty((0, 6), dtype=np.float32)
TrackerBoxes = np.empty((0, 9), dtype=np.float32)
TrackerFeats = np.empty((0, 256), dtype=np.float32)
features_dict = {}
TracksDict = {}
for path, im, im0s, vid_cap, s in dataset:
if save_img and 'imgshow' not in locals().keys():
imgshow = im0s.copy()
## ============================= tracking only handles video, written by WQG
# if dataset.mode == 'image':
# continue
with dt[0]:
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
with dt[1]:
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred = model(im, augment=augment, visualize=visualize)
# NMS
with dt[2]:
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
# Process predictions
for i, det in enumerate(pred): # per image
seen += 1
if webcam: # batch_size >= 1
p, im0, frame = path[i], im0s[i].copy(), dataset.count
s += f'{i}: '
else:
p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
s += '%gx%g ' % im.shape[2:] # print string
# im0_ant = im0.copy()
annotator = Annotator(im0.copy(), line_width=line_thickness, example=str(names))
nd = len(det)
if nd:
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
# det = det.cpu().numpy()
## ============================================================ reuse features of boxes unchanged between adjacent frames
# def static_estimate(box1, box2, TH1=8, TH2=12):
# dij_abs = max(np.abs(box1 - box2))
# dij_euc = max([np.linalg.norm((box1[:2] - box2[:2])),
# np.linalg.norm((box1[2:4] - box2[2:4]))
# ])
# if dij_abs < TH1 and dij_euc < TH2:
# return True
# else:
# return False
# nw = 3 # window size when checking back over previous frames
# nf = len(BoxesFeats) # number of frames already detected + feature-extracted
# feat_curr = [None] * nd # nd: number of boxes detected in the current frame
# for ii in range(nd):
# box = det[ii, :4]
# kk=1
# feat = None
# while kk <= nw and nf>=kk:
# ki = -1 * kk
# boxes_ = BoxesFeats[ki][0]
# feats_ = BoxesFeats[ki][1]
# flag = [jj for jj in range(len(boxes_)) if static_estimate(box, boxes_[jj, :4])]
# if len(flag) == 1:
# feat = feats_[flag[0]]
# break
# kk += 1
# if feat is not None:
# feat_curr[ii] = feat
## ================================================================ written by WQG
'''tracks: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
frame_index may also be the video frame ID; box_index stays unchanged
'''
det_tracking = Boxes(det, im0.shape).cpu().numpy()
tracks = tracker.update(det_tracking, im0)
if len(tracks) == 0:
continue
if dataset.mode == "video":
frameId = dataset.frame
else:
frameId = dataset.count
tracks[:, 7] = frameId
'''================== 1. Store the dets/subimgs/features dict ============='''
# imgs, features = inference_image(im0, tracks)
imgs, features = ReIDEncoder.inference(im0, tracks)
TrackerFeats = np.concatenate([TrackerFeats, features], axis=0)
imgdict = {}
boxdict = {}
featdict = {}
for ii, bid in enumerate(tracks[:, 8]):
imgdict.update({int(bid): imgs[ii]}) # [f"img_{int(bid)}"] = imgs[i]
boxdict.update({int(bid): tracks[ii, :]}) # [f"box_{int(bid)}"] = tracks[i, :]
featdict.update({int(bid): features[ii, :]}) # [f"feat_{int(bid)}"] = features[i, :]
TracksDict[f"frame_{int(frameId)}"] = {"imgs": imgdict, "boxes": boxdict, "feats": featdict}
track_boxes = np.concatenate([track_boxes, tracks], axis=0)
'''================== 2. Extract hand positions ==================='''
# idx_0 = tracks[:, 6].astype(np.int_) == 0
# hn = 0
# for j, index in enumerate(idx_0):
# if index:
# track = tracks[j, :]
# hand_local, imgshow = handpose.get_hand_local(track, im0)
# handlocals_dict.update({int(track[7]): {int(track[8]): hand_local}})
# # '''The recall of YOLOv5 and the hand detector differ; substituting hand_local for the hand (x1, y1, x2, y2) in tracks would mix two coordinate conventions'''
# # if hand_local: tracks[j, :4] = hand_local
# hn += 1
# cv2.imwrite(f"D:\DeepLearning\yolov5\hands\images\{Path(source).stem}_{int(track[7])}_{hn}.png", imgshow)
for *xyxy, id, conf, cls, fid, bid in reversed(tracks):
name = ('' if id == -1 else f'id:{int(id)} ') + names[int(cls)]
label = None if hide_labels else (name if hide_conf else f'{name} {conf:.2f}')
if id >= 0 and cls == 0:
color = colors(int(cls), True)
elif id >= 0 and cls != 0:
color = colors(int(id), True)
else:
color = colors(19, True)  # 19 is the last entry in the palette
annotator.box_label(xyxy, label, color=color)
# Save results (image and video with tracking)
im0 = annotator.result()
p = Path(p) # to Path
save_path = str(save_dir / p.name) # im.jpg
if save_img:
save_path_img, ext = os.path.splitext(save_path)
if dataset.mode == 'image':
imgpath = save_path_img + ".png"
else:
imgpath = save_path_img + f"_{frameId}.png"
cv2.imwrite(imgpath, im0)  # imgpath is already a str; cv2.imwrite expects a str path
if dataset.mode == 'video':
if vid_path[i] != save_path: # new video
vid_path[i] = save_path
if isinstance(vid_writer[i], cv2.VideoWriter):
vid_writer[i].release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer[i].write(im0)
# Print time (inference-only)
LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
if track_boxes.size == 0:
return
## ======================================================================== written by WQG
## track_boxes: Array, [x1, y1, x2, y2, track_id, score, cls, frame_index, box_id]
TracksDict.update({"TrackBoxes": track_boxes})
'''上面已保存检测结果(视频和图像),以下再保存其他几类数据'''
filename = os.path.split(save_path_img)[-1]  # 注意save_path_img 仅在 save_img 为 True 时定义
'''======================== 1. save in './run/detect/' ===================='''
if source.find("front") >= 0 or Path(source).stem.split('_')[0] == '1':
carttemp = cv2.imread("./tracking/shopcart/cart_tempt/board_ftmp_line.png")
else:
carttemp = cv2.imread("./tracking/shopcart/cart_tempt/edgeline.png")
imgshow = drawtracks(track_boxes, carttemp)
showpath_1 = save_path_img + "_show.png"
cv2.imwrite(showpath_1, imgshow)
'''======================== 2. save dets/subimgs/features Dict =================='''
trackdicts_dir = Path('./tracking/data/trackdicts/')
if not trackdicts_dir.exists():
trackdicts_dir.mkdir(parents=True, exist_ok=True)
trackdicts_dir = trackdicts_dir.joinpath(f'{filename}.pkl')
with open(trackdicts_dir, 'wb') as file:
pickle.dump(TracksDict, file)
# np.save(f'{filename}.npy', DetBoxes)
'''======================== 3. save hand_local data =================='''
# handlocal_dir = Path('./tracking/data/handlocal/')
# if not handlocal_dir.exists():
# handlocal_dir.mkdir(parents=True, exist_ok=True)
# handlocal_path = handlocal_dir.joinpath(f'{filename}.pkl')
# with open(handlocal_path, 'wb') as file:
# pickle.dump(handlocals_dict, file)
# Print results
t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image
LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning)
def parse_opt():
modelpath = ROOT / 'ckpts/best_cls10_0906.pt' # 'ckpts/best_15000_0908.pt', 'ckpts/yolov5s.pt', 'ckpts/best_20000_cls30.pt, best_yolov5m_250000'
'''datapath为视频文件目录或视频文件'''
datapath = r"D:/datasets/ym/videos/标记视频/" # ROOT/'data/videos', ROOT/'data/images' images
# datapath = r"D:\datasets\ym\highvalue\videos"
# datapath = r"D:/dcheng/videos/"
# modelpath = ROOT / 'ckpts/yolov5s.pt'
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default=modelpath,
help='model path or triton URL') # 'yolov5s.pt', best_15000_0908.pt
parser.add_argument('--source', type=str, default=datapath,
help='file/dir/URL/glob/screen/0(webcam)') # images, videos
parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='show results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-csv', action='store_true', help='save results in CSV format')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--visualize', action='store_true', help='visualize features')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
parser.add_argument('--name', default='exp', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def find_video_imgs(root_dir):
all_files = []
for dirpath, dirnames, filenames in os.walk(root_dir):
for filename in filenames:
file, ext = os.path.splitext(filename)
if ext in IMG_FORMATS + VID_FORMATS:
all_files.append(os.path.join(dirpath, filename))
return all_files
def main():
'''
run(): 单张图像或单个视频文件的推理,不支持图像序列,
'''
check_requirements(ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
opt = parse_opt()
optdict = vars(opt)
# p = r"D:\datasets\ym\永辉测试数据_比对"
# p = r"D:\datasets\ym\广告板遮挡测试\8"
# p = r"D:\datasets\ym\videos\标记视频"
# p = r"D:\datasets\ym\实验室测试"
# p = r"D:\datasets\ym\永辉双摄视频\新建文件夹"
# p = r"\\192.168.1.28\share\测试_202406\0723\0723_2\20240723-112522_"
# p = r"D:\datasets\ym\联华中环"
# p = r"D:\exhibition\images\153112511_0_seek_105.mp4"
# p = r"D:\exhibition\images\image"
p = r"\\192.168.1.28\share\数据\原始数据\小物品数据\视频\82654976401_20241213-143457_front_addGood_5478c9a53bbe_40_17700000001.mp4"
optdict["project"] = r"D:\小物品入侵检测\result"
# optdict["project"] = r"D:\exhibition\result"
if os.path.isdir(p):
files = find_video_imgs(p)
k = 0
for file in files:
optdict["source"] = file
run(**optdict)
k += 1
if k == 1: # 调试:仅处理第一个视频文件
break
elif os.path.isfile(p):
optdict["source"] = p
run(**optdict)
if __name__ == '__main__':
main()

View File

Binary file not shown.

View File

@ -0,0 +1,650 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:16:01 2024
@author: ym
"""
import numpy as np
import cv2
from pathlib import Path
from scipy.spatial.distance import cdist
from tracking.utils.mergetrack import track_equal_track, readDict
curpath = Path(__file__).resolve().parents[0]
curpath = Path(curpath)
parpath = curpath.parent
class MoveState:
"""商品运动状态标志"""
Static = 0
DownWard = 1
UpWard = 2
FreeMove = 3
Unknown = -1
class ShoppingCart:
def __init__(self, bboxes):
self.bboxes = bboxes
self.loadrate = self.load_rate()
def load_rate(self):
bboxes = self.bboxes
fid = min(bboxes[:, 7])
idx = bboxes[:, 7] == fid
boxes = bboxes[idx]
temp = np.zeros(self.incart.shape, np.uint8)
for i in range(boxes.shape[0]):
x1, y1, x2, y2, tid = boxes[i, 0:5]
cv2.rectangle(temp, (int(x1), int(y1)), (int(x2), int(y2)), 255, cv2.FILLED)
'''1. and 滤除购物车边框外的干扰'''
loadstate = cv2.bitwise_and(self.incart, temp)
'''2. xor 得到购物车内内被填充的区域'''
# loadstate = cv2.bitwise_xor(self.incart, temp1)
num_loadstate = cv2.countNonZero(loadstate)
num_incart = cv2.countNonZero(self.incart)
loadrate = num_loadstate / (num_incart+0.01)
# edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png", cv2.IMREAD_GRAYSCALE)
# cv2.imwrite(f"./test/temp.png", cv2.add(temp, edgeline))
# cv2.imwrite(f"./test/incart.png", cv2.add(self.incart, edgeline))
# cv2.imwrite(f"./test/loadstate.png", cv2.add(loadstate, edgeline))
return loadrate
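# 示例(数值为假设):若首帧各 box 覆盖的车内像素约 1.2e5、车内掩码总像素约
# 4.0e5则 load_rate ≈ 0.30,即购物车约三成被占据。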
@property
def incart(self):
img = cv2.imread(str(parpath/'shopcart/cart_tempt/incart.png'), cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
@property
def outcart(self):
img = cv2.imread(str(parpath/'shopcart/cart_tempt/outcart.png'), cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
@property
def cartedge(self):
img = cv2.imread(str(parpath/'shopcart/cart_tempt/cartedge.png'), cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
class Track:
'''抽象基类,不能实例化对象'''
def __init__(self, boxes, features=None, imgshape=(1024, 1280)):
'''
boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
'''
# assert len(set(boxes[:, 4].astype(int))) == 1, "For a Track, track_id more than 1"
# assert len(set(boxes[:, 6].astype(int))) == 1, "For a Track, class number more than 1"
self.boxes = boxes
self.features = features
self.tid = int(boxes[0, 4])
self.cls = int(boxes[0, 6])
self.frnum = boxes.shape[0]
self.isCornpoint = False
self.imgshape = imgshape
# self.isBorder = False
# self.state = MoveState.Unknown
'''轨迹开始帧、结束帧 ID'''
# self.start_fid = int(np.min(boxes[:, 7]))
# self.end_fid = int(np.max(boxes[:, 7]))
''''''
self.Hands = []
self.HandsIou = []
self.Goods = []
self.GoodsIou = []
'''5个关键点中心点、左上点、右上点、左下点、右下点 )坐标'''
self.compute_cornpoints()
'''5个关键点轨迹特征可以在子类中实现降低顺序处理时的计算量
(中心点、左上点、右上点、左下点、右下点 )轨迹特征'''
self.compute_cornpts_feats()
'''应计算各个角点面积、平均面积'''
mw, mh = np.mean(boxes[:, 2]-boxes[:, 0]), np.mean((boxes[:, 3]-boxes[:, 1]))
self.mwh = np.mean((mw, mh))
self.Area = mw * mh
'''
最后一帧与第一帧间的位移:
vshift: 正值为向下,负值为向上
hshift: 负值为向购物车边框两边移动,正值为物品向中心移动
'''
self.vshift = self.cornpoints[-1, 1] - self.cornpoints[0, 1] # 纵向位移
self.hshift = abs(self.cornpoints[0, 0] - self.imgshape[0]/2) - \
abs(self.cornpoints[-1, 0] - self.imgshape[0]/2)
'''手部状态分析'''
self.HAND_STATIC_THRESH = 100
if self.cls == 0:
self.extract_hand_features()
def compute_cornpoints(self):
'''
cornpoints 共10项分别是个点的坐标值x, y
(center, top_left, top_right, bottom_left, bottom_right)
'''
boxes = self.boxes
cornpoints = np.zeros((self.frnum, 10))
cornpoints[:,0] = (boxes[:, 0] + boxes[:, 2]) / 2
cornpoints[:,1] = (boxes[:, 1] + boxes[:, 3]) / 2
cornpoints[:,2], cornpoints[:,3] = boxes[:, 0], boxes[:, 1]
cornpoints[:,4], cornpoints[:,5] = boxes[:, 2], boxes[:, 1]
cornpoints[:,6], cornpoints[:,7] = boxes[:, 0], boxes[:, 3]
cornpoints[:,8], cornpoints[:,9] = boxes[:, 2], boxes[:, 3]
self.cornpoints = cornpoints
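# cornpoints[i] 布局示意:[cx, cy, x1, y1, x2, y1, x1, y2, x2, y2],
# 即第 i 帧 box 的中心、左上、右上、左下、右下五个关键点的 (x, y) 坐标。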
def compute_cornpts_feats(self):
'''
'''
# print(f"TrackID: {self.tid}")
trajectory = []
trajlens = []
trajdist = []
trajrects = []
trajrects_wh = []
for k in range(5):
# diff_xy2 = np.power(np.diff(self.cornpoints[:, 2*k:2*(k+1)], axis = 0), 2)
# trajlen = np.sum(np.sqrt(np.sum(diff_xy2, axis = 1)))
X = self.cornpoints[:, 2*k:2*(k+1)]
traj = np.linalg.norm(np.diff(X, axis=0), axis=1)
trajectory.append(traj)
trajlen = np.sum(traj)
trajlens.append(trajlen)
ptdist = np.max(cdist(X, X))
trajdist.append(ptdist)
'''最小外接矩形:
rect[0]: 中心(x, y)
rect[1]: (w, h)
rect[2]: 旋转角度 (-90°, 0]
'''
rect = cv2.minAreaRect(X.astype(np.int32))  # minAreaRect 仅支持 int32/float32 点集
rect_wh = max(rect[1])
trajrects_wh.append(rect_wh)
trajrects.append(rect)
self.trajectory = trajectory
self.trajlens = trajlens
self.trajdist = trajdist
self.trajrects = trajrects
self.trajrects_wh = trajrects_wh
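# 示例toy 数据,仅作说明cv2.minAreaRect 对一段中心点轨迹的返回值:
#   X = np.array([[100, 100], [110, 102], [120, 104]], dtype=np.int32)
#   rect = cv2.minAreaRect(X)   # ((cx, cy), (w, h), angle)
#   max(rect[1])                # 最小外接矩形长边,作为关键点运动幅度的度量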
def trajfeature(self):
'''
分两种情况计算轨迹特征(检测框边界不在图像边界范围内,在图像边界范围内):
-最小/最大长度轨迹trajmin、trajmax
-最小/最大轨迹长度trajlen_min、trajlen_max
-最小/最大轨迹欧氏距离trajdist_min、trajdist_max
'''
# idx1 = self.trajlens.index(max(self.trajlens))
idx1 = self.trajrects_wh.index(max(self.trajrects_wh))
trajmax = self.trajectory[idx1]
trajlen_max = self.trajlens[idx1]
trajdist_max = self.trajdist[idx1]
if not self.isCornpoint:
# idx2 = self.trajlens.index(min(self.trajlens))
idx2 = self.trajrects_wh.index(min(self.trajrects_wh))
trajmin = self.trajectory[idx2]
trajlen_min = self.trajlens[idx2]
trajdist_min = self.trajdist[idx2]
else:
trajmin = self.trajectory[0]
trajlen_min = self.trajlens[0]
trajdist_min = self.trajdist[0]
'''最小轨迹长度/最大轨迹长度,越小,代表运动幅度越小'''
trajlen_rate = trajlen_min/(trajlen_max+0.0001)
'''最小轨迹欧氏距离/目标框尺度均值'''
trajdist_rate = trajdist_min/(self.mwh+0.0001)
self.trajmin = trajmin
self.trajmax = trajmax
self.TrajFeat = [trajlen_min, trajlen_max,
trajdist_min, trajdist_max,
trajlen_rate, trajdist_rate]
def pt_state_fids(self, det_y, STATIC_THRESH = 8):
'''
前摄时y一般选择为 box 的 y1 坐标,且需限定商品在购物车内。
inputs
det_y1D array相邻帧坐标的差分序列
parameters
STATIC_THRESH判定轨迹处于静止状态的差分阈值
outputs
ranges静止区间列表 [(start, end), ...],对应差分绝对值小于 STATIC_THRESH 的连续段
rangex运动区间列表 [(start, end), ...],静止区间之外的部分
'''
# print(f"The ID is: {self.tid}")
# det_y = np.diff(y, axis=0)
ranges, rangex = [], []
static_indices = np.where(np.abs(det_y) < STATIC_THRESH)[0]
if len(static_indices) == 0:
rangex.append((0, len(det_y)))
return ranges, rangex
start_index = static_indices[0]
for i in range(1, len(static_indices)):
if static_indices[i] != static_indices[i-1] + 1:
ranges.append((start_index, static_indices[i-1] + 1))
start_index = static_indices[i]
ranges.append((start_index, static_indices[-1] + 1))
if len(ranges) == 0:
rangex.append((0, len(det_y)))
return ranges, rangex
idx1, idx2 = ranges[0][0], ranges[-1][1]
if idx1 != 0:
rangex.append((0, idx1))
# 轨迹的最后阶段是运动状态
for k in range(1, len(ranges)):
index1 = ranges[k-1][1]
index2 = ranges[k][0]
rangex.append((index1, index2))
if idx2 != len(det_y):
rangex.append((idx2, len(det_y)))
return ranges, rangex
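# 用法示意(数据为假设):det_y = np.array([1, 2, 30, 1, 1])、STATIC_THRESH=8 时,
# 静止索引为 [0, 1, 3, 4],返回 ranges = [(0, 2), (3, 5)](静止区间)、
# rangex = [(2, 3)](运动区间)。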
def PositionState(self, camerType="back"):
'''
camerType: back, 后置摄像头
front, 前置摄像头
'''
if camerType=="back":
incart = cv2.imread(str(parpath/'shopcart/cart_tempt/incart.png'), cv2.IMREAD_GRAYSCALE)
outcart = cv2.imread(str(parpath/'shopcart/cart_tempt/outcart.png'), cv2.IMREAD_GRAYSCALE)
else:
incart = cv2.imread(str(parpath/'shopcart/cart_tempt/incart_ftmp.png'), cv2.IMREAD_GRAYSCALE)
outcart = cv2.imread(str(parpath/'shopcart/cart_tempt/outcart_ftmp.png'), cv2.IMREAD_GRAYSCALE)
# incart = cv2.imread('./cart_tempt/incart_ftmp.png', cv2.IMREAD_GRAYSCALE)
# outcart = cv2.imread('./cart_tempt/outcart_ftmp.png', cv2.IMREAD_GRAYSCALE)
xc, yc = self.cornpoints[:,0].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,1].clip(0,self.imgshape[1]-1).astype(np.int64)
x1, y1 = self.cornpoints[:,6].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,7].clip(0,self.imgshape[1]-1).astype(np.int64)
x2, y2 = self.cornpoints[:,8].clip(0,self.imgshape[0]-1).astype(np.int64), self.cornpoints[:,9].clip(0,self.imgshape[1]-1).astype(np.int64)
# print(self.tid)
Cent_inCartnum = np.count_nonzero(incart[(yc, xc)])
LB_inCartnum = np.count_nonzero(incart[(y1, x1)])
RB_inCartnum = np.count_nonzero(incart[(y2, x2)])
Cent_outCartnum = np.count_nonzero(outcart[(yc, xc)])
LB_outCartnum = np.count_nonzero(outcart[(y1, x1)])
RB_outCartnum = np.count_nonzero(outcart[(y2, x2)])
'''Track完全在车内左下角点、右下角点与 outcart 的交集为 0'''
self.isWholeInCart = False
if LB_outCartnum + RB_outCartnum == 0:
self.isWholeInCart = True
'''Track完全在车外左下角点、中心点与 incart 的交集为 0
右下角点、中心点与 incart 的交集为 0
'''
self.isWholeOutCart = False
if Cent_inCartnum + LB_inCartnum == 0 or Cent_inCartnum + RB_inCartnum == 0:
self.isWholeOutCart = True
self.Cent_isIncart = False
self.LB_isIncart = False
self.RB_isIncart = False
if Cent_inCartnum: self.Cent_isIncart = True
if LB_inCartnum: self.LB_isIncart = True
if RB_inCartnum: self.RB_isIncart = True
self.posState = self.Cent_isIncart+self.LB_isIncart+self.RB_isIncart
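# posState ∈ {0, 1, 2, 3}:中心点、左下点、右下点三者中落在车内掩码上的个数,
# 3 表示三个关键点均在购物车内。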
def is_freemove(self):
# if self.tid==4:
# print(f"track ID: {self.tid}")
# boxes = self.boxes
# features = self.features
# similars = 1 - np.maximum(0.0, cdist(self.features, self.features, metric = 'cosine'))
box1 = self.boxes[0, :4]
box2 = self.boxes[-1, :4]
''' 第1帧、最后一帧subimg的相似度 '''
feat1 = self.features[0, :][None, :]
feat2 = self.features[-1, :][None, :]
similar = 1 - np.maximum(0.0, cdist(feat1, feat2, metric = 'cosine'))
condta = similar > 0.8
''' 第1帧、最后一帧 boxes 四个角点间的距离 '''
ptd = box2 - box1
ptd1 = np.linalg.norm((ptd[0], ptd[1]))
ptd2 = np.linalg.norm((ptd[2], ptd[1]))
ptd3 = np.linalg.norm((ptd[0], ptd[3]))
ptd4 = np.linalg.norm((ptd[2], ptd[3]))
condtb = ptd1<50 and ptd2<50 and ptd3<50 and ptd4<50
condt = condta and condtb
return condt
def extract_hand_features(self):
assert self.cls == 0, "The class of traj must be HAND!"
self.isHandStatic = False
x0 = (self.boxes[:, 0] + self.boxes[:, 2]) / 2
y0 = (self.boxes[:, 1] + self.boxes[:, 3]) / 2
handXY = np.stack((x0, y0), axis=-1)
# handMaxY0 = np.max(y0)
handCenter = np.array([(max(x0)+min(x0))/2, (max(y0)+min(y0))/2])
handMaxDist = np.max(np.linalg.norm(handXY - handCenter))
if handMaxDist < self.HAND_STATIC_THRESH:
self.isHandStatic = True
return
class doTracks:
def __init__(self, bboxes, trackefeats):
'''fundamental property
trackefeats: dict, key 格式 "fid_bid"
'''
self.bboxes = bboxes
# self.TracksDict = TracksDict
self.frameID = np.unique(bboxes[:, 7].astype(int))
self.trackID = np.unique(bboxes[:, 4].astype(int))
self.lboxes = self.array2list()
self.lfeats = self.getfeats(trackefeats)
'''对 self.tracks 中的元素进行分类,将 track 归入相应列表中'''
self.Hands = []
self.Kids = []
self.Static = []
self.Residual = []
self.Confirmed = []
self.DownWard = [] # subset of self.Residual
self.UpWard = [] # subset of self.Residual
self.FreeMove = [] # subset of self.Residual
def array2list(self):
'''
将 bboxes 变换为 track 列表
bboxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
Return
lboxes列表列表中元素具有同一 track_idx1y1x2y2 格式
[x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
'''
track_ids = self.bboxes[:, 4].astype(int)
lboxes = []
for t_id in self.trackID:
# print(f"The ID is: {t_id}")
idx = np.where(track_ids == t_id)[0]
box = self.bboxes[idx, :]
assert len(set(box[:, 7])) == len(box), f"Track {t_id}: frame_index 存在重复"
lboxes.append(box)
return lboxes
def getfeats(self, trackefeats):
lboxes = self.lboxes
lfeats = []
for boxes in lboxes:
feats = []
for i in range(boxes.shape[0]):
fid, bid = int(boxes[i, 7]), int(boxes[i, 8])
key = f"{int(fid)}_{int(bid)}"
if key in trackefeats:
feats.append(trackefeats[key])
feats = np.asarray(feats, dtype=np.float32)
lfeats.append(feats)
return lfeats
def similarity(self):
nt = len(self.tracks)
similar_dict = {}
if nt >= 2:
for i in range(nt):
for j in range(i, nt):
tracka = self.tracks[i]
trackb = self.tracks[j]
similar = self.feat_similarity(tracka, trackb)
similar_dict.update({(tracka.tid, trackb.tid): similar})
return similar_dict
def feat_similarity(self, tracka, trackb, metric='cosine'):
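# 注意:该方法依赖旧版属性 self.features_dict当前 __init__ 已改用
# trackefeats/lfeats未再构建该属性调用前需自行准备。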
boxes_a, boxes_b = tracka.boxes, trackb.boxes
na, nb = tracka.boxes.shape[0], trackb.boxes.shape[0]
feata, featb = [], []
for i in range(na):
fid, bid = tracka.boxes[i, 7:9]
feata.append(self.features_dict[fid][bid])
for i in range(nb):
fid, bid = trackb.boxes[i, 7:9]
featb.append(self.features_dict[fid][bid])
feata = np.asarray(feata, dtype=np.float32)
featb = np.asarray(featb, dtype=np.float32)
similarity_matrix = 1-np.maximum(0.0, cdist(feata, featb, metric))
feata_m = np.mean(feata, axis =0)[None,:]
featb_m = np.mean(featb, axis =0)[None,:]
simi_ab = 1 - cdist(feata_m, featb_m, metric)
print(f'tid {int(boxes_a[0, 4])} vs {int(boxes_b[0, 4])}: {simi_ab[0][0]}')
# return np.max(similarity_matrix)
return simi_ab
def merge_tracks_loop(self, alist):
na, nb = len(alist), 0
while na!=nb:
na = len(alist)
alist = self.merge_tracks(alist) #func is from subclass
nb = len(alist)
return alist
def base_merge_tracks(self, Residual):
"""
对不同id但可能是同一商品的目标进行归并
"""
mergedTracks = []
alist = [t for t in Residual]
while alist:
atrack = alist[0]
cur_list = []
cur_list.append(atrack)
alist.pop(0)
blist = [b for b in alist]
alist = []
for btrack in blist:
# afids = []
# for track in cur_list:
# afids.extend(list(track.boxes[:, 7].astype(np.int_)))
# bfids = btrack.boxes[:, 7].astype(np.int_)
# interfid = set(afids).intersection(set(bfids))
# if len(interfid):
# print("wait!!!")
# if track_equal_track(atrack, btrack) and len(interfid)==0:
if track_equal_track(atrack, btrack):
cur_list.append(btrack)
else:
alist.append(btrack)
mergedTracks.append(cur_list)
return mergedTracks
@staticmethod
def join_tracks(tlista, tlistb):
"""Combine two lists of stracks into a single one."""
exists = {}
res = []
for t in tlista:
exists[t.tid] = 1
res.append(t)
for t in tlistb:
tid = t.tid
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
@staticmethod
def sub_tracks(tlista, tlistb):
track_ids_b = {t.tid for t in tlistb}
return [t for t in tlista if t.tid not in track_ids_b]
def array2frame(self, bboxes):
frameID = np.sort(np.unique(bboxes[:, 7].astype(int)))
fboxes = []
for fid in frameID:
idx = np.where(bboxes[:, 7] == fid)[0]
box = bboxes[idx, :]
fboxes.append(box)
return fboxes
def isintrude(self):
'''
boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
'''
OverlapNum = 3
bboxes = self.bboxes.astype(np.int64)
fboxes = self.array2frame(bboxes)
# bitwise_not 结果仅用于取形状,这里直接使用 self.incart.shape
sum_incart = np.zeros(self.incart.shape, dtype=np.int64)
for fid, boxes in enumerate(fboxes):
for i in range(len(boxes)):
x1, y1, x2, y2 = boxes[i, 0:4]
sum_incart[y1:y2, x1:x2] += 1
sumincart = np.zeros(sum_incart.shape, dtype=np.uint8)
idx255 = np.where(sum_incart >= OverlapNum)
sumincart[idx255] = 255
idxnzr = np.where(sum_incart!=0)
base = np.zeros(sum_incart.shape, dtype=np.uint8)
base[idxnzr] = 255
contours_sum, _ = cv2.findContours(sumincart, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours_base, _ = cv2.findContours(base, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
have_existed, invasion = [], []
for k, ct_temp in enumerate(contours_base):
tmp1 = np.zeros(sum_incart.shape, dtype=np.uint8)
cv2.drawContours(tmp1, [ct_temp], -1, 255, cv2.FILLED)
# 确定轮廓的包含关系
for ct_sum in contours_sum:
tmp2 = np.zeros(sum_incart.shape, dtype=np.uint8)
cv2.drawContours(tmp2, [ct_sum], -1, 255, cv2.FILLED)
tmp = cv2.bitwise_and(tmp1, tmp2)
if np.count_nonzero(tmp) == np.count_nonzero(tmp2):
have_existed.append(k)
inIdx = [i for i in range(len(contours_base)) if i not in have_existed]
invasion = np.zeros(sum_incart.shape, dtype=np.uint8)
for i in inIdx:
cv2.drawContours(invasion, [contours_base[i]], -1, 255, cv2.FILLED)
cv2.imwrite("./result/intrude/invasion.png", invasion)
Intrude = True if len(inIdx)>=1 else False
print(f"is intruded: {Intrude}")
return Intrude
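# 累加思路示意6x6 网格与 box 均为假设):每个 box 在画布上 +1覆盖次数
# >= OverlapNum 的像素即构成稳定区域 sumincart
#   canvas = np.zeros((6, 6), np.int64)
#   for x1, y1, x2, y2 in [(0, 0, 3, 3), (1, 1, 4, 4), (2, 2, 5, 5)]:
#       canvas[y1:y2, x1:x2] += 1
#   (canvas >= 3)   # 至少被 3 个 box 覆盖的像素,如 (2, 2)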

View File

@ -0,0 +1,267 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:36:31 2024
@author: ym
"""
import numpy as np
import cv2
import copy
from tracking.utils.mergetrack import track_equal_track
from scipy.spatial.distance import cdist
from pathlib import Path
curpath = Path(__file__).resolve().parents[0]
curpath = Path(curpath)
parpath = curpath.parent
from .dotracks import doTracks, ShoppingCart
from .track_back import backTrack
class doBackTracks(doTracks):
def __init__(self, bboxes, trackefeats):
super().__init__(bboxes, trackefeats)
self.tracks = [backTrack(b, f) for b, f in zip(self.lboxes, self.lfeats)]
# self.similar_dict = self.similarity()
# self.shopcart = ShoppingCart(bboxes)
self.incart = self.getincart()
def getincart(self):
img1 = cv2.imread(str(parpath/'shopcart/cart_tempt/incart.png'), cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread(str(parpath/'shopcart/cart_tempt/cartedge.png'), cv2.IMREAD_GRAYSCALE)
ret, binary1 = cv2.threshold(img1, 250, 255, cv2.THRESH_BINARY)
ret, binary2 = cv2.threshold(img2, 250, 255, cv2.THRESH_BINARY)
binary = cv2.bitwise_or(binary1, binary2)
return binary
def classify(self):
'''功能:对 tracks 中元素分类 '''
tracks = self.tracks
# 提取手的frame_id并和动目标的frame_id 进行关联
hand_tracks = [t for t in tracks if t.cls==0]
self.Hands.extend(hand_tracks)
tracks = self.sub_tracks(tracks, hand_tracks)
# 提取小孩的track并计算状态left, right, incart
kid_tracks = [t for t in tracks if t.cls==9]
kid_states = [self.kid_state(t) for t in kid_tracks]
self.Kids = [x for x in zip(kid_tracks, kid_states)]
tracks = self.sub_tracks(tracks, kid_tracks)
out_track = [t for t in tracks if t.isWholeOutCart]
tracks = self.sub_tracks(tracks, out_track)
static_tracks = [t for t in tracks if t.frnum>1 and t.is_static()]
self.Static.extend(static_tracks)
'''剔除静止目标后的 tracks'''
tracks = self.sub_tracks(tracks, static_tracks)
tracks_free = [t for t in tracks if t.frnum>1 and t.is_freemove()]
self.FreeMove.extend(tracks_free)
tracks = self.sub_tracks(tracks, tracks_free)
# '''购物框边界外具有运动状态的干扰目标'''
# out_trcak = [t for t in tracks if t.is_OutTrack()]
# tracks = self.sub_tracks(tracks, out_trcak)
'''轨迹循环归并'''
# merged_tracks = self.merge_tracks(tracks)
merged_tracks = self.merge_tracks_loop(tracks)
[self.associate_with_hand(htrack, gtrack) for htrack in hand_tracks for gtrack in tracks]
tracks = [t for t in merged_tracks if t.frnum > 1]
self.merged_tracks = merged_tracks
static_tracks = [t for t in tracks if t.frnum>1 and t.is_static()]
self.Static.extend(static_tracks)
tracks = self.sub_tracks(tracks, static_tracks)
# for gtrack in tracks:
# for htrack in hand_tracks:
# hand_ious = self.associate_with_hand(htrack, gtrack)
# if len(hand_ious):
# gtrack.Hands.append(htrack)
# gtrack.HandsIou.append(hand_ious)
# htrack.Goods.append((gtrack, hand_ious))
# for htrack in hand_tracks:
# self.merge_based_hands(htrack)
self.Residual = tracks
self.Confirmed = self.confirm_track()
def confirm_track(self):
Confirmed = None
mindist = 0
for track in self.Residual:
md = min(track.trajrects_wh)
if md > mindist:
mindist = copy.deepcopy(md)
Confirmed = copy.deepcopy(track)
if Confirmed is not None:
return [Confirmed]
return []
# def merge_based_hands(self, htrack):
# gtracks = htrack.Goods
# if len(gtracks) >= 2:
# atrack, afious = gtracks[0]
# btrack, bfious = gtracks[1]
def associate_with_hand(self, htrack, gtrack):
'''
迁移至基类:
手部 Track、商品 Track 建立关联的依据:
a. 运动帧的帧索引有交集
b. 帧索引交集部分iou均大于0
'''
assert htrack.cls==0 and gtrack.cls!=0 and gtrack.cls!=9, 'Track cls is Error!'
hand_ious = []
hboxes = np.empty(shape=(0, 9), dtype = np.float64)
gboxes = np.empty(shape=(0, 9), dtype = np.float64)
# start, end 为索引值,需要 start:(end+1)
for start, end in htrack.moving_index:
hboxes = np.concatenate((hboxes, htrack.boxes[start:end+1, :]), axis=0)
for start, end in gtrack.moving_index:
gboxes = np.concatenate((gboxes, gtrack.boxes[start:end+1, :]), axis=0)
hfids, gfids = hboxes[:, 7], gboxes[:, 7]
fids = sorted(set(hfids).intersection(set(gfids)))
if len(fids)==0:
return None
# print(f"Goods ID: {gtrack.tid}, Hand ID: {htrack.tid}")
for f in fids:
h = np.where(hboxes[:,7] == f)[0][0]
g = np.where(gboxes[:,7] == f)[0][0]
x11, y11, x12, y12 = hboxes[h, 0:4]
x21, y21, x22, y22 = gboxes[g, 0:4]
x1, y1 = max((x11, x21)), max((y11, y21))
x2, y2 = min((x12, x22)), min((y12, y22))
inter = (x2 - x1).clip(0) * (y2 - y1).clip(0)
area1 = (x12 - x11) * (y12 - y11)
area2 = (x22 - x21) * (y22 - y21)
iou = inter / (area1 + area2 - inter + 1e-6)
if iou >= 0.01:
gtrack.Hands.append((htrack.tid, f, iou))
return gtrack.Hands
def merge_tracks(self, Residual):
"""
对不同id但可能是同一商品的目标进行归并
和 dotrack_front.py中函数相同可以合并可以合并至基类
"""
mergedTracks = self.base_merge_tracks(Residual)
oldtracks, newtracks = [], []
for tracklist in mergedTracks:
if len(tracklist) > 1:
boxes = np.empty((0, 9), dtype=np.float32)
feats = np.empty((0, 256), dtype=np.float32)
for i, track in enumerate(tracklist):
if i==0: ntid, ncls=track.boxes[0, 4], track.boxes[0, 6]
iboxes = track.boxes.copy()
ifeats = track.features.copy()
# iboxes[:, 4], iboxes[:, 6] = ntid, ncls
boxes = np.concatenate((boxes, iboxes), axis=0)
feats = np.concatenate((feats, ifeats), axis=0)
oldtracks.append(track)
fid_indices = np.argsort(boxes[:, 7])
boxes_fid = boxes[fid_indices]
feats_fid = feats[fid_indices]
newtracks.append(backTrack(boxes_fid, feats_fid))
elif len(tracklist) == 1:
oldtracks.append(tracklist[0])
newtracks.append(tracklist[0])
redu = self.sub_tracks(Residual, oldtracks)
merged = self.join_tracks(redu, newtracks)
return merged
def kid_state(self, track):
left_dist = track.cornpoints[:, 2]
right_dist = 1024 - track.cornpoints[:, 4]
if np.sum(left_dist<30)/track.frnum>0.8 and np.sum(right_dist>512)/track.frnum>0.7:
kidstate = "left"
elif np.sum(left_dist>512)/track.frnum>0.7 and np.sum(right_dist<30)/track.frnum>0.8:
kidstate = "right"
else:
kidstate = "incart"
return kidstate
def isuptrack(self, track):
Flag = False
return Flag
def isdowntrack(self, track):
Flag = False
return Flag
def isfreetrack(self, track):
Flag = False
return Flag

View File

@ -0,0 +1,193 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:38:20 2024
@author: ym
"""
import cv2
import copy
import numpy as np
from pathlib import Path
curpath = Path(__file__).resolve().parents[0]
curpath = Path(curpath)
parpath = curpath.parent
# from tracking.utils.mergetrack import track_equal_track
from .dotracks import doTracks
from .track_front import frontTrack
class doFrontTracks(doTracks):
def __init__(self, bboxes, frameDictList):
super().__init__(bboxes, frameDictList)
# self.tracks = [frontTrack(b) for b in self.lboxes]
self.tracks = [frontTrack(b, f) for b, f in zip(self.lboxes, self.lfeats)]
self.incart = self.getincart()
def getincart(self):
img = cv2.imread(str(parpath/'shopcart/cart_tempt/incart_ftmp.png'), cv2.IMREAD_GRAYSCALE)
ret, binary = cv2.threshold(img, 250, 255, cv2.THRESH_BINARY)
return binary
def classify(self):
'''功能:对 tracks 中元素分类 '''
tracks = self.tracks
'''提取手的 tracks'''
hand_tracks = [t for t in tracks if t.cls==0]
self.Hands.extend(hand_tracks)
tracks = self.sub_tracks(tracks, hand_tracks)
'''提取小孩的 tracks'''
kid_tracks = [t for t in tracks if t.cls==9]
tracks = self.sub_tracks(tracks, kid_tracks)
out_track = [t for t in tracks if t.isWholeOutCart]
tracks = self.sub_tracks(tracks, out_track)
'''静态 tracks'''
static_tracks = [t for t in tracks if t.frnum>1 and t.is_static()]
'''剔除静止目标后的 tracks'''
tracks = self.sub_tracks(tracks, static_tracks)
tracks_free = [t for t in tracks if t.frnum>1 and t.is_freemove()]
self.FreeMove.extend(tracks_free)
tracks = self.sub_tracks(tracks, tracks_free)
# [self.associate_with_hand(htrack, gtrack) for htrack in hand_tracks for gtrack in tracks]
'''轨迹循环归并'''
merged_tracks = self.merge_tracks_loop(tracks)
[self.associate_with_hand(htrack, gtrack) for htrack in hand_tracks for gtrack in merged_tracks]
tracks = [t for t in merged_tracks if t.frnum > 1]
# for gtrack in tracks:
# # print(f"Goods ID:{gtrack.tid}")
# for htrack in hand_tracks:
# hand_ious = self.associate_with_hand(htrack, gtrack)
# if len(hand_ious):
# gtrack.Hands.append(htrack)
# gtrack.HandsIou.append(hand_ious)
'''静止 tracks 判断与剔除静止 tracks'''
static_tracks = [t for t in tracks if t.frnum>1 and t.is_static()]
tracks = self.sub_tracks(tracks, static_tracks)
freemoved_tracks = [t for t in tracks if t.is_free_move()]
tracks = self.sub_tracks(tracks, freemoved_tracks)
self.Residual = tracks
self.Confirmed = self.confirm_track()
def confirm_track(self):
Confirmed = None
mindist = 0
for track in self.Residual:
md = min(track.trajrects_wh)
if md > mindist:
mindist = copy.deepcopy(md)
Confirmed = copy.deepcopy(track)
if Confirmed is not None:
return [Confirmed]
return []
def associate_with_hand(self, htrack, gtrack):
'''
迁移至基类:
手部 Track、商品 Track 建立关联的依据:
a. 运动帧的帧索引有交集
b. 帧索引交集部分iou均大于0
'''
assert htrack.cls==0 and gtrack.cls!=0 and gtrack.cls!=9, 'Track cls is Error!'
hboxes = np.empty(shape=(0, 9), dtype = np.float64)
gboxes = np.empty(shape=(0, 9), dtype = np.float64)
# start, end 为索引值,需要 start:(end+1)
for start, end in htrack.dynamic_y2:
hboxes = np.concatenate((hboxes, htrack.boxes[start:end+1, :]), axis=0)
for start, end in gtrack.dynamic_y1:
gboxes = np.concatenate((gboxes, gtrack.boxes[start:end+1, :]), axis=0)
hfids, gfids = hboxes[:, 7], gboxes[:, 7]
fids = sorted(set(hfids).intersection(set(gfids)))
if len(fids)==0:
return None
# print(f"Goods ID: {gtrack.tid}, Hand ID: {htrack.tid}")
for f in fids:
h = np.where(hfids==f)[0][0]
g = np.where(gfids==f)[0][0]
x11, y11, x12, y12 = hboxes[h, 0:4]
x21, y21, x22, y22 = gboxes[g, 0:4]
x1, y1 = max((x11, x21)), max((y11, y21))
x2, y2 = min((x12, x22)), min((y12, y22))
inter = (x2 - x1).clip(0) * (y2 - y1).clip(0)
area1 = (x12 - x11) * (y12 - y11)
area2 = (x22 - x21) * (y22 - y21)
iou = inter / (area1 + area2 - inter + 1e-6)
if iou >= 0.01:
gtrack.Hands.append((htrack.tid, f, iou))
return gtrack.Hands
def merge_tracks(self, Residual):
"""
对不同id但可能是同一商品的目标进行归并
和 dotrack_back.py中函数相同可以合并至基类
"""
mergedTracks = self.base_merge_tracks(Residual)
oldtracks, newtracks = [], []
for tracklist in mergedTracks:
if len(tracklist) > 1:
boxes = np.empty((0, 9), dtype=np.float32)
feats = np.empty((0, 256), dtype=np.float32)
for i, track in enumerate(tracklist):
if i==0: ntid, ncls=track.boxes[0, 4], track.boxes[0, 6]
iboxes = track.boxes.copy()
ifeats = track.features.copy()
# iboxes[:, 4], iboxes[:, 6] = ntid, ncls
boxes = np.concatenate((boxes, iboxes), axis=0)
feats = np.concatenate((feats, ifeats), axis=0)
oldtracks.append(track)
fid_indices = np.argsort(boxes[:, 7])
boxes_fid = boxes[fid_indices]
feats_fid = feats[fid_indices]
newtracks.append(frontTrack(boxes_fid, feats_fid))
elif len(tracklist) == 1:
oldtracks.append(tracklist[0])
newtracks.append(tracklist[0])
redu = self.sub_tracks(Residual, oldtracks)
merged = self.join_tracks(redu, newtracks)
return merged

View File

@ -0,0 +1,241 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:28:47 2024
@author: ym
"""
import cv2
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
from .dotracks import MoveState, Track
from pathlib import Path
curpath = Path(__file__).resolve().parents[0]
curpath = Path(curpath)
parpath = curpath.parent
class backTrack(Track):
# boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# 0, 1, 2, 3, 4, 5, 6, 7, 8
def __init__(self, boxes, features, imgshape=(1024, 1280)):
super().__init__(boxes, features, imgshape)
'''该函数依赖项: self.cornpoints
MarginState: list, seven elements, 表示轨迹中boxes出现在图像的
[左上,右上,左中,右中,左下,右下底部]
'''
self.isCornpoint, self.MarginState = self.isimgborder()
'''该函数依赖项: self.isCornpoint不能在父类中初始化'''
self.trajfeature()
'''静止点帧索引'''
# self.static_index = self.compute_static_fids()
'''运动点帧索引(运动帧两端的静止帧索引)'''
# self.moving_index = self.compute_dynamic_fids()
self.static_index, self.moving_index = self.compute_static_dynamic_fids()
'''该函数依赖项: self.cornpoints定义 4 个商品位置变量:
self.Cent_isIncart, self.LB_isIncart, self.RB_isIncart
self.posState = self.Cent_isIncart+self.LB_isIncart+self.RB_isIncart'''
self.PositionState(camerType="back")
'''self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates'''
self.compute_ious_feat()
def isimgborder(self, BoundPixel=10, BoundThresh=0.3):
x1, y1 = self.cornpoints[:,2], self.cornpoints[:,3],
x2, y2 = self.cornpoints[:,8], self.cornpoints[:,9]
condt1 = sum(abs(x1)<BoundPixel) / self.frnum > BoundThresh
condt2 = sum(abs(y1)<BoundPixel) / self.frnum > BoundThresh
condt3 = sum(abs(x2-self.imgshape[0])<BoundPixel) / self.frnum > BoundThresh
condt4 = sum(abs(y2-self.imgshape[1])<BoundPixel) / self.frnum > BoundThresh
condt = condt1 or condt2 or condt3 or condt4
isCornpoint = False
if condt:
isCornpoint = True
condtA = condt1 and condt2
condtB = condt3 and condt2
condtC = condt1 and not condt2 and not condt4
condtD = condt3 and not condt2 and not condt4
condtE = condt1 and condt4
condtF = condt3 and condt4
condtG = condt4 and not condt1 and not condt3
MarginState = [condtA, condtB, condtC, condtD, condtE, condtF, condtG]
return isCornpoint, MarginState
def PCA(self):
self.pca = PCA()
X = self.cornpoints[:, 0:2]
self.pca.fit(X)
def compute_ious_feat(self):
'''输出:
self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates
其中:
boxes流track中所有boxes形成的轨迹图可分为三部分incart, outcart, cartboarder
incart_iou, outcart_iou, cartboarder_iou各部分和 boxes流的 iou。
incart_iou = 0track在购物车外
outcart_iou = 0track在购物车内也可能是通过左下角、右下角置入购物车
maxbox_iou, minbox_ioutrack中最大、最小 box 和boxes流的iou二者差值越小越接近 1表明track的运动型越小。
incartrates: 各box和incart的iou时序由小变大反应的是置入过程由大变小反应的是取出过程
'''
incart = cv2.imread(str(parpath/"shopcart/cart_tempt/incart.png"), cv2.IMREAD_GRAYSCALE)
outcart = cv2.imread(str(parpath/"shopcart/cart_tempt/outcart.png"), cv2.IMREAD_GRAYSCALE)
cartboarder = cv2.imread(str(parpath/"shopcart/cart_tempt/cartboarder.png"), cv2.IMREAD_GRAYSCALE)
incartrates = []
temp = np.zeros(incart.shape, np.uint8)
maxarea, minarea = 0, self.imgshape[0]*self.imgshape[1]
for i in range(self.frnum):
# boxes 为 xyxy 格式,转换为中心点 (x, y) 与全宽高 (w, h)
x = (self.boxes[i, 2] + self.boxes[i, 0]) / 2
y = (self.boxes[i, 3] + self.boxes[i, 1]) / 2
w = self.boxes[i, 2] - self.boxes[i, 0]
h = self.boxes[i, 3] - self.boxes[i, 1]
if w*h > maxarea: maxarea = w*h
if w*h < minarea: minarea = w*h
cv2.rectangle(temp, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), 255, cv2.FILLED)
temp1 = np.zeros(incart.shape, np.uint8)
cv2.rectangle(temp1, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), 255, cv2.FILLED)
temp2 = cv2.bitwise_and(incart, temp1)
inrate = cv2.countNonZero(temp2)/(w*h)  # 应统计与 incart 的交集 temp2
incartrates.append(inrate)
isincart = cv2.bitwise_and(incart, temp)
isoutcart = cv2.bitwise_and(outcart, temp)
iscartboarder = cv2.bitwise_and(cartboarder, temp)
num_temp = cv2.countNonZero(temp)
num_incart = cv2.countNonZero(isincart)
num_outcart = cv2.countNonZero(isoutcart)
num_cartboarder = cv2.countNonZero(iscartboarder)
incart_iou = num_incart/num_temp
outcart_iou = num_outcart/num_temp
cartboarder_iou = num_cartboarder/num_temp
maxbox_iou = maxarea/num_temp
minbox_iou = minarea/num_temp
self.feature_ious = (incart_iou, outcart_iou, cartboarder_iou, maxbox_iou, minbox_iou)
self.incartrates = incartrates
def compute_static_dynamic_fids(self):
if self.MarginState[0] or self.MarginState[2]:
idx1 = 4
elif self.MarginState[1] or self.MarginState[3]:
idx1 = 3
elif self.MarginState[4]:
idx1 = 2
elif self.MarginState[5]:
idx1 = 1
elif self.MarginState[6]:
if self.trajlens[1] < self.trajlens[2]:
idx1 = 1
else:
idx1 = 2
else:
idx1 = self.trajlens.index(min(self.trajlens))
# idx1 = self.trajlens.index(min(self.trajlens))
trajmin = self.trajectory[idx1]
static, dynamic = self.pt_state_fids(trajmin)
static = np.array(static)
dynamic = np.array(dynamic)
if static.size:
indx = np.argsort(static[:, 0])
static = static[indx]
if dynamic.size:
indx = np.argsort(dynamic[:, 0])
dynamic = dynamic[indx]
return static, dynamic
def is_static(self):
'''静态情况 1: 目标关键点最小相对运动轨迹 < 0.2, 指标值偏大
TrajFeat = [trajlen_min, trajlen_max,
trajdist_min, trajdist_max,
trajlen_rate, trajdist_rate]
'''
# print(f"TrackID: {self.tid}")
boxes = self.boxes
'''静态情况 1: '''
condt1 = self.TrajFeat[5] < 0.2 or self.TrajFeat[3] < 120
'''静态情况 2: 目标初始状态为静止,适当放宽关键点最小相对运动轨迹 < 0.5'''
condt2 = self.static_index.size > 0 \
and self.static_index[0, 0] <= 2 \
and self.static_index[0, 1] >= 5 \
and self.TrajFeat[5] < 0.5 \
and self.TrajFeat[1] < 240 \
and self.isWholeInCart
# and self.posState >= 2
# and self.TrajFeat[0] < 240 \
'''静态情况 3: 目标初始状态和最终状态均为静止'''
condt3 = self.static_index.shape[0] >= 2 \
and self.static_index[0, 0] <= 2 \
and self.static_index[0, 1] >= 5 \
and self.static_index[-1, 1] >= self.frnum-3 \
and self.TrajFeat[1] < 240 \
and self.isWholeInCart
# and self.posState >= 2
# and self.TrajFeat[0] < 240 \
condt4 = self.static_index.shape[0] >= 2 \
and self.static_index[0, 0] <= 2 \
and self.static_index[0, 1] >= 6 \
and self.static_index[-1, 0] <= self.frnum-5 \
and self.static_index[-1, 1] >= self.frnum-2
condt = condt1 or condt2 or condt3 or condt4
return condt
def is_OutTrack(self):
if self.posState <= 1:
isout = True
else:
isout = False
return isout
def compute_distance(self):
pass
def move_start_fid(self):
pass
def move_end_fid(self):
pass

View File

@ -0,0 +1,194 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 4 18:33:01 2024
@author: ym
"""
import numpy as np
import cv2
# from sklearn.cluster import KMeans
from .dotracks import MoveState, Track
from pathlib import Path
curpath = Path(__file__).resolve().parents[0]
curpath = Path(curpath)
parpath = curpath.parent
class frontTrack(Track):
# boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# 0, 1, 2, 3, 4, 5, 6, 7, 8
def __init__(self, boxes, features, imgshape=(1024, 1280)):
super().__init__(boxes, features, imgshape)
self.CART_HIGH_THRESH1 = imgshape[1]/2.98
'''y1、y2静止状态区间值是 boxes 中对 axis=0 的索引,不是帧索引'''
det_y1 = np.diff(boxes[:, 1], axis=0)
det_y2 = np.diff(boxes[:, 3], axis=0)
self.static_y1, self.dynamic_y1 = self.pt_state_fids(det_y1)
self.static_y2, self.dynamic_y2 = self.pt_state_fids(det_y2)
self.isCornpoint = self.is_left_or_right_cornpoint()
self.isBotmpoint = self.is_bottom_cornpoint()
'''该函数依赖项: self.isCornpoint不能在父类中初始化'''
self.trajfeature()
self.PositionState(camerType="front")
'''手部状态分析'''
self.HAND_STATIC_THRESH = 100
self.CART_POSIT_0 = 430
self.CART_POSIT_1 = 620
def is_left_or_right_cornpoint(self):
''' 基于 all(boxes)
boxes左下角点和图像左下角点重叠 或
boxes右下角点和图像左下角点重叠
'''
x1, y1 = self.boxes[:, 0], self.boxes[:, 1]
x2, y2 = self.boxes[:, 2], self.boxes[:, 3]
# Left-Bottom cornpoint
condt1 = all(x1 < 5) and all(y2 > self.imgshape[1]-5)
# Right-Bottom cornpoint
condt2 = all(x2 > self.imgshape[0]-5) and all(y2 > self.imgshape[1]-5)
condt = condt1 or condt2
return condt
def is_edge_cornpoint(self):
'''基于 all(boxes)boxes是否和图像左右边缘重叠'''
x1, x2 = self.boxes[:, 0], self.boxes[:, 2]
condt = all(x1 < 3) or all(x2 > self.imgshape[0]-3)
return condt
def is_bottom_cornpoint(self):
'''基于 all(boxes)boxes是否和图像下边缘重叠'''
condt = all(self.boxes[:, 3] > self.imgshape[1]-20)
return condt
def is_static(self):
assert self.frnum > 1, "boxes number must greater than 1"
# print(f"The ID is: {self.tid}")
# 手部和小孩目标不考虑
if self.cls == 0 or self.cls == 9:
return False
# boxes 全部 y2=1280
if self.isBotmpoint:
return True
boxes = self.boxes
y0 = (boxes[:, 1]+boxes[:, 3])/2
## 纵轴矢量和
sum_y0 = y0[-1] - y0[0]
sum_y1 = boxes[-1, 1]-boxes[0, 1]
sum_y2 = boxes[-1, 3]-boxes[0, 3]
# 一些需要考虑的特殊情况
isbottom = max(boxes[:, 3]) > 1280-3
istop = min(boxes[:, 1]) < 3
isincart = min(y0) > self.CART_HIGH_THRESH1
uncert = abs(sum_y1)<100 and abs(sum_y2)<100
'''初始条件:商品中心点至少有一帧在购物车内'''
condt0 = max((boxes[:, 1]+boxes[:, 3])/2) > self.CART_HIGH_THRESH1
'''条件1轨迹运动纵向和y1 或 y2描述商品轨迹长度存在情况
(1). 检测框可能与图像上下边缘重合,
(2). 上边或下边存在跳动
'''
if isbottom and istop:
condt1 = abs(sum_y0) < 300
elif isbottom: # y2在底部用y1表征运动
condt1 = sum_y1 > -120 and abs(sum_y0)<80 # 有底部点方向向上阈值小于100
elif istop: # y1在顶部用y2表征运动
condt1 = abs(sum_y2) < 100
else:
condt1 = (abs(sum_y1) < 30 or abs(sum_y2)<30)
'''条件2轨迹的开始和结束阶段均处于静止状态, 利用静止状态区间判断,用 y1
a. 商品在购物车内,
b. 检测框的起始阶段和结束阶段均为静止状态
c. 静止帧长度 > 3'''
condt2 = False
if len(self.static_y1)>=2:
condt_s0 = self.static_y1[0][0]==0 and self.static_y1[0][1] - self.static_y1[0][0] >= 3
condt_s1 = self.static_y1[-1][1]==self.frnum-1 and self.static_y1[-1][1] - self.static_y1[-1][0] >= 3
condt2 = condt_s0 and condt_s1 and isincart
condt = condt0 and (condt1 or condt2)
return condt
def is_upward(self):
'''判断商品是否取出,'''
print(f"The ID is: {self.tid}")
def is_free_move(self):
if self.frnum == 1:
return True
# print(f"The ID is: {self.tid}")
y0 = (self.boxes[:, 1] + self.boxes[:, 3]) / 2
det_y0 = np.diff(y0, axis=0)
sum_y0 = y0[-1] - y0[0]
'''情况1中心点向下 '''
## 初始条件:商品第一次检测到在购物车内
condt0 = y0[0] > self.CART_HIGH_THRESH1
condt_a = False
## 条件1商品初始为静止状态静止条件应严格一些
condt11, condt12 = False, False
if len(self.static_y1)>0:
condt11 = self.static_y1[0][0]==0 and self.static_y1[0][1] - self.static_y1[0][0] >= 5
if len(self.static_y2)>0:
condt12 = self.static_y2[0][0]==0 and self.static_y2[0][1] - self.static_y2[0][0] >= 5
# 条件2商品中心发生向下移动
condt2 = y0[-1] > y0[0]
# 综合判断a
condt_a = condt0 and (condt11 or condt12) and condt2
'''情况2中心点向上 '''
## 商品中心点向上移动但没有关联的Hand轨迹也不是左右边界点
condt_b = condt0 and len(self.Hands)==0 and y0[-1] < y0[0] and (not self.is_edge_cornpoint()) and min(y0)>self.CART_HIGH_THRESH1
'''情况3: 商品在购物车内,但运动方向无序'''
## 中心点在购物车内,纵向轨迹和小于轨迹差中绝对值最大的两个值的和,说明运动没有主方向
condt_c = False
if self.frnum > 3:
condt_c = all(y0>self.CART_HIGH_THRESH1) and \
(abs(sum_y0) < sum(np.sort(np.abs(det_y0))[::-1][:2])-1)
condt = (condt_a or condt_b or condt_c) and self.cls!=0
return condt

View File

@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 29 10:28:21 2024
未来需将这一部分和轨迹分析代码集成
@author: ym
"""
import numpy as np
import cv2
from scipy.spatial.distance import cdist
class TProp:
'''抽象基类,不能实例化对象'''
def __init__(self, boxes):
'''
boxes: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
'''
# assert len(set(boxes[:, 4].astype(int))) == 1, "For a Track, track_id more than 1"
# assert len(set(boxes[:, 6].astype(int))) == 1, "For a Track, class number more than 1"
self.boxes = boxes
self.frnum = boxes.shape[0]  # compute_cornpoints 依赖该属性
'''5个关键点中心点、左上点、右上点、左下点、右下点 )坐标'''
self.compute_cornpoints()
'''5个关键点轨迹特征可以在子类中实现降低顺序处理时的计算量
(中心点、左上点、右上点、左下点、右下点 )轨迹特征'''
self.compute_cornpts_feats()
self.distmax = max(self.trajdist)
def compute_cornpoints(self):
'''
cornpoints 共10项分别是个点的坐标值x, y
(center, top_left, top_right, bottom_left, bottom_right)
'''
boxes = self.boxes
cornpoints = np.zeros((self.frnum, 10))
cornpoints[:,0] = (boxes[:, 0] + boxes[:, 2]) / 2
cornpoints[:,1] = (boxes[:, 1] + boxes[:, 3]) / 2
cornpoints[:,2], cornpoints[:,3] = boxes[:, 0], boxes[:, 1]
cornpoints[:,4], cornpoints[:,5] = boxes[:, 2], boxes[:, 1]
cornpoints[:,6], cornpoints[:,7] = boxes[:, 0], boxes[:, 3]
cornpoints[:,8], cornpoints[:,9] = boxes[:, 2], boxes[:, 3]
self.cornpoints = cornpoints
def compute_cornpts_feats(self):
'''
'''
trajectory = []
trajlens = []
trajdist = []
trajrects = []
for k in range(5):
# diff_xy2 = np.power(np.diff(self.cornpoints[:, 2*k:2*(k+1)], axis = 0), 2)
# trajlen = np.sum(np.sqrt(np.sum(diff_xy2, axis = 1)))
X = self.cornpoints[:, 2*k:2*(k+1)]
traj = np.linalg.norm(np.diff(X, axis=0), axis=1)
trajectory.append(traj)
trajlen = np.sum(traj)
trajlens.append(trajlen)
ptdist = np.max(cdist(X, X))
trajdist.append(ptdist)
'''最小外接矩形:
rect[0]: 中心(x, y)
rect[1]: (w, h)
rect[2]: 旋转角度 (-90°, 0]
'''
rect = cv2.minAreaRect(X.astype(np.int32))  # minAreaRect 仅支持 int32/float32 点集
trajrects.append(rect)
self.trajectory = trajectory
self.trajlens = trajlens
self.trajdist = trajdist
self.trajrects = trajrects

View File

@ -0,0 +1,173 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 23 11:04:48 2024
@author: ym
"""
import numpy as np
import cv2
from scipy.spatial.distance import cdist
# from trackers.utils import matching
def readDict(boxes, feat_dicts):
feat = []
for i in range(boxes.shape[0]):
tid, fid, bid = int(boxes[i, 4]), int(boxes[i, 7]), int(boxes[i, 8])
feat.append(feat_dicts[fid][bid])
# img = feat_dicts[fid][f'{bid}_img']
# cv2.imwrite(f'./result/imgs/{tid}_{fid}_{bid}.png', img)
return np.asarray(feat, dtype=np.float32)
def track_equal_track(atrack, btrack, feat_dicts):
# boxes: [x, y, w, h, track_id, score, cls, frame_index, box_index]
aboxes = atrack.boxes
bboxes = btrack.boxes
''' 1. 判断轨迹在时序上是否有交集 '''
afids = aboxes[:, 7].astype(np.int_)
bfids = bboxes[:, 7].astype(np.int_)
# 帧索引交集
interfid = set(afids).intersection(set(bfids))
# 或者直接用 set(afids).isdisjoint(set(bfids)) 判断是否无交集,返回 True or False
# interfid = set(afids).isdisjoint(set(bfids))
''' 2. 轨迹空间iou'''
alabel = np.array([0] * afids.size, dtype=np.int_)
blabel = np.array([1] * bfids.size, dtype=np.int_)
label = np.concatenate((alabel, blabel), axis=0)
fids = np.concatenate((afids, bfids), axis=0)
indices = np.argsort(fids)
idx_pair = []
for i in range(len(indices)-1):
idx1, idx2 = indices[i], indices[i+1]
if label[idx1] != label[idx2] and fids[idx2] - fids[idx1] == 1:
if label[idx1] == 0:
a_idx = idx1
b_idx = idx2-alabel.size
else:
a_idx = idx2
b_idx = idx1-alabel.size
idx_pair.append((a_idx, b_idx))
ious = []
for a, b in idx_pair:
abox, bbox = aboxes[a, :], bboxes[b, :]
xa1, ya1 = abox[0] - abox[2]/2, abox[1] - abox[3]/2
xa2, ya2 = abox[0] + abox[2]/2, abox[1] + abox[3]/2
xb1, yb1 = bbox[0] - bbox[2]/2, bbox[1] - bbox[3]/2
xb2, yb2 = bbox[0] + bbox[2]/2, bbox[1] + bbox[3]/2
inter = (np.minimum(xb2, xa2) - np.maximum(xb1, xa1)).clip(0) * \
(np.minimum(yb2, ya2) - np.maximum(yb1, ya1)).clip(0)
# Union Area
box1_area = abox[2] * abox[3]
box2_area = bbox[2] * bbox[3]
union = box1_area + box2_area - inter + 1e-6
ious.append(inter/union)
''' 3. 轨迹特征相似度判断'''
afeat = readDict(aboxes, feat_dicts)
bfeat = readDict(bboxes, feat_dicts)
feat = np.concatenate((afeat, bfeat), axis=0)
emb_simil = 1-np.maximum(0.0, cdist(feat, feat, 'cosine'))
emb_ = 1-cdist(np.mean(afeat, axis=0)[None, :], np.mean(bfeat, axis=0)[None, :], 'cosine')
cont1 = False if len(interfid) else True
cont2 = all(iou>0.5 for iou in ious)
cont3 = emb_[0, 0]>0.75
cont = cont1 and cont2 and cont3
return cont
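# 判定示例(数值为假设):两条轨迹帧索引无交集cont1、时序相邻帧对的 iou
# 均 > 0.5cont2、平均特征余弦相似度 0.81 > 0.75cont3三者同时满足则
# 判定为同一商品,可归并。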
def track_equal_str(atrack, btrack):
if atrack == btrack:
return True
else:
return False
def merge_track(Residual):
out_list = []
alist = [t for t in Residual]
while alist:
atrack = alist[0]
cur_list = []
cur_list.append(atrack)
alist.pop(0)
blist = [b for b in alist]
alist = []
for btrack in blist:
if track_equal_str(atrack, btrack):
cur_list.append(btrack)
else:
alist.append(btrack)
out_list.append(cur_list)
return out_list
def main():
Residual = ['a', 'b', 'c', 'd', 'a', 'b', 'c', 'b', 'c', 'd']
out_list = merge_track(Residual)
print(Residual)
print(out_list)
if __name__ == "__main__":
main()
# =============================================================================
# for i, atrack in enumerate(input_list):
# cur_list = []
# cur_list.append(atrack)
# del input_list[i]
#
# for j, btrack in enumerate(input_list):
# if track_equal(atrack, btrack):
# cur_list.append(btrack)
# del input_list[j]
#
# out_list.append(cur_list)
# =============================================================================

View File

@ -0,0 +1,459 @@
# -*- coding: utf-8 -*-
"""
Created on Thu May 30 14:03:03 2024
现场测试性能分析
@author: ym
"""
import os
import cv2
import numpy as np
from pathlib import Path
import warnings
import sys
sys.path.append(r"D:\DetectTracking")
from tracking.utils.plotting import Annotator, colors, draw_tracking_boxes
from tracking.utils import Boxes, IterableSimpleNamespace, yaml_load
from tracking.trackers import BOTSORT, BYTETracker
from tracking.dotrack.dotracks_back import doBackTracks
from tracking.dotrack.dotracks_front import doFrontTracks
from tracking.utils.drawtracks import plot_frameID_y2, draw_all_trajectories
from tracking.utils.read_data import extract_data, read_deletedBarcode_file, read_tracking_output, read_returnGoods_file
from contrast.one2n_contrast import get_contrast_paths, one2n_return
from tracking.utils.annotator import TrackAnnotator
W, H = 1024, 1280
Mode = 'front' #'back'
ImgFormat = ['.jpg', '.jpeg', '.png', '.bmp']
'''调用tracking()函数,利用本地跟踪算法获取各目标轨迹,可以比较本地跟踪算法与现场跟踪算法的区别。'''
def init_tracker(tracker_yaml = None, bs=1):
"""
Initialize tracker for object tracking during prediction.
"""
TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}
cfg = IterableSimpleNamespace(**yaml_load(tracker_yaml))
tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
return tracker
def tracking(bboxes, ffeats):
tracker_yaml = r"./trackers/cfg/botsort.yaml"
tracker = init_tracker(tracker_yaml)
TrackBoxes = np.empty((0, 9), dtype = np.float32)
TracksDict = {}
'''========================== 执行跟踪处理 ============================='''
# dets 与 feats 应保持严格对应
for dets, feats in zip(bboxes, ffeats):
det_tracking = Boxes(dets).cpu().numpy()
tracks = tracker.update(det_tracking, features=feats)
'''tracks: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
0 1 2 3 4 5 6 7 8
这里frame_index 也可以用视频的 帧ID 代替, box_index 保持不变
'''
if len(tracks):
TrackBoxes = np.concatenate([TrackBoxes, tracks], axis=0)
FeatDict = {}
for track in tracks:
bid = int(track[8])  # box_index并非 track_id
FeatDict.update({bid: feats[bid, :]})
frameID = tracks[0, 7]
# print(f"frameID: {int(frameID)}")
assert len(tracks) == len(FeatDict), f"Please check the func: tracker.update() at frameID({int(frameID)})"
TracksDict[f"frame_{int(frameID)}"] = {"feats":FeatDict}
return TrackBoxes, TracksDict
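# 用法示意(路径为假设):
#   bboxes, ffeats, *_ = extract_data(r"D:\contrast\...\0_track.data")
#   trackerboxes, tracker_feat_dict = tracking(bboxes, ffeats)
#   trackerboxes 为所有帧拼接的 Nx9 数组tracker_feat_dict 形如
#   {"frame_k": {"feats": {box_index: 256 维特征}}}。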
def read_imgs(imgspath, CamerType):
'''
inputs:
imgspath序列图像地址
CamerType相机类型0后摄1前摄
outputs
imgs图像序列
功能:
根据CamerType类型读取imgspath文件夹中的图像并根据帧索引进行排序。
do_tracking()中调用该函数实现1读取imgs并绘制各目标轨迹框2获取subimgs
'''
imgs, frmIDs = [], []
for filename in os.listdir(imgspath):
file, ext = os.path.splitext(filename)
flist = file.split('_')
if len(flist)==4 and ext in ImgFormat:
camID, frmID = flist[0], int(flist[-1])
if camID==CamerType:
img = cv2.imread(os.path.join(imgspath, filename))
imgs.append(img)
frmIDs.append(frmID)
if len(frmIDs):
indice = np.argsort(np.array(frmIDs))
imgs = [imgs[i] for i in indice]
return imgs
def do_tracking(fpath, savedir, event_name='images'):
'''
args:
fpath: 算法各模块输出的data文件地址匹配
savedir: 对 fpath 各模块输出的复现;
分析具体视频时,需指定 fpath 和 savedir
outputs:
img_tracking目标跟踪轨迹、本地轨迹分析算法的轨迹对比图
abimg现场轨迹分析算法、轨迹选择输出的对比图
'''
# fpath = r'D:\contrast\dataset\1_to_n\709\20240709-102758_6971558612189\1_track.data'
# savedir = r'D:\contrast\dataset\result\20240709-102843_6958770005357_6971558612189\error_6971558612189'
imgpath, dfname = os.path.split(fpath)
CamerType = dfname.split('_')[0]
'''1.1 构造 0/1_tracking_output.data 文件地址,读取文件数据'''
tracking_output_path = os.path.join(imgpath, CamerType + '_tracking_output.data')
basename = os.path.basename(imgpath)
if not os.path.isfile(fpath):
print(f"{basename}: Can't find {dfname} file!")
return None, None
if not os.path.isfile(tracking_output_path):
print(f"{basename}: Can't find {CamerType}_tracking_output.data file!")
return None, None
bboxes, ffeats, trackerboxes, tracker_feat_dict, trackingboxes, tracking_feat_dict = extract_data(fpath)
tracking_output_boxes, _ = read_tracking_output(tracking_output_path)
'''1.2 利用本地跟踪算法生成各商品轨迹'''
# trackerboxes, tracker_feat_dict = tracking(bboxes, ffeats)
'''1.3 分别构造 2 个文件夹,(1) 存储画框后的图像; (2) 运动轨迹对应的 boxes子图'''
save_dir = os.path.join(savedir, event_name + '_images')
subimg_dir = os.path.join(savedir, event_name + '_subimgs')
if not os.path.exists(save_dir):
os.makedirs(save_dir)
if not os.path.exists(subimg_dir):
os.makedirs(subimg_dir)
'''2. 执行轨迹分析, 保存轨迹分析前后的对比图示'''
traj_graphic = event_name + '_' + CamerType
if CamerType == '1':
vts = doFrontTracks(trackerboxes, tracker_feat_dict)
vts.classify()
plt = plot_frameID_y2(vts)
# ftpath = os.path.join(savedir, f"{traj_graphic}_front_y2.png")
# plt.savefig(ftpath)
plt.close()
edgeline = cv2.imread("./shopcart/cart_tempt/board_ftmp_line.png")
img_tracking = draw_all_trajectories(vts, edgeline, savedir, CamerType, draw5p=True)
elif CamerType == '0':
vts = doBackTracks(trackerboxes, tracker_feat_dict)
vts.classify()
edgeline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
img_tracking = draw_all_trajectories(vts, edgeline, savedir, CamerType, draw5p=True)
# imgpth = os.path.join(savedir, f"{traj_graphic}_.png")
# cv2.imwrite(str(imgpth), img)
else:
print("Please check data file!")
'''3 tracking() 算法输出后多轨迹选择问题分析'''
if CamerType == '1':
aline = cv2.imread("./shopcart/cart_tempt/board_ftmp_line.png")
elif CamerType == '0':
aline = cv2.imread("./shopcart/cart_tempt/edgeline.png")
else:
print("Please check data file!")
bline = aline.copy()
annotator = TrackAnnotator(aline, line_width=2)
for track in trackingboxes:
annotator.plotting_track(track)
aline = annotator.result()
annotator = TrackAnnotator(bline, line_width=2)
if not isinstance(tracking_output_boxes, list):
tracking_output_boxes = [tracking_output_boxes]
for track in tracking_output_boxes:
annotator.plotting_track(track)
bline = annotator.result()
abimg = np.concatenate((aline, bline), axis = 1)
abH, abW = abimg.shape[:2]
cv2.line(abimg, (int(abW/2), 0), (int(abW/2), abH), (128, 255, 128), 2)
# algpath = os.path.join(savedir, f"{traj_graphic}_alg.png")
# cv2.imwrite(str(algpath), abimg)
'''4. 画框后的图像和子图保存若imgs数与tracker中fid数不匹配只保存原图不保存子图'''
'''4.0 读取 fpath 中对应的图像 imgs '''
imgs = read_imgs(imgpath, CamerType)
'''4.1 imgs数 < trackerboxes 的 max(fid),返回原图'''
if len(imgs) < np.max(trackerboxes[:,7]):
for i in range(len(imgs)):
img_savepath = os.path.join(save_dir, CamerType + "_" + f"{i}.png")
cv2.imwrite(img_savepath, imgs[i])
print(f"{basename}: len(imgs) = {len(imgs)} < Tracker max(fid) = {int(np.max(trackerboxes[:,7]))}, 无法匹配画框")
return img_tracking, abimg
'''4.2 在 imgs 上画框并保存'''
imgs_dw = draw_tracking_boxes(imgs, trackerboxes)
for fid, img in imgs_dw:
img_savepath = os.path.join(save_dir, CamerType + "_fid_" + f"{int(fid)}.png")
cv2.imwrite(img_savepath, img)
'''4.3.2 保存轨迹选择对应的子图'''
# for track in tracking_output_boxes:
for track in vts.Residual:
for *xyxy, tid, conf, cls, fid, bid in track.boxes:
img = imgs[int(fid-1)]
x1, y1, x2, y2 = int(xyxy[0]/2), int(xyxy[1]/2), int(xyxy[2]/2), int(xyxy[3]/2)
subimg = img[y1:y2, x1:x2]
subimg_path = os.path.join(subimg_dir, f'{CamerType}_tid{int(tid)}_{int(fid)}_{int(bid)}.png' )
cv2.imwrite(subimg_path, subimg)
for track in tracking_output_boxes:
for *xyxy, tid, conf, cls, fid, bid in track:
img = imgs[int(fid-1)]
x1, y1, x2, y2 = int(xyxy[0]/2), int(xyxy[1]/2), int(xyxy[2]/2), int(xyxy[3]/2)
subimg = img[y1:y2, x1:x2]
subimg_path = os.path.join(subimg_dir, f'x_{CamerType}_tid{int(tid)}_{int(fid)}_{int(bid)}.png' )
cv2.imwrite(subimg_path, subimg)
return img_tracking, abimg
def tracking_simulate(eventpath, savepath):
    '''args:
        eventpath: event folder
        savepath: output folder
       Iterates over eventpath.
    '''
# =============================================================================
#     '''1. Derive the event name'''
#     event_names = os.path.basename(eventpath).strip().split('_')
#     if len(event_names)==2 and len(event_names[1])>=8:
#         event_name = event_names[1]
#     elif len(event_names)==2 and len(event_names[1])==0:
#         event_name = event_names[0]
#     else:
#         return
# =============================================================================
    event_name = os.path.basename(eventpath)

    ## only to simplify the file name
    idx = event_name.find('2024')
    if idx >= 0:
        event_name = event_name[idx:(idx+15)]
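    # e.g. "20241121-144901-fdba61c6-..._6920459905012" -> "20241121-144901"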
    '''2. Read the data in 0/1_track.data in turn and run the simulation'''
    illu_tracking, illu_select = [], []
    for filename in os.listdir(eventpath):
        # filename = '1_track.data'
        if filename.find("track.data") < 0: continue
        fpath = os.path.join(eventpath, filename)
        if not os.path.isfile(fpath): continue

        img_tracking, img_select = do_tracking(fpath, savepath, event_name)
        if img_select is not None:
            illu_select.append(img_select)
        if img_tracking is not None:
            illu_tracking.append(img_tracking)
    '''3. Composite of 8 panels: the upper and lower halves show the front and
       back cameras; the 4 panels in each half are:
       (1) raw tracker output trajectories; (2) local tracking output;
       (3) on-site trajectories before selection; (4) on-site trajectories after selection
    '''
    if len(illu_select) == 2:
        Img_s = np.concatenate((illu_select[0], illu_select[1]), axis=0)
        H, W = Img_s.shape[:2]
        cv2.line(Img_s, (0, int(H/2)), (int(W), int(H/2)), (128, 255, 128), 2)
    elif len(illu_select) == 1:
        Img_s = illu_select[0]
    else:
        Img_s = None

    if len(illu_tracking) == 2:
        Img_t = np.concatenate((illu_tracking[0], illu_tracking[1]), axis=0)
        H, W = Img_t.shape[:2]
        cv2.line(Img_t, (0, int(H/2)), (int(W), int(H/2)), (128, 255, 128), 2)
    elif len(illu_tracking) == 1:
        Img_t = illu_tracking[0]
    else:
        Img_t = None
    '''3.1 Save the trajectory images; if the tracking and select images have the
       same shape, merge them into one output, otherwise save them separately'''
    imgpath_tracking = os.path.join(savepath, event_name + '_tracking.png')
    imgpath_select = os.path.join(savepath, event_name + '_select.png')
    imgpath_ts = os.path.join(savepath, event_name + '_tracking_select.png')
    if Img_t is not None and Img_s is not None and Img_s.shape == Img_t.shape:
        Img_ts = np.concatenate((Img_t, Img_s), axis=1)
        H, W = Img_ts.shape[:2]
        cv2.line(Img_ts, (int(W/2), 0), (int(W/2), int(H)), (0, 0, 255), 4)
        cv2.imwrite(imgpath_ts, Img_ts)
    else:
        # Fallback when the shapes differ; normally not reached. Note that
        # 'if Img_s:' would raise on a multi-element array, so test against None.
        if Img_s is not None: cv2.imwrite(imgpath_select, Img_s)
        if Img_t is not None: cv2.imwrite(imgpath_tracking, Img_t)
        Img_ts = None
    '''3.2 Additionally save the intact 8-panel trajectory image'''
    if Img_ts is not None:
        basepath, _ = os.path.split(savepath)
        trajpath = os.path.join(basepath, 'trajs')
        if not os.path.exists(trajpath):
            os.makedirs(trajpath)
        traj_path = os.path.join(trajpath, event_name + '.png')
        cv2.imwrite(traj_path, Img_ts)

    return Img_ts
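
# Files written per event by tracking_simulate, as derived from the code above:
#   <savepath>/<event>_tracking_select.png          merged 8-panel image
#   <savepath>/<event>_tracking.png / _select.png   fallbacks when shapes differ
#   <dirname(savepath)>/trajs/<event>.png           extra copy of the merged image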
# warnings.simplefilter("error", category=np.VisibleDeprecationWarning)
def main_loop():
    del_barcode_file = r'\\192.168.1.28\share\测试_202406\0723\0723_3\deletedBarcode.txt'
    basepath = r'\\192.168.1.28\share\测试_202406\0723\0723_3'  # test-data folder

    # del_barcode_file = r'\\192.168.1.28\share\测试_202406\1030\images\returnGoods.txt'
    # basepath = r'\\192.168.1.28\share\测试_202406\1030\images'  # test-data folder

    '''Resolve the paths of the performance-test data'''
    SavePath = r'D:\contrast\dataset\resultx'  # result folder
    saveimgs = True
    # str.find() returns 0 for a match at the start, which is falsy, so use 'in'
    if 'deletedBarcode' in os.path.basename(del_barcode_file):
        relative_paths = get_contrast_paths(del_barcode_file, basepath, SavePath, saveimgs)
    elif 'returnGoods' in os.path.basename(del_barcode_file):
        blist = read_returnGoods_file(del_barcode_file)
        errpairs, corrpairs, err_similarity, correct_similarity = one2n_return(blist)
        relative_paths = []
        # Append each error triple as one tuple so the loop below can unpack
        # the (getout, input, error) event paths together.
        for getoutevent, inputevent, errevent in errpairs:
            relative_paths.append((os.path.join(basepath, getoutevent),
                                   os.path.join(basepath, inputevent),
                                   os.path.join(basepath, errevent)))
        # prefix = ["getout_", "input_", "error_"]
    '''Loop over the events of each test task'''
    k = 0
    for tuple_paths in relative_paths:
        '''1. Create the folder that stores the result images'''
        namedirs = []
        for data_path in tuple_paths:
            base_name = os.path.basename(data_path).strip().split('_')
            if len(base_name[-1]):
                name = base_name[-1]
            else:
                name = base_name[0]
            namedirs.append(name)
        sdir = "_".join(namedirs)
        savepath = os.path.join(SavePath, sdir)

        # if os.path.exists(savepath):
        #     continue
        if not os.path.exists(savepath):
            os.makedirs(savepath)

        '''2. Process the events of one operation in turn: take out, put in, mismatch'''
        for eventpath in tuple_paths:
            try:
                tracking_simulate(eventpath, savepath)
            except Exception as e:
                print(f'Error! {eventpath}, {e}')

        # k += 1
        # if k == 1:
        #     break
def main():
    '''
    eventPaths: folder of data files; each data file holds the outputs of the Pipeline modules
    savePath: two-level output; level 1 stores the trajectory images, level 2
              stores the image sequences corresponding to each data file
    '''
    # eventPaths = r'\\192.168.1.28\share\测试_202406\0723\0723_3'
    eventPaths = r'\\192.168.1.28\share\测试视频数据以及日志\各模块测试记录\展厅测试\1120_展厅模型v801测试\扫A放A'
    savePath = r'D:\exhibition\result'

    k = 0
    for pathname in os.listdir(eventPaths):
        # Pinned to a single event for debugging; remove this line (and the
        # break below) to process every event under eventPaths.
        pathname = "20241121-144901-fdba61c6-aefa-4b50-876d-5e05998befdc_6920459905012_6920459905012"

        eventpath = os.path.join(eventPaths, pathname)
        savepath = os.path.join(savePath, pathname)
        if not os.path.exists(savepath):
            os.makedirs(savepath)

        tracking_simulate(eventpath, savepath)
        # try:
        #     tracking_simulate(eventpath, savepath)
        # except Exception as e:
        #     print(f'Error! {eventpath}, {e}')

        k += 1
        if k == 1:
            break

if __name__ == "__main__":
    # main_loop()
    main()

    # try:
    #     main_loop()
    # except Exception as e:
    #     print(f'Error: {e}')
