# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
Usage - sources:
$ python detect.py --weights yolov5s.pt --source 0 # webcam
img.jpg # image
vid.mp4 # video
screen # screenshot
path/ # directory
list.txt # list of images
list.streams # list of streams
'path/*.jpg' # glob
'https://youtu.be/Zgi9g1ksQHc' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
Usage - formats:
$ python detect.py --weights yolov5s.pt # PyTorch
yolov5s.torchscript # TorchScript
yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s_openvino_model # OpenVINO
yolov5s.engine # TensorRT
yolov5s.mlmodel # CoreML (macOS-only)
yolov5s_saved_model # TensorFlow SavedModel
yolov5s.pb # TensorFlow GraphDef
yolov5s.tflite # TensorFlow Lite
yolov5s_edgetpu.tflite # TensorFlow Edge TPU
yolov5s_paddle_model # PaddlePaddle
"""
import argparse
import csv
import os
import platform
import sys
from pathlib import Path
import glob
import numpy as np
import pickle
import torch
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.common import DetectMultiBackend
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
                           increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
from utils.torch_utils import select_device, smart_inference_mode
'''Integrates the tracking module and saves tracking results to .npy files.'''
# from ultralytics.engine.results import Boxes # Results
# from ultralytics.utils import IterableSimpleNamespace, yaml_load
from tracking.utils.plotting import Annotator, colors
from tracking.utils import Boxes, IterableSimpleNamespace, yaml_load, boxes_add_fid
from tracking.trackers import BOTSORT, BYTETracker
from tracking.utils.showtrack import drawtracks
from hands.hand_inference import hand_pose
# =============================================================================
# from tracking.trackers.reid.reid_interface import ReIDInterface
# from tracking.trackers.reid.config import config as ReIDConfig
# ReIDEncoder = ReIDInterface(ReIDConfig)
# =============================================================================
# tracker_yaml = r"./tracking/trackers/cfg/botsort.yaml"
def init_trackers(tracker_yaml=None, bs=1):
    """
    Initialize trackers for object tracking during prediction.
    """
    # tracker_yaml = r"./tracking/trackers/cfg/botsort.yaml"
    TRACKER_MAP = {'bytetrack': BYTETracker, 'botsort': BOTSORT}

    cfg = IterableSimpleNamespace(**yaml_load(tracker_yaml))
    trackers = []
    for _ in range(bs):
        tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
        trackers.append(tracker)
    return trackers
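
# Usage sketch (illustrative, not part of the original flow): init_trackers() builds one
# tracker per batch slot from a YAML config whose `tracker_type` key selects the class.
# With bs=1, run() below takes trackers[0] and feeds it one frame at a time:
#
#   trackers = init_trackers("./tracking/trackers/cfg/botsort.yaml", bs=1)
#   tracker = trackers[0]
#   # per frame: tracks = tracker.update(detections, frame_image)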
@smart_inference_mode()
def run(
        weights=ROOT / 'yolov5s.pt',  # model path or triton URL
        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        tracker_yaml="./tracking/trackers/cfg/botsort.yaml",
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_csv=False,  # save results in CSV format
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        vid_stride=1,  # video frame-rate stride
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
):
    source = str(source)
    # filename = os.path.split(source)[-1]
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.streams') or (is_url and not is_file)
    screenshot = source.lower().startswith('screen')
    if is_url and is_file:
        source = check_file(source)  # download

    save_dir = Path(project) / Path(source).stem
    if save_dir.exists():
        print(Path(source).stem)
        # return
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
    else:
        save_dir.mkdir(parents=True, exist_ok=True)
    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
    seen, dt = 0, (Profile(), Profile(), Profile())
    tracker = init_trackers(tracker_yaml, bs)[0]
    handpose = hand_pose()

    handlocals_dict = {}
    boxes_and_imgs = []
    track_boxes = np.empty((0, 9), dtype=np.float32)
    det_boxes = np.empty((0, 9), dtype=np.float32)
    features_dict = {}
    for path, im, im0s, vid_cap, s in dataset:
        if save_img and 'imgshow' not in locals().keys():
            imgshow = im0s.copy()

        ## ============================= the tracking feature only processes videos (written by WQG)
        if dataset.mode == 'image':
            continue

        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(im, augment=augment, visualize=visualize)

        # NMS
        with dt[2]:
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
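
        # Note (descriptive comment, added for clarity): non_max_suppression() returns a list
        # with one tensor per image; each tensor has shape (n, 6) laid out as
        # [x1, y1, x2, y2, conf, cls] in the coordinates of the letterboxed input `im`,
        # which is why the boxes are rescaled to the original frame below before tracking.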
        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Define the path for the CSV file
        # csv_path = save_dir / 'predictions.csv'

        # Create or append to the CSV file
        # def write_to_csv(image_name, prediction, confidence):
        #     data = {'Image Name': image_name, 'Prediction': prediction, 'Confidence': confidence}
        #     with open(csv_path, mode='a', newline='') as f:
        #         writer = csv.DictWriter(f, fieldnames=data.keys())
        #         if not csv_path.is_file():
        #             writer.writeheader()
        #         writer.writerow(data)
        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            im0_ant = im0.copy()
            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            s += '%gx%g ' % im.shape[2:]  # print string
            annotator = Annotator(im0_ant, line_width=line_thickness, example=str(names))

            nd = len(det)
            if nd:
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
                det = det.cpu().numpy()
                det = np.concatenate([det[:, :4], np.arange(nd).reshape(-1, 1), det[:, 4:]], axis=-1)
                '''FeatFlag marks whether the current boxes are static relative to the previous frame's boxes.'''
                # def static_estimate(box1, box2, TH1=8, TH2=12):
                #     dij_abs = max(np.abs(box1 - box2))
                #     dij_euc = max([np.linalg.norm((box1[:2] - box2[:2])),
                #                    np.linalg.norm((box1[2:4] - box2[2:4]))
                #                    ])
                #     if dij_abs < TH1 and dij_euc < TH2:
                #         return True
                #     else:
                #         return False

                # FeatFlag = [-1] * nd
                # if len(boxes_and_imgs):
                #     detj = boxes_and_imgs[-1][0]
                #     frmj = boxes_and_imgs[-1][-1]
                #     for ii in range(nd):
                #         ## flag stores box indexes
                #         condt1 = frame - frmj == 1
                #         flag = [idx for jj, idx in enumerate(detj[:, 4]) if condt1 and static_estimate(det[ii, :4], detj[jj, :4])]
                #         if len(flag) == 1:
                #             FeatFlag[ii] = flag[0]
                boxes_and_imgs.append((det, im0, frame))

                ## ================================================================ written by WQG
                '''tracks: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
                   frame_index could also be replaced by the video frame ID; box_index stays unchanged.
                '''
                det_tracking = Boxes(det, im0.shape).cpu().numpy()
                tracks = tracker.update(det_tracking, im0)

                # detbox = [tlwh2tlbr(x._tlwh).tolist() + [x.track_id, x.score, x.cls, x.frame_id, x.idx]
                #           for x in tracker.tracked_stracks if x.is_activated]
                if len(tracks):
                    '''
                    tracks: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
                              0   1   2   3      4       5     6        7           8
                    '''
                    tracks[:, 7] = dataset.frame
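
                    # Illustrative sketch (hypothetical values, added for clarity): after this
                    # assignment each row of `tracks` reads like
                    #   [112.0, 40.0, 305.0, 498.0, 3, 0.91, 0, 27, 5]
                    # i.e. box (112, 40)-(305, 498) belongs to track_id 3 with score 0.91,
                    # class 0, seen in video frame 27 as detection index 5 of that frame.
                    # Rows of one track can later be grouped with, e.g.:
                    #   traj = track_boxes[track_boxes[:, 4] == 3]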
                    '''================== 1. Extract hand positions ==================='''
                    # idx_0 = tracks[:, 6].astype(np.int_) == 0
                    # hn = 0
                    # for j, index in enumerate(idx_0):
                    #     if index:
                    #         track = tracks[j, :]
                    #         hand_local, imgshow = handpose.get_hand_local(track, im0)
                    #         handlocals_dict.update({int(track[7]): {int(track[8]): hand_local}})
                    #         # '''The recall of YOLOv5 and of the hand detector differ; always replacing
                    #         #    the hand (x1, y1, x2, y2) in tracks with hand_local would conflate the
                    #         #    two coordinate conventions.'''
                    #         # if hand_local: tracks[j, :4] = hand_local
                    #         hn += 1
                    #         cv2.imwrite(f"D:\DeepLearning\yolov5\hands\images\{Path(source).stem}_{int(track[7])}_{hn}.png", imgshow)
                    '''================== 2. Store track data ======================='''
                    track_boxes = np.concatenate([track_boxes, tracks], axis=0)
                    # det_boxes = np.concatenate([det_boxes, detbox], axis=0)

                    '''================== 3. Store per-track ReID features ==========='''
                    def crop_img(track, image):
                        # Convert (top-left, width, height) to (x1, y1, x2, y2) and clamp to image bounds
                        tlbr = track.tlwh_to_tlbr(track._tlwh).astype(np.int_)
                        H, W = image.shape[:2]
                        tlbr[0] = max(0, tlbr[0])
                        tlbr[1] = max(0, tlbr[1])
                        tlbr[2] = min(W - 1, tlbr[2])
                        tlbr[3] = min(H - 1, tlbr[3])
                        img = image[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2], :]
                        # cv2.imwrite(f"./runs/imgs/{int(track.idx)}.png", img)
                        return img

                    feat_dict_1 = {f'{int(x.idx)}_img': crop_img(x, im0) for x in tracker.tracked_stracks if x.is_activated}
                    feat_dict = {int(x.idx): x.curr_feat for x in tracker.tracked_stracks if x.is_activated}
                    feat_dict.update(feat_dict_1)

                    features_dict.update({int(dataset.frame): feat_dict})
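
                    # Structure note (descriptive comment, added for clarity): features_dict maps
                    # frame index -> {box_index: ReID feature vector, f'{box_index}_img': BGR crop},
                    # so integer keys hold features and string '<idx>_img' keys hold the matching
                    # image patches of the same activated track.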
                    # det_anno = tracks.copy()
                    # else:
                    #     idmark = -1 * np.ones([det.shape[0], 1])
                    #     det_anno = np.concatenate([det[:, :4], idmark, det[:, 4:]], axis=1)

                    for *xyxy, id, conf, cls, fid, bid in reversed(tracks):
                        name = ('' if id == -1 else f'id:{int(id)} ') + names[int(cls)]
                        label = None if hide_labels else (name if hide_conf else f'{name} {conf:.2f}')
                        if id >= 0 and cls == 0:
                            color = colors(int(cls), True)
                        elif id >= 0 and cls != 0:
                            color = colors(int(id), True)
                        else:
                            color = colors(19, True)  # 19 is the last entry of the color palette
                        annotator.box_label(xyxy, label, color=color)
            # Save results (image and video with tracking)
            im0 = annotator.result()
            save_path_img, ext = os.path.splitext(save_path)
            if save_img:
                if dataset.mode == 'image':
                    imgpath = save_path_img + f"_{dataset.count}.png"
                else:
                    imgpath = save_path_img + f"_{dataset.frame}.png"
                cv2.imwrite(imgpath, im0)

                if vid_path[i] != save_path:  # new video
                    vid_path[i] = save_path
                    if isinstance(vid_writer[i], cv2.VideoWriter):
                        vid_writer[i].release()  # release previous video writer
                    if vid_cap:  # video
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:  # stream
                        fps, w, h = 30, im0.shape[1], im0.shape[0]
                    save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                    vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
    ## ======================================================================== written by WQG
    ## track_boxes: Array, [x1, y1, x2, y2, track_id, score, cls, frame_index, box_id]
    '''The detection results above were saved as video and images; five more types of data are saved below.'''
    filename = os.path.split(save_path_img)[-1]
    # file, ext = os.path.splitext(filename)
    # =============================================================================
    # fileElements = filename.split('_')
    # if len(fileElements) == 6 and len(fileElements[3]) == 1:
    #     barcode = fileElements[1]
    #     camera = fileElements[3]
    # elif len(fileElements) == 7 and len(fileElements[3]) == 1:
    #     barcode = fileElements[2]
    #     camera = fileElements[4]
    # else:
    #     barcode = ''
    #     camera = ''
    # =============================================================================
    '''======================== 1. Save in './runs/detect/' ===================='''
    if source.find("front") >= 0:
        carttemp = cv2.imread("./tracking/shopcart/cart_tempt/board_ftmp_line.png")
    else:
        carttemp = cv2.imread("./tracking/shopcart/cart_tempt/edgeline.png")
    imgshow = drawtracks(track_boxes, carttemp)

    showpath_1 = save_path_img + "_show.png"
    cv2.imwrite(showpath_1, imgshow)
    '''======================== 2. Save boxes and raw images =================='''
    # boxes_imgs_dir = Path('./tracking/data/boxes_imgs/')
    # if not boxes_imgs_dir.exists():
    #     boxes_imgs_dir.mkdir(parents=True, exist_ok=True)
    # boxes_imgs_path = boxes_imgs_dir.joinpath(f'{filename}.pkl')
    # with open(boxes_imgs_path, 'wb') as file:
    #     pickle.dump(boxes_and_imgs, file)
    '''======================== 3. Save tracks data ==========================='''
    tracks_dir = Path('./tracking/data/tracks/')
    if not tracks_dir.exists():
        tracks_dir.mkdir(parents=True, exist_ok=True)
    tracks_path = tracks_dir.joinpath(filename + "_track.npy")
    np.save(tracks_path, track_boxes)

    detect_path = tracks_dir.joinpath(filename + "_detect.npy")
    np.save(detect_path, det_boxes)

    '''======================== 4. Save ReID features data ===================='''
    feats_dir = Path('./tracking/data/trackfeats/')
    if not feats_dir.exists():
        feats_dir.mkdir(parents=True, exist_ok=True)
    feats_path = feats_dir.joinpath(f'{filename}.pkl')
    with open(feats_path, 'wb') as file:
        pickle.dump(features_dict, file)
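
    # Reload sketch (added for illustration; paths mirror the save code above,
    # '<filename>' stands for the actual stem):
    #   track_boxes = np.load('./tracking/data/tracks/<filename>_track.npy')
    #   with open('./tracking/data/trackfeats/<filename>.pkl', 'rb') as f:
    #       features_dict = pickle.load(f)
    # gives back the (N, 9) track array and the per-frame feature/crop dictionary.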
    '''======================== 5. Save hand_local data ======================='''
    # handlocal_dir = Path('./tracking/data/handlocal/')
    # if not handlocal_dir.exists():
    #     handlocal_dir.mkdir(parents=True, exist_ok=True)
    # handlocal_path = handlocal_dir.joinpath(f'{filename}.pkl')
    # with open(handlocal_path, 'wb') as file:
    #     pickle.dump(handlocals_dict, file)
    # Print results
    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights[0])  # update model (to fix SourceChangeWarning)
def parse_opt():
    modelpath = ROOT / 'ckpts/best_yolov5m_250000.pt'  # 'ckpts/best_15000_0908.pt', 'ckpts/yolov5s.pt', 'ckpts/best_20000_cls30.pt'

    '''datapath is a directory of video files or a single video file'''
    datapath = r"D:/datasets/ym/videos/标记视频/"  # ROOT/'data/videos', ROOT/'data/images'
    # datapath = r"D:\datasets\ym\highvalue\videos"
    # datapath = r"D:/dcheng/videos/"
    # modelpath = ROOT / 'ckpts/yolov5s.pt'

    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default=modelpath, help='model path or triton URL')  # 'yolov5s.pt', best_15000_0908.pt
    parser.add_argument('--source', type=str, default=datapath, help='file/dir/URL/glob/screen/0(webcam)')  # images, videos
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-csv', action='store_true', help='save results in CSV format')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt
def main_loop_folders(opt):
    check_requirements(ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
    # path1 = r"D:\datasets\ym\videos\标记视频"
    path2 = r"D:\datasets\ym\永辉双摄视频\加购_前摄\videos_front"
    # path3 = r"D:\datasets\ym\永辉双摄视频\加购_后摄\videos_back"
    # path4 = r"D:\datasets\ym\永辉双摄视频\退购_前摄\videos_front"
    # path5 = r"D:\datasets\ym\永辉双摄视频\退购_后摄\videos_back"
    path6 = r"D:\datasets\ym\测试数据20240328\front"
    path7 = r"D:\datasets\ym\测试数据20240328\back"

    '''Each element of paths is a folder of videos; the entries inside are video files.'''
    paths = [path2, path7]  # [path1, path2, path3, path4, path5]
    optdict = vars(opt)

    k1, k2 = 0, 0
    for p in paths:
        files = []
        if os.path.isdir(p):
            files.extend(sorted(glob.glob(os.path.join(p, '*.*'))))
            for file in files:
                file = r"D:\datasets\ym\测试数据20240328\front\112954521-7dd5ddad-922a-427b-b59e-a593e95e6ff4_front.mp4"
                optdict["source"] = file
                run(**optdict)
                k2 += 1
                if k2 == 1:
                    break
        elif os.path.isfile(p):
            run(**optdict)
            k1 += 1
            if k1 == 1:
                break
def find_files_in_nested_dirs(root_dir):
    all_files = []
    extensions = ['.mp4']
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            file, ext = os.path.splitext(filename)
            if ext in extensions:
                all_files.append(os.path.join(dirpath, filename))
    return all_files
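
# Usage sketch (illustrative path): find_files_in_nested_dirs() walks a directory tree
# and keeps only '.mp4' files, e.g.
#   videos = find_files_in_nested_dirs(r"D:\datasets\ym")
#   # videos -> [r"D:\datasets\ym\a\clip1.mp4", r"D:\datasets\ym\b\clip2.mp4", ...]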
def main(opt):
    check_requirements(ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
    p = r"D:\datasets\ym\永辉测试数据_202404\20240402"
    optdict = vars(opt)

    files = []
    k = 0
    all_files = find_files_in_nested_dirs(p)
    if os.path.isdir(p):
        files.extend(sorted(glob.glob(os.path.join(p, '*.*'))))
        for file in files:
            optdict["source"] = file
            run(**optdict)
            k += 1
            if k == 2:
                break
    elif os.path.isfile(p):
        run(**vars(opt))
def main_loop(opt):
    check_requirements(ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
    optdict = vars(opt)
    # p = r"D:\datasets\ym\永辉测试数据_比对"
    p = r"D:\datasets\ym\广告板遮挡测试\8"
    # p = r"D:\datasets\ym\videos\标记视频"
    # p = r"D:\datasets\ym\实验室测试"

    k = 0
    if os.path.isdir(p):
        files = find_files_in_nested_dirs(p)
        files = [r"D:\datasets\ym\videos\标记视频\test_20240402-173935_6920152400975_back_174037372.mp4",
                 r"D:\datasets\ym\videos\标记视频\test_20240402-173935_6920152400975_front_174037379.mp4"
                 ]
        files = [r"D:\datasets\ym\广告板遮挡测试\8\2500441577966_20240508-175946_front_addGood_70f75407b7ae_155_17788571404.mp4"]
        for file in files:
            optdict["source"] = file
            run(**optdict)
            k += 1
            if k == 2:
                break
    elif os.path.isfile(p):
        optdict["source"] = p
        run(**vars(opt))
if __name__ == '__main__':
    opt = parse_opt()

    # main_loop_folders(opt)
    # main(opt)
    main_loop(opt)