Add YOLOv10 and modify pipeline

王庆刚
2025-03-28 13:19:54 +08:00
parent 183299c06b
commit 798c596acc
471 changed files with 19109 additions and 7342 deletions

Binary file not shown.


@@ -48,7 +48,7 @@ class FeatsInterface:
        modpath = os.path.join(curpath, conf.test_model)
        self.model.load_state_dict(torch.load(modpath, map_location=conf.device))
        self.model.eval()
-        print('load model {} '.format(conf.testbackbone))
        # print('load model {} '.format(conf.testbackbone))

    def inference(self, images, detections=None):
        '''

execute_pipeline.py (new file, 31 lines)

@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 28 11:35:28 2025

@author: ym
"""
from pipeline_01 import execute_pipeline

execute_pipeline(evtdir = r"D:\datasets\ym\后台数据\unzip",
                 DataType = "raw",        # raw, pkl
                 kk=1,
                 source_type = "video",   # video, image
                 save_path = r"D:\work\result_pipeline_V5",
                 yolo_ver = "V5",         # V10, V5
                 weight_yolo_v5 = r'./ckpts/best_cls10_0906.pt',
                 weight_yolo_v10 = r'./ckpts/best_v10s_width0375_1205.pt',
                 saveimages = False
                 )

execute_pipeline(evtdir = r"D:\datasets\ym\后台数据\unzip",
                 DataType = "raw",        # raw, pkl
                 kk=1,
                 source_type = "video",   # video, image
                 save_path = r"D:\work\result_pipeline_V10",
                 yolo_ver = "V10",        # V10, V5
                 weight_yolo_v5 = r'./ckpts/best_cls10_0906.pt',
                 weight_yolo_v10 = r'./ckpts/best_v10s_width0375_1205.pt',
                 saveimages = False
                 )

imgs_to_video.py (new file, 127 lines)

@@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 30 19:15:05 2024

@author: ym
"""
import cv2
import os
import glob

IMG_FORMATS = "bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm"  # include image suffixes
VID_FORMATS = "asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv"  # include video suffixes


def for_test():
    # NOTE: relies on module-level `path`, `video_path` and `img_path` variables,
    # which are not defined in this file as committed.
    save_path = video_path + img_path
    fps, w, h = 10, 1024, 1280
    cap = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))

    pathx = path + img_path
    imgfiles = [f for f in os.listdir(pathx) if f.find("_cut") == -1]  # skip "*_cut*" images
    imgfiles.sort(key=lambda x: int(x[:-5]))
    imgpaths = []
    for imgfile in imgfiles:
        imgpaths.append(os.path.join(pathx, imgfile))

    center = (1280 / 2, 1024 / 2)
    rotate_matrix = cv2.getRotationMatrix2D(center=center, angle=-90, scale=1)

    k = 0
    for ipath in imgpaths:
        img = cv2.imread(ipath)
        rotated_image = cv2.warpAffine(src=img, M=rotate_matrix, dsize=(w, h))
        cap.write(rotated_image)
    cap.release()  # release the writer so the video file is finalized

    print("Have imgs")


def test_1():
    # NOTE: relies on module-level `path`, `video_path` and `img_path` variables,
    # which are not defined in this file as committed.
    # name = os.path.split(img_path)[-1]
    # save_path = video_path + name + '.mp4'
    save_path = video_path + img_path

    fps, w, h = 10, 1024, 1280
    cap = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))

    pathx = path + img_path
    imgfiles = [f for f in os.listdir(pathx) if f.find("_cut") == -1]  # skip "*_cut*" images
    imgfiles.sort(key=lambda x: int(x[:-5]))
    imgpaths = []
    for imgfile in imgfiles:
        imgpaths.append(os.path.join(pathx, imgfile))

    # ipaths = [os.path.join(pathx, f) for f in os.listdir(pathx) if not f.find("_cut") != -1]
    # ipaths = []
    # for f in os.listdir(pathx):
    #     if not f.find('_cut'):
    #         ipaths.append(os.path.join(pathx, f))
    # ipaths.sort(key = lambda x: int(x.split('_')[-2]))

    k = 0
    for ipath in imgpaths:
        img = cv2.imread(ipath)
        cap.write(img)
        k += 1
    cap.release()

    print(img_path + f" have imgs: {k}")


def img2video(imgpath):
    """Write all images in `imgpath` (sorted by filename) into `<imgpath>.mp4`."""
    if not os.path.isdir(imgpath):
        return

    files = []
    files.extend(sorted(glob.glob(os.path.join(imgpath, "*.*"))))
    images = [x for x in files if x.split(".")[-1].lower() in IMG_FORMATS]

    h, w = cv2.imread(images[0]).shape[:2]
    fps = 25

    vidpath = imgpath + '.mp4'
    cap = cv2.VideoWriter(vidpath, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
    for p in images:
        img = cv2.imread(p)
        cap.write(img)
    cap.release()


def main():
    imgpath = r"D:\work\result\202503251112_v10s_result"
    img2video(imgpath)


if __name__ == "__main__":
    main()


@@ -76,7 +76,11 @@ def attempt_load(weights, device=None, inplace=True, fuse=True):
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
-        ckpt = torch.load(attempt_download(w), map_location=device, weights_only=False)  # load
        if torch.__version__ >= '2.6':
            ckpt = torch.load(attempt_download(w), map_location=device, weights_only=False)  # load
        else:
            ckpt = torch.load(attempt_download(w), map_location=device)
        ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model

        # Model compatibility updates
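
One caveat with the guard in the hunk above, noted as an aside: comparing torch.__version__ as a plain string works for current releases but sorts lexicographically, so a future "2.10" would compare lower than "2.6". A minimal sketch of a more robust check, assuming the packaging package is available; the helper name load_ckpt is illustrative, not part of the repo:

# Sketch only: version-aware torch.load, mirroring the hunk above.
# `packaging` and the helper name `load_ckpt` are assumptions for illustration.
import torch
from packaging.version import Version

def load_ckpt(path, device=None):
    # PyTorch 2.6 flipped the default of weights_only to True, which rejects fully
    # pickled models, so pass weights_only=False explicitly on newer versions.
    if Version(str(torch.__version__).split("+")[0]) >= Version("2.6"):
        return torch.load(path, map_location=device, weights_only=False)
    return torch.load(path, map_location=device)  # older torch: argument not needed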


@@ -11,7 +11,7 @@ import pickle
import numpy as np
from pathlib import Path
from scipy.spatial.distance import cdist
-from track_reid import yolo_resnet_tracker
from track_reid import yolo_resnet_tracker, yolov10_resnet_tracker

from tracking.dotrack.dotracks_back import doBackTracks
from tracking.dotrack.dotracks_front import doFrontTracks
@@ -65,22 +65,20 @@ def pipeline(
        eventpath,
        savepath,
        SourceType,
-        weights
        weights,
        YoloVersion="V5"
    ):
    '''
        eventpath: 单个事件的存储路径
    '''
-    optdict = {}
-    optdict["weights"] = weights

    if SourceType == "video":
        vpaths = get_video_pairs(eventpath)
    elif SourceType == "image":
        vpaths = get_image_pairs(eventpath)

    optdict = {}
    optdict["weights"] = weights

    event_tracks = []

    ## 构造购物事件字典
@@ -101,9 +99,9 @@ def pipeline(
    savepath_spdict.mkdir(parents=True, exist_ok=True)
    pf_path = Path(savepath_spdict) / Path(str(evtname)+".pickle")

-    if pf_path.exists():
-        print(f"Pickle file have saved: {evtname}.pickle")
-        return
    # if pf_path.exists():
    #     print(f"Pickle file have saved: {evtname}.pickle")
    #     return

    '''====================== 构造 ShoppingDict 模块 ======================='''
    ShoppingDict = {"eventPath": eventpath,
@@ -160,12 +158,16 @@ def pipeline(
        '''================= 3. Yolo + Resnet + Tracker ================='''
        optdict["source"] = vpath
        optdict["save_dir"] = savepath_pipeline_imgs
-        optdict["is_save_img"] = False
        optdict["is_save_img"] = True
        optdict["is_save_video"] = True

-        yrtOut = yolo_resnet_tracker(**optdict)
        if YoloVersion == "V5":
            yrtOut = yolo_resnet_tracker(**optdict)
        elif YoloVersion == "V10":
            yrtOut = yolov10_resnet_tracker(**optdict)

        yrtOut_save = []
        for frdict in yrtOut:
            fr_dict = {}
@@ -285,21 +287,32 @@ def pipeline(
    trajpath = os.path.join(savepath_pipeline, "trajectory.png")
    cv2.imwrite(trajpath, img_cat)

-def main():
-    '''
-    函数pipeline(),遍历事件文件夹,选择类型 image 或 video,
-    '''
def execute_pipeline(evtdir = r"D:\datasets\ym\后台数据\unzip",
                     source_type = "video",  # video, image
                     save_path = r"D:\work\result_pipeline",
                     yolo_ver = "V10",  # V10, V5
                     weight_yolo_v5 = r'./ckpts/best_cls10_0906.pt',
                     weight_yolo_v10 = r'./ckpts/best_v10s_width0375_1205.pt',
                     k=0
                     ):
    '''
    运行函数 pipeline(),遍历事件文件夹,每个文件夹是一个事件
    '''
    parmDict = {}
-    evtdir = r"../dataset/backend_20250310"
-    parmDict["SourceType"] = "video"  # video, image
-    parmDict["savepath"] = r"../dataset/run"
-    parmDict["weights"] = r'./ckpts/best_cls10_0906.pt'
    parmDict["SourceType"] = source_type
    parmDict["savepath"] = save_path
    parmDict["YoloVersion"] = yolo_ver
    if parmDict["YoloVersion"] == "V5":
        parmDict["weights"] = weight_yolo_v5
    elif parmDict["YoloVersion"] == "V10":
        parmDict["weights"] = weight_yolo_v10

    evtdir = Path(evtdir)
-    k, errEvents = 0, []
    errEvents = []
    for item in evtdir.iterdir():
        if item.is_dir():
-            # item = evtdir/Path("20250303-103058-074_6914973604223_6914973604223")
            item = evtdir/Path("20250310-175352-741")
            parmDict["eventpath"] = item
            pipeline(**parmDict)
            # try:
@@ -307,19 +320,21 @@ def main():
            #     except Exception as e:
            #         errEvents.append(str(item))
            k+=1
-            if k==5:
            if k==1:
                break

-    errfile = os.path.join(parmDict["savepath"], f'error_events.txt')
    errfile = os.path.join(parmDict["savepath"], 'error_events.txt')
    with open(errfile, 'w', encoding='utf-8') as f:
        for line in errEvents:
            f.write(line + '\n')

if __name__ == "__main__":
-    main()
    execute_pipeline()

    # spath_v10 = r"D:\work\result_pipeline_v10"
    # spath_v5 = r"D:\work\result_pipeline_v5"
    # execute_pipeline(save_path=spath_v10, yolo_ver="V10")
    # execute_pipeline(save_path=spath_v5, yolo_ver="V5")

pipeline_01.py (new file, 395 lines)

@@ -0,0 +1,395 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 29 08:59:21 2024
@author: ym
"""
import os
# import sys
import cv2
import pickle
import numpy as np
from pathlib import Path
from scipy.spatial.distance import cdist
from track_reid import yolo_resnet_tracker, yolov10_resnet_tracker
from tracking.dotrack.dotracks_back import doBackTracks
from tracking.dotrack.dotracks_front import doFrontTracks
from tracking.utils.drawtracks import plot_frameID_y2, draw_all_trajectories
from utils.getsource import get_image_pairs, get_video_pairs
from tracking.utils.read_data import read_similar
class CameraEvent_:
def __init__(self):
self.cameraType = ''  # "front", "back"
self.videoPath = ''
self.imagePaths = []
self.yoloResnetTracker = []
self.tracking = None
class ShoppingEvent_:
def __init__(self):
self.eventPath = ''
self.eventName = ''
self.barcode = ''
self.eventType = ''  # "input", "output", "other"
self.frontCamera = None
self.backCamera = None
self.one2n = []
def save_subimgs(imgdict, boxes, spath, ctype, featdict = None):
'''
当前 box 特征和该轨迹前一个 box 特征的相似度,可用于和跟踪序列中的相似度进行比较
'''
boxes = boxes[np.argsort(boxes[:, 7])]
for i in range(len(boxes)):
simi = None
tid, fid, bid = int(boxes[i, 4]), int(boxes[i, 7]), int(boxes[i, 8])
if i>0:
_, fid0, bid0 = int(boxes[i-1, 4]), int(boxes[i-1, 7]), int(boxes[i-1, 8])
if f"{fid0}_{bid0}" in featdict.keys() and f"{fid}_{bid}" in featdict.keys():
feat0 = featdict[f"{fid0}_{bid0}"]
feat1 = featdict[f"{fid}_{bid}"]
simi = 1 - np.maximum(0.0, cdist(feat0[None, :], feat1[None, :], "cosine"))[0][0]
img = imgdict[f"{fid}_{bid}"]
imgpath = spath / f"{ctype}_tid{tid}-{fid}-{bid}.png"
if simi is not None:
imgpath = spath / f"{ctype}_tid{tid}-{fid}-{bid}_sim{simi:.2f}.png"
cv2.imwrite(imgpath, img)
def save_subimgs_1(imgdict, boxes, spath, ctype, simidict = None):
'''
当前 box 特征和该轨迹 smooth_feat 特征的相似度, yolo_resnet_tracker 函数中,
采用该方式记录特征相似度
'''
for i in range(len(boxes)):
tid, fid, bid = int(boxes[i, 4]), int(boxes[i, 7]), int(boxes[i, 8])
key = f"{fid}_{bid}"
img = imgdict[key]
imgpath = spath / f"{ctype}_tid{tid}-{fid}-{bid}.png"
if simidict is not None and key in simidict.keys():
imgpath = spath / f"{ctype}_tid{tid}-{fid}-{bid}_sim{simidict[key]:.2f}.png"
cv2.imwrite(imgpath, img)
def show_result(event_tracks, yrtDict, savepath_pipe):
'''保存 Tracking 输出的运动轨迹子图,并记录相似度'''
savepath_pipe_subimgs = savepath_pipe / Path("subimgs")
if not savepath_pipe_subimgs.exists():
savepath_pipe_subimgs.mkdir(parents=True, exist_ok=True)
for CamerType, vts in event_tracks:
if len(vts.tracks)==0: continue
if CamerType == 'front':
# yolos = ShoppingDict["frontCamera"]["yoloResnetTracker"]
yolos = yrtDict["frontyrt"]
ctype = 1
if CamerType == 'back':
# yolos = ShoppingDict["backCamera"]["yoloResnetTracker"]
yolos = yrtDict["backyrt"]
ctype = 0
imgdict, featdict, simidict = {}, {}, {}
for y in yolos:
imgdict.update(y["imgs"])
featdict.update(y["feats"])
simidict.update(y["featsimi"])
for track in vts.Residual:
if isinstance(track, np.ndarray):
save_subimgs(imgdict, track, savepath_pipe_subimgs, ctype, featdict)
else:
save_subimgs(imgdict, track.slt_boxes, savepath_pipe_subimgs, ctype, featdict)
'''(3) 轨迹显示与保存'''
illus = [None, None]
for CamerType, vts in event_tracks:
if len(vts.tracks)==0: continue
if CamerType == 'front':
edgeline = cv2.imread("./tracking/shopcart/cart_tempt/board_ftmp_line.png")
h, w = edgeline.shape[:2]
# nh, nw = h//2, w//2
# edgeline = cv2.resize(edgeline, (nw, nh), interpolation=cv2.INTER_AREA)
img_tracking = draw_all_trajectories(vts, edgeline, savepath_pipe, CamerType, draw5p=True)
illus[0] = img_tracking
plt = plot_frameID_y2(vts)
plt.savefig(os.path.join(savepath_pipe, "front_y2.png"))
if CamerType == 'back':
edgeline = cv2.imread("./tracking/shopcart/cart_tempt/edgeline.png")
h, w = edgeline.shape[:2]
# nh, nw = h//2, w//2
# edgeline = cv2.resize(edgeline, (nw, nh), interpolation=cv2.INTER_AREA)
img_tracking = draw_all_trajectories(vts, edgeline, savepath_pipe, CamerType, draw5p=True)
illus[1] = img_tracking
illus = [im for im in illus if im is not None]
if len(illus):
img_cat = np.concatenate(illus, axis = 1)
if len(illus)==2:
H, W = img_cat.shape[:2]
cv2.line(img_cat, (int(W/2), 0), (int(W/2), int(H)), (128, 128, 255), 3)
trajpath = os.path.join(savepath_pipe, "trajectory.png")
cv2.imwrite(trajpath, img_cat)
def pipeline(eventpath,
SourceType,
weights,
DataType = "raw", #raw, pkl: images or videos, pkl, pickle file
YoloVersion="V5",
savepath = None,
saveimages = True
):
## 构造购物事件字典
evtname = Path(eventpath).stem
barcode = evtname.split('_')[-1] if len(evtname.split('_'))>=2 \
and len(evtname.split('_')[-1])>=8 \
and evtname.split('_')[-1].isdigit() else ''
'''事件结果存储文件夹: savepath_pipe, savepath_pkl'''
if not savepath:
savepath = Path(__file__).resolve().parents[0] / "events_result"
savepath_pipe = Path(savepath) / Path("yolos_tracking") / evtname
savepath_pkl = Path(savepath) / "shopping_pkl"
if not savepath_pkl.exists():
savepath_pkl.mkdir(parents=True, exist_ok=True)
pklpath = Path(savepath_pkl) / Path(str(evtname)+".pickle")
yrtDict = {}
yrt_out = []
if DataType == "raw":
### 不重复执行已经过yolo-resnet-tracker
# if pklpath.exists():
# print(f"Pickle file have saved: {evtname}.pickle")
# return
if SourceType == "video":
vpaths = get_video_pairs(eventpath)
elif SourceType == "image":
vpaths = get_image_pairs(eventpath)
for vpath in vpaths:
'''================= 2. 事件结果存储文件夹 ================='''
if isinstance(vpath, list):
savepath_pipe_imgs = savepath_pipe / Path("images")
else:
savepath_pipe_imgs = savepath_pipe / Path(str(Path(vpath).stem))
if not savepath_pipe_imgs.exists():
savepath_pipe_imgs.mkdir(parents=True, exist_ok=True)
optdict = {}
optdict["weights"] = weights
optdict["source"] = vpath
optdict["save_dir"] = savepath_pipe_imgs
optdict["is_save_img"] = saveimages
optdict["is_save_video"] = True
if YoloVersion == "V5":
yrtOut = yolo_resnet_tracker(**optdict)
elif YoloVersion == "V10":
yrtOut = yolov10_resnet_tracker(**optdict)
yrt_out.append((vpath, yrtOut))
elif DataType == "pkl":
pass
else:
return
'''====================== 构造 ShoppingDict 模块 ======================='''
ShoppingDict = {"eventPath": eventpath,
"eventName": evtname,
"barcode": barcode,
"eventType": '', # "input", "output", "other"
"frontCamera": {},
"backCamera": {},
"one2n": [] #
}
procpath = Path(eventpath).joinpath('process.data')
if procpath.is_file():
SimiDict = read_similar(procpath)
ShoppingDict["one2n"] = SimiDict['one2n']
event_tracks = []
for vpath, yrtOut in yrt_out:
'''================= 1. 构造相机事件字典 ================='''
CameraEvent = {"cameraType": '', # "front", "back"
"videoPath": '',
"imagePaths": [],
"yoloResnetTracker": [],
"tracking": [],
}
if isinstance(vpath, list):
CameraEvent["imagePaths"] = vpath
bname = os.path.basename(vpath[0])
if not isinstance(vpath, list):
CameraEvent["videoPath"] = vpath
bname = os.path.basename(vpath).split('.')[0]
if bname.split('_')[0] == "0" or bname.find('back')>=0:
CameraEvent["cameraType"] = "back"
if bname.split('_')[0] == "1" or bname.find('front')>=0:
CameraEvent["cameraType"] = "front"
'''2种保存方式: (1) save images, (2) no save images'''
### (1) save images
yrtOut_save = []
for frdict in yrtOut:
fr_dict = {}
for k, v in frdict.items():
if k != "imgs":
fr_dict[k]=v
yrtOut_save.append(fr_dict)
CameraEvent["yoloResnetTracker"] = yrtOut_save
### (2) no save images
# CameraEvent["yoloResnetTracker"] = yrtOut
'''================= 4. tracking ================='''
'''(1) 生成用于 tracking 模块的 boxes、feats'''
bboxes = np.empty((0, 6), dtype=np.float64)
trackerboxes = np.empty((0, 9), dtype=np.float64)
trackefeats = {}
for frameDict in yrtOut:
tboxes = frameDict["tboxes"]
ffeats = frameDict["feats"]
boxes = frameDict["bboxes"]
bboxes = np.concatenate((bboxes, np.array(boxes)), axis=0)
trackerboxes = np.concatenate((trackerboxes, np.array(tboxes)), axis=0)
for i in range(len(tboxes)):
fid, bid = int(tboxes[i, 7]), int(tboxes[i, 8])
trackefeats.update({f"{fid}_{bid}": ffeats[f"{fid}_{bid}"]})
'''(2) tracking, 后摄'''
if CameraEvent["cameraType"] == "back":
vts = doBackTracks(trackerboxes, trackefeats)
vts.classify()
event_tracks.append(("back", vts))
CameraEvent["tracking"] = vts
ShoppingDict["backCamera"] = CameraEvent
yrtDict["backyrt"] = yrtOut
'''(2) tracking, 前摄'''
if CameraEvent["cameraType"] == "front":
vts = doFrontTracks(trackerboxes, trackefeats)
vts.classify()
event_tracks.append(("front", vts))
CameraEvent["tracking"] = vts
ShoppingDict["frontCamera"] = CameraEvent
yrtDict["frontyrt"] = yrtOut
'''========================== 保存模块 ================================='''
# 保存 ShoppingDict
with open(str(pklpath), 'wb') as f:
pickle.dump(ShoppingDict, f)
# 绘制并保存轨迹图
show_result(event_tracks, yrtDict, savepath_pipe)
def execute_pipeline(evtdir = r"D:\datasets\ym\后台数据\unzip",
DataType = "raw", # raw, pkl
save_path = r"D:\work\result_pipeline",
kk=1,
source_type = "video", # video, image,
yolo_ver = "V10", # V10, V5
weight_yolo_v5 = r'./ckpts/best_cls10_0906.pt' ,
weight_yolo_v10 = r'./ckpts/best_v10s_width0375_1205.pt',
saveimages = True
):
'''
运行函数 pipeline(),遍历事件文件夹,每个文件夹是一个事件
'''
parmDict = {}
parmDict["DataType"] = DataType
parmDict["savepath"] = save_path
parmDict["SourceType"] = source_type
parmDict["YoloVersion"] = yolo_ver
if parmDict["YoloVersion"] == "V5":
parmDict["weights"] = weight_yolo_v5
elif parmDict["YoloVersion"] == "V10":
parmDict["weights"] = weight_yolo_v10
parmDict["saveimages"] = saveimages
evtdir = Path(evtdir)
errEvents = []
k = 0
for item in evtdir.iterdir():
if item.is_dir():
item = evtdir/Path("20250310-175352-741")
parmDict["eventpath"] = item
pipeline(**parmDict)
# try:
# pipeline(**parmDict)
# except Exception as e:
# errEvents.append(str(item))
k+=1
if kk is not None and k==kk:
break
errfile = os.path.join(parmDict["savepath"], 'error_events.txt')
with open(errfile, 'w', encoding='utf-8') as f:
for line in errEvents:
f.write(line + '\n')
if __name__ == "__main__":
execute_pipeline()
# spath_v10 = r"D:\work\result_pipeline_v10"
# spath_v5 = r"D:\work\result_pipeline_v5"
# execute_pipeline(save_path=spath_v10, yolo_ver="V10")
# execute_pipeline(save_path=spath_v5, yolo_ver="V5")
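
For reference, a minimal sketch of reading back one of the ShoppingDict pickles that pipeline() writes above; the event name below is hypothetical, and the keys are the ones built in pipeline():

# Sketch: inspect a saved ShoppingDict; the pickle name below is hypothetical.
import pickle
from pathlib import Path

pklpath = Path(r"D:\work\result_pipeline") / "shopping_pkl" / "20250310-175352-741.pickle"
with open(pklpath, 'rb') as f:
    shopping = pickle.load(f)

print(shopping["eventName"], shopping["barcode"], shopping["eventType"])
for cam in ("backCamera", "frontCamera"):
    cam_evt = shopping.get(cam) or {}
    frames = cam_evt.get("yoloResnetTracker", [])
    print(cam, cam_evt.get("cameraType"), "frames:", len(frames))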


@@ -64,7 +64,10 @@ from hands.hand_inference import hand_pose

from contrast.feat_extract.config import config as conf
from contrast.feat_extract.inference import FeatsInterface
from ultralytics import YOLOv10

ReIDEncoder = FeatsInterface(conf)
print(f'load model {conf.testbackbone} in {Path(__file__).stem}')

IMG_FORMATS = '.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png', '.tif', '.tiff', '.webp', '.pfm'  # include image suffixes
VID_FORMATS = '.asf', '.avi', '.gif', '.m4v', '.mkv', '.mov', '.mp4', '.mpeg', '.mpg', '.ts', '.wmv'  # include video suffixes
@@ -131,12 +134,158 @@ def init_trackers(tracker_yaml = None, bs=1):
    trackers = []
    for _ in range(bs):
        tracker = TRACKER_MAP[cfg.tracker_type](args=cfg, frame_rate=30)
        if cfg.with_reid:
            tracker.encoder = ReIDEncoder
        trackers.append(tracker)

    return trackers
'''=============== used in pipeline.py for Yolov10 =================='''
def yolov10_resnet_tracker(
weights = ROOT / 'ckpts/best_v10s_width0375_1205.pt', # model path or triton URL
source = '', # file/dir/URL/glob/screen/0(webcam)
save_dir = '',
is_save_img = True,
is_save_video = True,
tracker_yaml = "./tracking/trackers/cfg/botsort.yaml",
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
):
## load a custom model
model = YOLOv10(weights)
custom = {"conf": 0.25, "batch": 1, "save": False, "mode": "predict"}
kwargs = {"save": True, "imgsz": 640, "conf": 0.1}
args = {**model.overrides, **custom, **kwargs}
predictor = model.task_map[model.task]["predictor"](overrides=args, _callbacks=model.callbacks)
vid_path, vid_writer = None, None
tracker = init_trackers(tracker_yaml)[0]
yoloResnetTracker = []
for i, result in enumerate(predictor.stream_inference(source)):
datamode = predictor.dataset.mode
det = result.boxes.data.cpu().numpy()
im0 = result.orig_img
names = result.names
path = result.path
im_array = result.plot()
## to do tracker.update()
det_tracking = Boxes(det, im0.shape)
tracks, outfeats = tracker.update(det_tracking, im0)
if datamode == "video":
frameId = predictor.dataset.frame
elif datamode == "image":
frameId = predictor.dataset.count
annotator = Annotator(im0.copy(), line_width=line_thickness, example=str(names))
simdict, simdict1 = {}, {}
for fid, bid, mfeat, cfeat, features in outfeats:
if mfeat is not None and cfeat is not None:
simi = 1 - np.maximum(0.0, cdist(mfeat[None, :], cfeat[None, :], "cosine"))[0][0]
simdict.update({f"{int(frameId)}_{int(bid)}":simi})
if cfeat is not None and len(features)>=2:
mfeat = features[-2]
simi = 1 - np.maximum(0.0, cdist(mfeat[None, :], cfeat[None, :], "cosine"))[0][0]
simdict1.update({f"{int(frameId)}_{int(bid)}":simi})
if len(tracks) > 0:
tracks[:, 7] = frameId
# trackerBoxes = np.concatenate([trackerBoxes, tracks], axis=0)
'''================== 1. 存储 dets/subimgs/features Dict ============='''
imgs, features = ReIDEncoder.inference(im0, tracks)
imgdict, featdict = {}, {}
for ii, bid in enumerate(tracks[:, 8]):
featdict.update({f"{int(frameId)}_{int(bid)}": features[ii, :]}) # [f"feat_{int(bid)}"] = features[i, :]
imgdict.update({f"{int(frameId)}_{int(bid)}": imgs[ii]})
frameDict = {"path": path,
"fid": int(frameId),
"bboxes": det,
"tboxes": tracks,
"imgs": imgdict,
"feats": featdict,
"featsimi": simdict, # 当前 box 特征和该轨迹 smooth_feat 特征的相似度
"featsimi1": simdict1 # 当前 box 特征和该轨迹前一个 box 特征的相似度
}
yoloResnetTracker.append(frameDict)
# imgs, features = inference_image(im0, tracks)
# TrackerFeats = np.concatenate([TrackerFeats, features], axis=0)
'''================== 2. 提取手势位置 ==================='''
for *xyxy, id, conf, cls, fid, bid in reversed(tracks):
name = ('' if id==-1 else f'id:{int(id)} ') + names[int(cls)]
if f"{int(frameId)}_{int(bid)}" in simdict.keys():
sim = simdict[f"{int(frameId)}_{int(bid)}"]
label = f"{name} {sim:.2f}"
else:
label = None if hide_labels else name
# label = None if hide_labels else (name if hide_conf else f'{name} {conf:.1f}')
if id >=0 and cls==0:
color = colors(int(cls), True)
elif id >=0 and cls!=0:
color = colors(int(id), True)
else:
color = colors(19, True) # 19为调色板的最后一个元素
annotator.box_label(xyxy, label, color=color)
'''====== Save results (image and video) ======'''
# save_path = str(save_dir / Path(path).name) # 带有后缀名
im0 = annotator.result()
if is_save_img:
save_path_img = str(save_dir / Path(path).stem)
if datamode == 'image':
imgpath = save_path_img + ".png"
if datamode == 'video' :
imgpath = save_path_img + f"_{frameId}.png"
cv2.imwrite(Path(imgpath), im0)
# if dataset.mode == 'video' and is_save_video:
if is_save_video:
if datamode == 'video':
video_path = str(save_dir / Path(path).stem) + '.mp4' # 带有后缀名
else:
videoname = str(Path(path).stem).split('_')[0] + '.mp4'
video_path = str(save_dir / videoname)
if vid_path != video_path: # new video
vid_path = video_path
vid_cap = predictor.dataset.cap
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 25, im0.shape[1], im0.shape[0]
## for image rotating in dataloader.LoadImages.__next__()
w, h = im0.shape[1], im0.shape[0]
video_path = str(Path(video_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
vid_writer = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer.write(im0)
return yoloResnetTracker
-'''=============== used in pipeline.py =================='''
'''=============== used in pipeline.py for Yolov5 =================='''
@smart_inference_mode()
def yolo_resnet_tracker(
        weights=ROOT / 'yolov5s.pt',  # model path or triton URL
@@ -660,8 +809,6 @@ def run(

def parse_opt():
    modelpath = ROOT / 'ckpts/best_cls10_0906.pt'  # 'ckpts/best_15000_0908.pt', 'ckpts/yolov5s.pt', 'ckpts/best_20000_cls30.pt, best_yolov5m_250000'

    '''datapath为视频文件目录或视频文件'''
    datapath = r"D:/datasets/ym/videos/标记视频/"  # ROOT/'data/videos', ROOT/'data/images' images
    # datapath = r"D:\datasets\ym\highvalue\videos"
@@ -714,7 +861,7 @@ def find_video_imgs(root_dir):

-def main():
def main_v5():
    '''
    run(): 单张图像或单个视频文件的推理,不支持图像序列,
    '''
@@ -733,10 +880,10 @@ def main():
    # p = r"D:\exhibition\images\153112511_0_seek_105.mp4"
    # p = r"D:\exhibition\images\image"

-    p = r"D:\全实时\202502\tracker\1_1740891284792.mp4"
    p = r"D:\datasets\ym\后台数据\unzip\20250310-175352-741"

-    optdict["project"] = r"D:\全实时\202502\tracker"
-    # optdict["project"] = r"D:\exhibition\result"
    optdict["project"] = r"D:\work\result"
    optdict["weights"] = ROOT / 'ckpts/best_cls10_0906.pt'

    if os.path.isdir(p):
        files = find_video_imgs(p)
        k = 0
@@ -745,17 +892,39 @@ def main():
            run(**optdict)
            k += 1
-            if k == 1:
            if k == 2:
                break
    elif os.path.isfile(p):
        optdict["source"] = p
        run(**optdict)
def main_v10():
    datapath = r'D:\datasets\ym\后台数据\unzip\20250310-175352-741\0.mp4'
    savepath = Path(r'D:\work\result')  # use a Path so the "/" join below works
    savepath = savepath / Path(str(Path(datapath).stem))
    if not savepath.exists():
        savepath.mkdir(parents=True, exist_ok=True)

    weightpath = ROOT / 'ckpts/best_v10s_width0375_1205.pt'

    optdict = {}
    optdict["weights"] = weightpath
    optdict["source"] = datapath
    optdict["save_dir"] = savepath
    optdict["is_save_img"] = True
    optdict["is_save_video"] = True

    yrtOut = yolov10_resnet_tracker(**optdict)


if __name__ == '__main__':
-    main()
    # main_v5()
    main_v10()
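
The featsimi/featsimi1 values stored per frame above are cosine similarities between ReID feature vectors; a small self-contained sketch of that same expression (the feature vectors here are random stand-ins, not real model output):

# Sketch of the "featsimi" computation: 1 - cosine distance between two features.
import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.default_rng(0)
mfeat = rng.normal(size=256)  # stand-in for a track's smoothed feature
cfeat = rng.normal(size=256)  # stand-in for the current box feature

simi = 1 - np.maximum(0.0, cdist(mfeat[None, :], cfeat[None, :], "cosine"))[0][0]
print(f"similarity: {simi:.2f}")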


@@ -116,11 +116,13 @@ class BOTSORT(BYTETracker):
        self.proximity_thresh = args.proximity_thresh
        self.appearance_thresh = args.appearance_thresh

-        if args.with_reid:
-            # Haven't supported BoT-SORT(reid) yet
-            # self.encoder = ReIDInterface(config)
-            self.encoder = FeatsInterface(conf)
        # if args.with_reid:
        #     # Haven't supported BoT-SORT(reid) yet
        #     # self.encoder = ReIDInterface(config)
        #     self.encoder = FeatsInterface(conf)
        #     # print('load model {} in BOTSORT'.format(conf.testbackbone))

        # self.gmc = GMC(method=args.gmc_method)  # commented by WQG


@@ -0,0 +1,180 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 27 16:09:07 2025
@author: ym
"""
import os
import sys
import cv2
import pickle
import numpy as np
from pathlib import Path
from scipy.spatial.distance import cdist
from .dotrack.dotracks_back import doBackTracks
from .dotrack.dotracks_front import doFrontTracks
from .utils.drawtracks import plot_frameID_y2, draw_all_trajectories
from .utils.read_data import read_similar
class CameraEvent_:
def __init__(self):
self.cameraType = ''  # "front", "back"
self.videoPath = ''
self.imagePaths = []
self.yoloResnetTracker = []
self.tracking = None
class ShoppingEvent_:
def __init__(self):
self.eventPath = ''
self.eventName = ''
self.barcode = ''
self.eventType = ''  # "input", "output", "other"
self.frontCamera = None
self.backCamera = None
self.one2n = []
def main():
'''
将一个对象读取,修改其中一个属性
'''
evt_pkfile = 'path.pickle'
with open(evt_pkfile, 'rb') as f:
ShoppingDict = pickle.load(f)
savepath = ""
back_camera = ShoppingDict["backCamera"]["cameraType"]
back_yrt = ShoppingDict["backCamera"]["yoloResnetTracker"]
front_camera = ShoppingDict["frontCamera"]["cameraType"]
front_yrt = ShoppingDict["frontCamera"]["yoloResnetTracker"]
yrts = [(back_camera, back_yrt), (front_camera, front_yrt)]
shopping_event = ShoppingEvent_()
shopping_event.eventPath = ShoppingDict["eventPath"]
shopping_event.eventName = ShoppingDict["eventName"]
shopping_event.barcode = ShoppingDict["barcode"]
yrtDict = {}
event_tracks = []
for camera_type, yrtOut in yrts:
'''
inputs:
yrtOut
camera_type
outputs:
CameraEvent
'''
camera_event = CameraEvent_()
'''================= 4. tracking ================='''
'''(1) 生成用于 tracking 模块的 boxes、feats'''
bboxes = np.empty((0, 6), dtype=np.float64)
trackerboxes = np.empty((0, 9), dtype=np.float64)
trackefeats = {}
for frameDict in yrtOut:
tboxes = frameDict["tboxes"]
ffeats = frameDict["feats"]
boxes = frameDict["bboxes"]
bboxes = np.concatenate((bboxes, np.array(boxes)), axis=0)
trackerboxes = np.concatenate((trackerboxes, np.array(tboxes)), axis=0)
for i in range(len(tboxes)):
fid, bid = int(tboxes[i, 7]), int(tboxes[i, 8])
trackefeats.update({f"{fid}_{bid}": ffeats[f"{fid}_{bid}"]})
'''(2) tracking, 后摄'''
if CameraEvent["cameraType"] == "back":
vts = doBackTracks(trackerboxes, trackefeats)
vts.classify()
event_tracks.append(("back", vts))
camera_event.camera_type = camera_type
camera_event.yoloResnetTracker = yrtOut
camera_event.tracking = vts
camera_event.videoPath = ShoppingDict["backCamera"]["videoPath"]
camera_event.imagePaths = ShoppingDict["backCamera"]["imagePaths"]
shopping_event.backCamera = camera_event
yrtDict["backyrt"] = yrtOut
'''(2) tracking, 前摄'''
if CameraEvent["cameraType"] == "front":
vts = doFrontTracks(trackerboxes, trackefeats)
vts.classify()
event_tracks.append(("front", vts))
camera_event.camera_type = camera_type
camera_event.yoloResnetTracker = yrtOut
camera_event.tracking = vts
camera_event.videoPath = ShoppingDict["frontCamera"]["videoPath"]
camera_event.imagePaths = ShoppingDict["frontCamera"]["imagePaths"]
shopping_event.frontCamera = camera_event
yrtDict["frontyrt"] = yrtOut
name = Path(evt_pkfile).stem
pf_path = os.path.join(savepath, name+"_new.pickle")
with open(str(pf_path), 'wb') as f:
pickle.dump(shopping_event, f)
illus = [None, None]
for CamerType, vts in event_tracks:
if len(vts.tracks)==0: continue
if CamerType == 'front':
edgeline = cv2.imread("./tracking/shopcart/cart_tempt/board_ftmp_line.png")
h, w = edgeline.shape[:2]
# nh, nw = h//2, w//2
# edgeline = cv2.resize(edgeline, (nw, nh), interpolation=cv2.INTER_AREA)
img_tracking = draw_all_trajectories(vts, edgeline, savepath, CamerType, draw5p=True)
illus[0] = img_tracking
plt = plot_frameID_y2(vts)
plt.savefig(os.path.join(savepath, "front_y2.png"))
if CamerType == 'back':
edgeline = cv2.imread("./tracking/shopcart/cart_tempt/edgeline.png")
h, w = edgeline.shape[:2]
# nh, nw = h//2, w//2
# edgeline = cv2.resize(edgeline, (nw, nh), interpolation=cv2.INTER_AREA)
img_tracking = draw_all_trajectories(vts, edgeline, savepath, CamerType, draw5p=True)
illus[1] = img_tracking
if __name__ == "__main__":
main()


@@ -1,12 +1,27 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = '8.0.173'
__version__ = "8.1.34"

-from ultralytics.models import RTDETR, SAM, YOLO
from ultralytics.data.explorer.explorer import Explorer
from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10
from ultralytics.models.fastsam import FastSAM
from ultralytics.models.nas import NAS
-from ultralytics.utils import SETTINGS as settings
from ultralytics.utils import ASSETS, SETTINGS as settings
from ultralytics.utils.checks import check_yolo as checks
from ultralytics.utils.downloads import download

-__all__ = '__version__', 'YOLO', 'NAS', 'SAM', 'FastSAM', 'RTDETR', 'checks', 'download', 'settings'
__all__ = (
    "__version__",
    "ASSETS",
    "YOLO",
    "YOLOWorld",
    "NAS",
    "SAM",
    "FastSAM",
    "RTDETR",
    "checks",
    "download",
    "settings",
    "Explorer",
    "YOLOv10",
)
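
With YOLOv10 exported from the package root, the detector used elsewhere in this commit can be loaded directly; a minimal sketch (paths are the checkpoint and video referenced in this commit, and the results handling is illustrative rather than part of the repo):

# Sketch: load the YOLOv10 checkpoint added in this commit and run a prediction.
from ultralytics import YOLOv10

model = YOLOv10(r"./ckpts/best_v10s_width0375_1205.pt")
results = model.predict(source=r"D:\datasets\ym\后台数据\unzip\20250310-175352-741\0.mp4",
                        conf=0.25, save=False)
for r in results:
    print(r.path, r.boxes.data.shape)  # per-frame detections: (N, 6) -> xyxy, conf, cls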


@@ -1,34 +1,62 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import contextlib
-import re
import os
import shutil
import subprocess
import sys
from pathlib import Path
from types import SimpleNamespace
from typing import Dict, List, Union

import re

-from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_PATH, LOGGER, RANK, SETTINGS,
-                               SETTINGS_YAML, IterableSimpleNamespace, __version__, checks, colorstr, deprecation_warn,
-                               yaml_load, yaml_print)
from ultralytics.utils import (
    ASSETS,
    DEFAULT_CFG,
    DEFAULT_CFG_DICT,
    DEFAULT_CFG_PATH,
    LOGGER,
    RANK,
    ROOT,
    RUNS_DIR,
    SETTINGS,
    SETTINGS_YAML,
    TESTS_RUNNING,
    IterableSimpleNamespace,
    __version__,
    checks,
    colorstr,
    deprecation_warn,
    yaml_load,
    yaml_print,
)
# Define valid tasks and modes
-MODES = 'train', 'val', 'predict', 'export', 'track', 'benchmark'
-TASKS = 'detect', 'segment', 'classify', 'pose'
-TASK2DATA = {'detect': 'coco8.yaml', 'segment': 'coco8-seg.yaml', 'classify': 'imagenet10', 'pose': 'coco8-pose.yaml'}
MODES = {"train", "val", "predict", "export", "track", "benchmark"}
TASKS = {"detect", "segment", "classify", "pose", "obb"}
TASK2DATA = {
    "detect": "coco8.yaml",
    "segment": "coco8-seg.yaml",
    "classify": "imagenet10",
    "pose": "coco8-pose.yaml",
    "obb": "dota8.yaml",
}
TASK2MODEL = {
-    'detect': 'yolov8n.pt',
-    'segment': 'yolov8n-seg.pt',
-    'classify': 'yolov8n-cls.pt',
-    'pose': 'yolov8n-pose.pt'}
    "detect": "yolov8n.pt",
    "segment": "yolov8n-seg.pt",
    "classify": "yolov8n-cls.pt",
    "pose": "yolov8n-pose.pt",
    "obb": "yolov8n-obb.pt",
}
TASK2METRIC = {
-    'detect': 'metrics/mAP50-95(B)',
-    'segment': 'metrics/mAP50-95(M)',
-    'classify': 'metrics/accuracy_top1',
-    'pose': 'metrics/mAP50-95(P)'}
    "detect": "metrics/mAP50-95(B)",
    "segment": "metrics/mAP50-95(M)",
    "classify": "metrics/accuracy_top1",
    "pose": "metrics/mAP50-95(P)",
    "obb": "metrics/mAP50-95(B)",
}
-CLI_HELP_MSG = \
-    f"""
CLI_HELP_MSG = f"""
    Arguments received: {str(['yolo'] + sys.argv[1:])}. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS
@@ -42,7 +70,7 @@ CLI_HELP_MSG = \
        yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01

    2. Predict a YouTube video using a pretrained segmentation model at image size 320:
-        yolo predict model=yolov8n-seg.pt source='https://youtu.be/Zgi9g1ksQHc' imgsz=320
        yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320

    3. Val a pretrained detection model at batch-size 1 and image size 640:
        yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640
@@ -50,6 +78,9 @@ CLI_HELP_MSG = \
    4. Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required)
        yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128

    6. Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API
        yolo explorer

    5. Run special commands:
        yolo help
        yolo checks
@@ -64,16 +95,84 @@ CLI_HELP_MSG = \
    """

# Define keys for arg type checks
-CFG_FLOAT_KEYS = 'warmup_epochs', 'box', 'cls', 'dfl', 'degrees', 'shear'
-CFG_FRACTION_KEYS = ('dropout', 'iou', 'lr0', 'lrf', 'momentum', 'weight_decay', 'warmup_momentum', 'warmup_bias_lr',
-                     'label_smoothing', 'hsv_h', 'hsv_s', 'hsv_v', 'translate', 'scale', 'perspective', 'flipud',
-                     'fliplr', 'mosaic', 'mixup', 'copy_paste', 'conf', 'iou', 'fraction')  # fraction floats 0.0 - 1.0
-CFG_INT_KEYS = ('epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic', 'mask_ratio', 'max_det', 'vid_stride',
-                'line_width', 'workspace', 'nbs', 'save_period')
-CFG_BOOL_KEYS = ('save', 'exist_ok', 'verbose', 'deterministic', 'single_cls', 'rect', 'cos_lr', 'overlap_mask', 'val',
-                 'save_json', 'save_hybrid', 'half', 'dnn', 'plots', 'show', 'save_txt', 'save_conf', 'save_crop',
-                 'show_labels', 'show_conf', 'visualize', 'augment', 'agnostic_nms', 'retina_masks', 'boxes', 'keras',
-                 'optimize', 'int8', 'dynamic', 'simplify', 'nms', 'profile')
CFG_FLOAT_KEYS = {"warmup_epochs", "box", "cls", "dfl", "degrees", "shear", "time"}
CFG_FRACTION_KEYS = {
"dropout",
"iou",
"lr0",
"lrf",
"momentum",
"weight_decay",
"warmup_momentum",
"warmup_bias_lr",
"label_smoothing",
"hsv_h",
"hsv_s",
"hsv_v",
"translate",
"scale",
"perspective",
"flipud",
"fliplr",
"bgr",
"mosaic",
"mixup",
"copy_paste",
"conf",
"iou",
"fraction",
} # fraction floats 0.0 - 1.0
CFG_INT_KEYS = {
"epochs",
"patience",
"batch",
"workers",
"seed",
"close_mosaic",
"mask_ratio",
"max_det",
"vid_stride",
"line_width",
"workspace",
"nbs",
"save_period",
}
CFG_BOOL_KEYS = {
"save",
"exist_ok",
"verbose",
"deterministic",
"single_cls",
"rect",
"cos_lr",
"overlap_mask",
"val",
"save_json",
"save_hybrid",
"half",
"dnn",
"plots",
"show",
"save_txt",
"save_conf",
"save_crop",
"save_frames",
"show_labels",
"show_conf",
"visualize",
"augment",
"agnostic_nms",
"retina_masks",
"show_boxes",
"keras",
"optimize",
"int8",
"dynamic",
"simplify",
"nms",
"profile",
"multi_scale",
}
def cfg2dict(cfg):
@@ -109,53 +208,72 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove
    # Merge overrides
    if overrides:
        overrides = cfg2dict(overrides)
-        if 'save_dir' not in cfg:
-            overrides.pop('save_dir', None)  # special override keys to ignore
        if "save_dir" not in cfg:
            overrides.pop("save_dir", None)  # special override keys to ignore
        check_dict_alignment(cfg, overrides)
        cfg = {**cfg, **overrides}  # merge cfg and overrides dicts (prefer overrides)

    # Special handling for numeric project/name
-    for k in 'project', 'name':
    for k in "project", "name":
        if k in cfg and isinstance(cfg[k], (int, float)):
            cfg[k] = str(cfg[k])
-    if cfg.get('name') == 'model':  # assign model to 'name' arg
-        cfg['name'] = cfg.get('model', '').split('.')[0]
    if cfg.get("name") == "model":  # assign model to 'name' arg
        cfg["name"] = cfg.get("model", "").split(".")[0]
        LOGGER.warning(f"WARNING ⚠️ 'name=model' automatically updated to 'name={cfg['name']}'.")

    # Type and Value checks
-    for k, v in cfg.items():
-        if v is not None:  # None values may be from optional args
-            if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)):
-                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
-                                f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')")
-            elif k in CFG_FRACTION_KEYS:
-                if not isinstance(v, (int, float)):
-                    raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
-                                    f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')")
-                if not (0.0 <= v <= 1.0):
-                    raise ValueError(f"'{k}={v}' is an invalid value. "
-                                     f"Valid '{k}' values are between 0.0 and 1.0.")
-            elif k in CFG_INT_KEYS and not isinstance(v, int):
-                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
-                                f"'{k}' must be an int (i.e. '{k}=8')")
-            elif k in CFG_BOOL_KEYS and not isinstance(v, bool):
-                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
-                                f"'{k}' must be a bool (i.e. '{k}=True' or '{k}=False')")
    check_cfg(cfg)

    # Return instance
    return IterableSimpleNamespace(**cfg)
def check_cfg(cfg, hard=True):
"""Check Ultralytics configuration argument types and values."""
for k, v in cfg.items():
if v is not None: # None values may be from optional args
if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)):
if hard:
raise TypeError(
f"'{k}={v}' is of invalid type {type(v).__name__}. "
f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')"
)
cfg[k] = float(v)
elif k in CFG_FRACTION_KEYS:
if not isinstance(v, (int, float)):
if hard:
raise TypeError(
f"'{k}={v}' is of invalid type {type(v).__name__}. "
f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')"
)
cfg[k] = v = float(v)
if not (0.0 <= v <= 1.0):
raise ValueError(f"'{k}={v}' is an invalid value. " f"Valid '{k}' values are between 0.0 and 1.0.")
elif k in CFG_INT_KEYS and not isinstance(v, int):
if hard:
raise TypeError(
f"'{k}={v}' is of invalid type {type(v).__name__}. " f"'{k}' must be an int (i.e. '{k}=8')"
)
cfg[k] = int(v)
elif k in CFG_BOOL_KEYS and not isinstance(v, bool):
if hard:
raise TypeError(
f"'{k}={v}' is of invalid type {type(v).__name__}. "
f"'{k}' must be a bool (i.e. '{k}=True' or '{k}=False')"
)
cfg[k] = bool(v)
def get_save_dir(args, name=None):
    """Return save_dir as created from train/val/predict arguments."""

-    if getattr(args, 'save_dir', None):
    if getattr(args, "save_dir", None):
        save_dir = args.save_dir
    else:
        from ultralytics.utils.files import increment_path

-        project = args.project or Path(SETTINGS['runs_dir']) / args.task
-        name = name or args.name or f'{args.mode}'
        project = args.project or (ROOT.parent / "tests/tmp/runs" if TESTS_RUNNING else RUNS_DIR) / args.task
        name = name or args.name or f"{args.mode}"
        save_dir = increment_path(Path(project) / name, exist_ok=args.exist_ok if RANK in (-1, 0) else True)

    return Path(save_dir)
@@ -165,23 +283,26 @@ def _handle_deprecation(custom):
    """Hardcoded function to handle deprecated config keys."""

    for key in custom.copy().keys():
-        if key == 'hide_labels':
-            deprecation_warn(key, 'show_labels')
-            custom['show_labels'] = custom.pop('hide_labels') == 'False'
-        if key == 'hide_conf':
-            deprecation_warn(key, 'show_conf')
-            custom['show_conf'] = custom.pop('hide_conf') == 'False'
-        if key == 'line_thickness':
-            deprecation_warn(key, 'line_width')
-            custom['line_width'] = custom.pop('line_thickness')
        if key == "boxes":
            deprecation_warn(key, "show_boxes")
            custom["show_boxes"] = custom.pop("boxes")
        if key == "hide_labels":
            deprecation_warn(key, "show_labels")
            custom["show_labels"] = custom.pop("hide_labels") == "False"
        if key == "hide_conf":
            deprecation_warn(key, "show_conf")
            custom["show_conf"] = custom.pop("hide_conf") == "False"
        if key == "line_thickness":
            deprecation_warn(key, "line_width")
            custom["line_width"] = custom.pop("line_thickness")

    return custom
def check_dict_alignment(base: Dict, custom: Dict, e=None):
    """
-    This function checks for any mismatched keys between a custom configuration list and a base configuration list.
-    If any mismatched keys are found, the function prints out similar keys from the base list and exits the program.
    This function checks for any mismatched keys between a custom configuration list and a base configuration list. If
    any mismatched keys are found, the function prints out similar keys from the base list and exits the program.

    Args:
        custom (dict): a dictionary of custom configuration options
@@ -194,36 +315,35 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None):
    if mismatched:
        from difflib import get_close_matches

-        string = ''
        string = ""
        for x in mismatched:
            matches = get_close_matches(x, base_keys)  # key list
-            matches = [f'{k}={base[k]}' if base.get(k) is not None else k for k in matches]
-            match_str = f'Similar arguments are i.e. {matches}.' if matches else ''
            matches = [f"{k}={base[k]}" if base.get(k) is not None else k for k in matches]
            match_str = f"Similar arguments are i.e. {matches}." if matches else ""
            string += f"'{colorstr('red', 'bold', x)}' is not a valid YOLO argument. {match_str}\n"
        raise SyntaxError(string + CLI_HELP_MSG) from e


def merge_equals_args(args: List[str]) -> List[str]:
    """
-    Merges arguments around isolated '=' args in a list of strings.
-    The function considers cases where the first argument ends with '=' or the second starts with '=',
-    as well as when the middle one is an equals sign.
    Merges arguments around isolated '=' args in a list of strings. The function considers cases where the first
    argument ends with '=' or the second starts with '=', as well as when the middle one is an equals sign.

    Args:
        args (List[str]): A list of strings where each element is an argument.

    Returns:
-        List[str]: A list of strings where the arguments around isolated '=' are merged.
        (List[str]): A list of strings where the arguments around isolated '=' are merged.
    """
    new_args = []
    for i, arg in enumerate(args):
-        if arg == '=' and 0 < i < len(args) - 1:  # merge ['arg', '=', 'val']
-            new_args[-1] += f'={args[i + 1]}'
        if arg == "=" and 0 < i < len(args) - 1:  # merge ['arg', '=', 'val']
            new_args[-1] += f"={args[i + 1]}"
            del args[i + 1]
-        elif arg.endswith('=') and i < len(args) - 1 and '=' not in args[i + 1]:  # merge ['arg=', 'val']
-            new_args.append(f'{arg}{args[i + 1]}')
        elif arg.endswith("=") and i < len(args) - 1 and "=" not in args[i + 1]:  # merge ['arg=', 'val']
            new_args.append(f"{arg}{args[i + 1]}")
            del args[i + 1]
-        elif arg.startswith('=') and i > 0:  # merge ['arg', '=val']
        elif arg.startswith("=") and i > 0:  # merge ['arg', '=val']
            new_args[-1] += arg
        else:
            new_args.append(arg)
@@ -247,11 +367,11 @@ def handle_yolo_hub(args: List[str]) -> None:
    """
    from ultralytics import hub

-    if args[0] == 'login':
-        key = args[1] if len(args) > 1 else ''
    if args[0] == "login":
        key = args[1] if len(args) > 1 else ""
        # Log in to Ultralytics HUB using the provided API key
        hub.login(key)
-    elif args[0] == 'logout':
    elif args[0] == "logout":
        # Log out from Ultralytics HUB
        hub.logout()
@@ -271,39 +391,47 @@ def handle_yolo_settings(args: List[str]) -> None:
        python my_script.py yolo settings reset
        ```
    """
-    url = 'https://docs.ultralytics.com/quickstart/#ultralytics-settings'  # help URL
    url = "https://docs.ultralytics.com/quickstart/#ultralytics-settings"  # help URL
    try:
        if any(args):
-            if args[0] == 'reset':
            if args[0] == "reset":
                SETTINGS_YAML.unlink()  # delete the settings file
                SETTINGS.reset()  # create new settings
-                LOGGER.info('Settings reset successfully')  # inform the user that settings have been reset
                LOGGER.info("Settings reset successfully")  # inform the user that settings have been reset
            else:  # save a new setting
                new = dict(parse_key_value_pair(a) for a in args)
                check_dict_alignment(SETTINGS, new)
                SETTINGS.update(new)

-        LOGGER.info(f'💡 Learn about settings at {url}')
        LOGGER.info(f"💡 Learn about settings at {url}")
        yaml_print(SETTINGS_YAML)  # print the current settings
    except Exception as e:
        LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.")
def handle_explorer():
"""Open the Ultralytics Explorer GUI."""
checks.check_requirements("streamlit")
LOGGER.info("💡 Loading Explorer dashboard...")
subprocess.run(["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"])
def parse_key_value_pair(pair):
    """Parse one 'key=value' pair and return key and value."""
-    re.sub(r' *= *', '=', pair)  # remove spaces around equals sign
-    k, v = pair.split('=', 1)  # split on first '=' sign
    k, v = pair.split("=", 1)  # split on first '=' sign
    k, v = k.strip(), v.strip()  # remove spaces
    assert v, f"missing '{k}' value"
    return k, smart_value(v)


def smart_value(v):
    """Convert a string to an underlying type such as int, float, bool, etc."""
-    if v.lower() == 'none':
    v_lower = v.lower()
    if v_lower == "none":
        return None
-    elif v.lower() == 'true':
    elif v_lower == "true":
        return True
-    elif v.lower() == 'false':
    elif v_lower == "false":
        return False
    else:
        with contextlib.suppress(Exception):
@@ -311,7 +439,7 @@ def smart_value(v):
    return v
-def entrypoint(debug=''):
def entrypoint(debug=""):
    """
    This function is the ultralytics package entrypoint, it's responsible for parsing the command line arguments passed
    to the package.
@@ -326,135 +454,160 @@ def entrypoint(debug=''):
    It uses the package's default cfg and initializes it using the passed overrides.
    Then it calls the CLI function with the composed cfg
    """
-    args = (debug.split(' ') if debug else sys.argv)[1:]
    args = (debug.split(" ") if debug else sys.argv)[1:]
    if not args:  # no arguments passed
        LOGGER.info(CLI_HELP_MSG)
        return

    special = {
-        'help': lambda: LOGGER.info(CLI_HELP_MSG),
-        'checks': checks.check_yolo,
-        'version': lambda: LOGGER.info(__version__),
-        'settings': lambda: handle_yolo_settings(args[1:]),
-        'cfg': lambda: yaml_print(DEFAULT_CFG_PATH),
-        'hub': lambda: handle_yolo_hub(args[1:]),
-        'login': lambda: handle_yolo_hub(args),
-        'copy-cfg': copy_default_cfg}
        "help": lambda: LOGGER.info(CLI_HELP_MSG),
        "checks": checks.collect_system_info,
        "version": lambda: LOGGER.info(__version__),
        "settings": lambda: handle_yolo_settings(args[1:]),
        "cfg": lambda: yaml_print(DEFAULT_CFG_PATH),
        "hub": lambda: handle_yolo_hub(args[1:]),
        "login": lambda: handle_yolo_hub(args),
        "copy-cfg": copy_default_cfg,
        "explorer": lambda: handle_explorer(),
    }
    full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special}

-    # Define common mis-uses of special commands, i.e. -h, -help, --help
    # Define common misuses of special commands, i.e. -h, -help, --help
    special.update({k[0]: v for k, v in special.items()})  # singular
-    special.update({k[:-1]: v for k, v in special.items() if len(k) > 1 and k.endswith('s')})  # singular
-    special = {**special, **{f'-{k}': v for k, v in special.items()}, **{f'--{k}': v for k, v in special.items()}}
    special.update({k[:-1]: v for k, v in special.items() if len(k) > 1 and k.endswith("s")})  # singular
    special = {**special, **{f"-{k}": v for k, v in special.items()}, **{f"--{k}": v for k, v in special.items()}}

    overrides = {}  # basic overrides, i.e. imgsz=320
    for a in merge_equals_args(args):  # merge spaces around '=' sign
-        if a.startswith('--'):
-            LOGGER.warning(f"WARNING ⚠️ '{a}' does not require leading dashes '--', updating to '{a[2:]}'.")
        if a.startswith("--"):
            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require leading dashes '--', updating to '{a[2:]}'.")
            a = a[2:]
-        if a.endswith(','):
-            LOGGER.warning(f"WARNING ⚠️ '{a}' does not require trailing comma ',', updating to '{a[:-1]}'.")
        if a.endswith(","):
            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require trailing comma ',', updating to '{a[:-1]}'.")
            a = a[:-1]
-        if '=' in a:
        if "=" in a:
            try:
                k, v = parse_key_value_pair(a)
-                if k == 'cfg':  # custom.yaml passed
-                    LOGGER.info(f'Overriding {DEFAULT_CFG_PATH} with {v}')
-                    overrides = {k: val for k, val in yaml_load(checks.check_yaml(v)).items() if k != 'cfg'}
                if k == "cfg" and v is not None:  # custom.yaml passed
                    LOGGER.info(f"Overriding {DEFAULT_CFG_PATH} with {v}")
                    overrides = {k: val for k, val in yaml_load(checks.check_yaml(v)).items() if k != "cfg"}
                else:
                    overrides[k] = v
            except (NameError, SyntaxError, ValueError, AssertionError) as e:
-                check_dict_alignment(full_args_dict, {a: ''}, e)
                check_dict_alignment(full_args_dict, {a: ""}, e)
        elif a in TASKS:
-            overrides['task'] = a
            overrides["task"] = a
        elif a in MODES:
-            overrides['mode'] = a
            overrides["mode"] = a
        elif a.lower() in special:
            special[a.lower()]()
            return
        elif a in DEFAULT_CFG_DICT and isinstance(DEFAULT_CFG_DICT[a], bool):
            overrides[a] = True  # auto-True for default bool args, i.e. 'yolo show' sets show=True
        elif a in DEFAULT_CFG_DICT:
-            raise SyntaxError(f"'{colorstr('red', 'bold', a)}' is a valid YOLO argument but is missing an '=' sign "
-                              f"to set its value, i.e. try '{a}={DEFAULT_CFG_DICT[a]}'\n{CLI_HELP_MSG}")
            raise SyntaxError(
                f"'{colorstr('red', 'bold', a)}' is a valid YOLO argument but is missing an '=' sign "
                f"to set its value, i.e. try '{a}={DEFAULT_CFG_DICT[a]}'\n{CLI_HELP_MSG}"
            )
        else:
-            check_dict_alignment(full_args_dict, {a: ''})
            check_dict_alignment(full_args_dict, {a: ""})

    # Check keys
    check_dict_alignment(full_args_dict, overrides)

    # Mode
-    mode = overrides.get('mode')
    mode = overrides.get("mode")
    if mode is None:
-        mode = DEFAULT_CFG.mode or 'predict'
-        LOGGER.warning(f"WARNING ⚠️ 'mode' is missing. Valid modes are {MODES}. Using default 'mode={mode}'.")
        mode = DEFAULT_CFG.mode or "predict"
        LOGGER.warning(f"WARNING ⚠️ 'mode' argument is missing. Valid modes are {MODES}. Using default 'mode={mode}'.")
    elif mode not in MODES:
        raise ValueError(f"Invalid 'mode={mode}'. Valid modes are {MODES}.\n{CLI_HELP_MSG}")

    # Task
-    task = overrides.pop('task', None)
    task = overrides.pop("task", None)
    if task:
        if task not in TASKS:
            raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}")
-        if 'model' not in overrides:
-            overrides['model'] = TASK2MODEL[task]
        if "model" not in overrides:
            overrides["model"] = TASK2MODEL[task]

    # Model
-    model = overrides.pop('model', DEFAULT_CFG.model)
    model = overrides.pop("model", DEFAULT_CFG.model)
    if model is None:
-        model = 'yolov8n.pt'
-        LOGGER.warning(f"WARNING ⚠️ 'model' is missing. Using default 'model={model}'.")
        model = "yolov8n.pt"
        LOGGER.warning(f"WARNING ⚠️ 'model' argument is missing. Using default 'model={model}'.")
-    overrides['model'] = model
    overrides["model"] = model
-    if 'rtdetr' in model.lower():  # guess architecture
    # stem = Path(model).stem.lower()
    stem = model.lower()
    if "rtdetr" in stem:  # guess architecture
        from ultralytics import RTDETR

        model = RTDETR(model)  # no task argument
-    elif 'fastsam' in model.lower():
    elif "fastsam" in stem:
        from ultralytics import FastSAM

        model = FastSAM(model)
-    elif 'sam' in model.lower():
    elif "sam" in stem:
        from ultralytics import SAM

        model = SAM(model)
-    else:
    elif re.search("v3|v5|v6|v8|v9", stem):
        from ultralytics import YOLO

        model = YOLO(model, task=task)
-    if isinstance(overrides.get('pretrained'), str):
-        model.load(overrides['pretrained'])
    else:
        from ultralytics import YOLOv10

        # Special case for the HuggingFace Hub
        split_path = model.split('/')
        if len(split_path) == 2 and (not os.path.exists(model)):
            model = YOLOv10.from_pretrained(model)
        else:
            model = YOLOv10(model)

    if isinstance(overrides.get("pretrained"), str):
        model.load(overrides["pretrained"])

    # Task Update
    if task != model.task:
        if task:
-            LOGGER.warning(f"WARNING ⚠️ conflicting 'task={task}' passed with 'task={model.task}' model. "
-                           f"Ignoring 'task={task}' and updating to 'task={model.task}' to match model.")
            LOGGER.warning(
                f"WARNING ⚠️ conflicting 'task={task}' passed with 'task={model.task}' model. "
f"Ignoring 'task={task}' and updating to 'task={model.task}' to match model."
)
task = model.task task = model.task
# Mode # Mode
if mode in ('predict', 'track') and 'source' not in overrides: if mode in ("predict", "track") and "source" not in overrides:
overrides['source'] = DEFAULT_CFG.source or ASSETS overrides["source"] = DEFAULT_CFG.source or ASSETS
LOGGER.warning(f"WARNING ⚠️ 'source' is missing. Using default 'source={overrides['source']}'.") LOGGER.warning(f"WARNING ⚠️ 'source' argument is missing. Using default 'source={overrides['source']}'.")
elif mode in ('train', 'val'): elif mode in ("train", "val"):
if 'data' not in overrides and 'resume' not in overrides: if "data" not in overrides and "resume" not in overrides:
overrides['data'] = TASK2DATA.get(task or DEFAULT_CFG.task, DEFAULT_CFG.data) overrides["data"] = DEFAULT_CFG.data or TASK2DATA.get(task or DEFAULT_CFG.task, DEFAULT_CFG.data)
LOGGER.warning(f"WARNING ⚠️ 'data' is missing. Using default 'data={overrides['data']}'.") LOGGER.warning(f"WARNING ⚠️ 'data' argument is missing. Using default 'data={overrides['data']}'.")
elif mode == 'export': elif mode == "export":
if 'format' not in overrides: if "format" not in overrides:
overrides['format'] = DEFAULT_CFG.format or 'torchscript' overrides["format"] = DEFAULT_CFG.format or "torchscript"
LOGGER.warning(f"WARNING ⚠️ 'format' is missing. Using default 'format={overrides['format']}'.") LOGGER.warning(f"WARNING ⚠️ 'format' argument is missing. Using default 'format={overrides['format']}'.")
# Run command in python # Run command in python
# getattr(model, mode)(**vars(get_cfg(overrides=overrides))) # default args using default.yaml
getattr(model, mode)(**overrides) # default args from model getattr(model, mode)(**overrides) # default args from model
# Show help
LOGGER.info(f"💡 Learn more at https://docs.ultralytics.com/modes/{mode}")
# Special modes -------------------------------------------------------------------------------------------------------- # Special modes --------------------------------------------------------------------------------------------------------
def copy_default_cfg(): def copy_default_cfg():
"""Copy and create a new default configuration file with '_copy' appended to its name.""" """Copy and create a new default configuration file with '_copy' appended to its name."""
new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace('.yaml', '_copy.yaml') new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace(".yaml", "_copy.yaml")
shutil.copy2(DEFAULT_CFG_PATH, new_file) shutil.copy2(DEFAULT_CFG_PATH, new_file)
LOGGER.info(f'{DEFAULT_CFG_PATH} copied to {new_file}\n' LOGGER.info(
f"Example YOLO command with this new custom cfg:\n yolo cfg='{new_file}' imgsz=320 batch=8") f"{DEFAULT_CFG_PATH} copied to {new_file}\n"
f"Example YOLO command with this new custom cfg:\n yolo cfg='{new_file}' imgsz=320 batch=8"
)
if __name__ == '__main__': if __name__ == "__main__":
# Example: entrypoint(debug='yolo predict model=yolov8n.pt') # Example: entrypoint(debug='yolo predict model=yolov8n.pt')
entrypoint(debug='') entrypoint(debug="")
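The `else` branch above is new in this commit: any model string that does not match an earlier architecture pattern is treated as YOLOv10, and an `org/name` string that is not an existing local path is fetched from the HuggingFace Hub. A minimal sketch of both entry points, assuming this fork exposes `ultralytics.YOLOv10` as imported above (the Hub repo id and image path are illustrative):

```python
from ultralytics import YOLOv10

# Local or auto-downloaded weights file (name is illustrative)
model = YOLOv10("yolov10n.pt")

# An 'org/name' string that is not a local path is pulled from the HuggingFace Hub
# model = YOLOv10.from_pretrained("jameslahm/yolov10n")  # illustrative repo id

results = model.predict(source="bus.jpg", imgsz=640, conf=0.25)
print(results[0].boxes)
```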

View File

@ -1,17 +1,17 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Argoverse-HD dataset (ring-front-center camera) https://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
# Documentation: https://docs.ultralytics.com/datasets/detect/argoverse/
# Example usage: yolo train data=Argoverse.yaml
# parent
# ├── ultralytics
# └── datasets
# └── Argoverse ← downloads here (31.5 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Argoverse # dataset root dir
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
# Classes
names:
@ -24,7 +24,6 @@ names:
6: traffic_light
7: stop_sign
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import json
@ -64,7 +63,9 @@ download: |
  # Download 'https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip' (deprecated S3 link)
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link']
  print("\n\nWARNING: Argoverse dataset MUST be downloaded manually, autodownload will NOT work.")
  print(f"WARNING: Manually download Argoverse dataset '{urls[0]}' to '{dir}' and re-run your command.\n\n")
  # download(urls, dir=dir)
  # Convert
  annotations_dir = 'Argoverse-HD/annotations/'

View File

@ -1,18 +1,19 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA 1.5 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.5.yaml
# parent
# ├── ultralytics
# └── datasets
# └── dota1.5 ← downloads here (2GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/DOTAv1.5 # dataset root dir
train: images/train # train images (relative to 'path') 1411 images
val: images/val # val images (relative to 'path') 458 images
test: images/test # test images (optional) 937 images
# Classes for DOTA 1.5
names:
0: plane
1: ship
@ -30,8 +31,6 @@ names:
13: soccer ball field
14: swimming pool
15: container crane
# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.5.zip

View File

@ -0,0 +1,35 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA 1.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.yaml
# parent
# ├── ultralytics
# └── datasets
# └── dota1 ← downloads here (2GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/DOTAv1 # dataset root dir
train: images/train # train images (relative to 'path') 1411 images
val: images/val # val images (relative to 'path') 458 images
test: images/test # test images (optional) 937 images
# Classes for DOTA 1.0
names:
0: plane
1: ship
2: storage tank
3: baseball diamond
4: tennis court
5: basketball court
6: ground track field
7: harbor
8: bridge
9: large vehicle
10: small vehicle
11: helicopter
12: roundabout
13: soccer ball field
14: swimming pool
# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.zip

View File

@ -1,14 +1,14 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Global Wheat 2020 dataset https://www.global-wheat.com/ by University of Saskatchewan
# Documentation: https://docs.ultralytics.com/datasets/detect/globalwheat2020/
# Example usage: yolo train data=GlobalWheat2020.yaml
# parent
# ├── ultralytics
# └── datasets
# └── GlobalWheat2020 ← downloads here (7.0 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/GlobalWheat2020 # dataset root dir
train: # train images (relative to 'path') 3422 images
  - images/arvalis_1
  - images/arvalis_2
@ -29,7 +29,6 @@ test: # test images (optional) 1276 images
names:
0: wheat_head
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  from ultralytics.utils.downloads import download

View File

@ -1,18 +1,18 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
# Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
# Documentation: https://docs.ultralytics.com/datasets/classify/imagenet/
# Example usage: yolo train task=classify data=imagenet
# parent
# ├── ultralytics
# └── datasets
# └── imagenet ← downloads here (144 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/imagenet # dataset root dir
train: train # train images (relative to 'path') 1281167 images
val: val # val images (relative to 'path') 50000 images
test: # test images (optional)
# Classes
names:
@ -2020,6 +2020,5 @@ map:
n13133613: ear
n15075141: toilet_tissue
# Download script/URL (optional)
download: yolo/data/scripts/get_imagenet.sh
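The example-usage comment above (`yolo train task=classify data=imagenet`) has a direct Python equivalent; a minimal sketch, with the checkpoint name, image size and epoch count purely illustrative:

```python
from ultralytics import YOLO

model = YOLO("yolov8n-cls.pt")  # illustrative classification checkpoint
model.train(data="imagenet", epochs=10, imgsz=224)  # "imagenet" resolves to the dataset config above
```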

View File

@ -1,17 +1,17 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Objects365 dataset https://www.objects365.org/ by Megvii
# Documentation: https://docs.ultralytics.com/datasets/detect/objects365/
# Example usage: yolo train data=Objects365.yaml
# parent
# ├── ultralytics
# └── datasets
# └── Objects365 ← downloads here (712 GB = 367G data + 345G zips)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Objects365 # dataset root dir
train: images/train # train images (relative to 'path') 1742289 images
val: images/val # val images (relative to 'path') 80000 images
test: # test images (optional)
# Classes
names:
@ -381,7 +381,6 @@ names:
363: Curling
364: Table Tennis
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  from tqdm import tqdm

View File

@ -1,23 +1,22 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
# Documentation: https://docs.ultralytics.com/datasets/detect/sku-110k/
# Example usage: yolo train data=SKU-110K.yaml
# parent
# ├── ultralytics
# └── datasets
# └── SKU-110K ← downloads here (13.6 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/SKU-110K # dataset root dir
train: train.txt # train images (relative to 'path') 8219 images
val: val.txt # val images (relative to 'path') 588 images
test: test.txt # test images (optional) 2936 images
# Classes
names:
0: object
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import shutil

View File

@ -1,12 +1,12 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Documentation: https://docs.ultralytics.com/datasets/detect/voc/
# Example usage: yolo train data=VOC.yaml
# parent
# ├── ultralytics
# └── datasets
# └── VOC ← downloads here (2.8 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path') 16551 images
@ -42,7 +42,6 @@ names:
18: train
19: tvmonitor
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import xml.etree.ElementTree as ET
@ -81,7 +80,7 @@ download: |
  urls = [f'{url}VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
          f'{url}VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
          f'{url}VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
  download(urls, dir=dir / 'images', curl=True, threads=3, exist_ok=True)  # download and unzip over existing paths (required)
  # Convert
  path = dir / 'images/VOCdevkit'
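The only functional change to the VOC download script is the added `exist_ok=True`, so re-running the autodownload unzips over paths that already exist instead of failing. A standalone sketch of that call, assuming the same `ultralytics.utils.downloads.download` helper; the base URL is a placeholder, since it is defined earlier in VOC.yaml and not shown in this hunk:

```python
from pathlib import Path

from ultralytics.utils.downloads import download

dir = Path("../datasets/VOC")                 # dataset root dir from the YAML above
url = "https://example.com/voc/"              # placeholder for the base URL defined in VOC.yaml
urls = [f"{url}VOCtrainval_11-May-2012.zip"]  # 1.95GB, 17126 images
download(urls, dir=dir / "images", curl=True, threads=3, exist_ok=True)  # unzip over existing paths
```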

View File

@ -1,17 +1,17 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
# Documentation: https://docs.ultralytics.com/datasets/detect/visdrone/
# Example usage: yolo train data=VisDrone.yaml
# parent
# ├── ultralytics
# └── datasets
# └── VisDrone ← downloads here (2.3 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VisDrone # dataset root dir
train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
# Classes
names:
@ -26,7 +26,6 @@ names:
8: bus
9: motor
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import os

View File

@ -0,0 +1,24 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# African-wildlife dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/african-wildlife/
# Example usage: yolo train data=african-wildlife.yaml
# parent
# ├── ultralytics
# └── datasets
# └── african-wildlife ← downloads here (100 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/african-wildlife # dataset root dir
train: train/images # train images (relative to 'path') 1052 images
val: valid/images # val images (relative to 'path') 225 images
test: test/images # test images (relative to 'path') 227 images
# Classes
names:
0: buffalo
1: elephant
2: rhino
3: zebra
# Download script/URL (optional)
download: https://ultralytics.com/assets/african-wildlife.zip

View File

@ -0,0 +1,22 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Brain-tumor dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/brain-tumor/
# Example usage: yolo train data=brain-tumor.yaml
# parent
# ├── ultralytics
# └── datasets
# └── brain-tumor ← downloads here (4.05 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/brain-tumor # dataset root dir
train: train/images # train images (relative to 'path') 893 images
val: valid/images # val images (relative to 'path') 223 images
test: # test images (relative to 'path')
# Classes
names:
0: negative
1: positive
# Download script/URL (optional)
download: https://ultralytics.com/assets/brain-tumor.zip

View File

@ -0,0 +1,43 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Carparts-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/carparts-seg/
# Example usage: yolo train data=carparts-seg.yaml
# parent
# ├── ultralytics
# └── datasets
# └── carparts-seg ← downloads here (132 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/carparts-seg # dataset root dir
train: train/images # train images (relative to 'path') 3516 images
val: valid/images # val images (relative to 'path') 276 images
test: test/images # test images (relative to 'path') 401 images
# Classes
names:
0: back_bumper
1: back_door
2: back_glass
3: back_left_door
4: back_left_light
5: back_light
6: back_right_door
7: back_right_light
8: front_bumper
9: front_door
10: front_glass
11: front_left_door
12: front_left_light
13: front_light
14: front_right_door
15: front_right_light
16: hood
17: left_mirror
18: object
19: right_mirror
20: tailgate
21: trunk
22: wheel
# Download script/URL (optional)
download: https://ultralytics.com/assets/carparts-seg.zip

View File

@ -1,20 +1,20 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO 2017 dataset https://cocodataset.org by Microsoft
# Documentation: https://docs.ultralytics.com/datasets/pose/coco/
# Example usage: yolo train data=coco-pose.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco-pose ← downloads here (20.1 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco-pose # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
# Keypoints
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
# Classes

View File

@ -1,17 +1,17 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO 2017 dataset https://cocodataset.org by Microsoft
# Documentation: https://docs.ultralytics.com/datasets/detect/coco/
# Example usage: yolo train data=coco.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco ← downloads here (20.1 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
# Classes
names:
@ -96,7 +96,6 @@ names:
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: |
  from ultralytics.utils.downloads import download

View File

@ -1,17 +1,17 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/coco/
# Example usage: yolo train data=coco128.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco128-seg ← downloads here (7 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128-seg # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)
# Classes
names:
@ -96,6 +96,5 @@ names:
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://ultralytics.com/assets/coco128-seg.zip

View File

@ -1,17 +1,17 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/coco/
# Example usage: yolo train data=coco128.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco128 ← downloads here (7 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128 # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)
# Classes
names:
@ -96,6 +96,5 @@ names:
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://ultralytics.com/assets/coco128.zip

View File

@ -1,20 +1,20 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/pose/coco8-pose/
# Example usage: yolo train data=coco8-pose.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco8-pose ← downloads here (1 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8-pose # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)
# Keypoints
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
# Classes

View File

@ -1,17 +1,17 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/coco8-seg/
# Example usage: yolo train data=coco8-seg.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco8-seg ← downloads here (1 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8-seg # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)
# Classes
names:
@ -96,6 +96,5 @@ names:
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8-seg.zip

View File

@ -1,17 +1,17 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8 dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/coco8/
# Example usage: yolo train data=coco8.yaml
# parent
# ├── ultralytics
# └── datasets
# └── coco8 ← downloads here (1 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8 # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)
# Classes
names:
@ -96,6 +96,5 @@ names:
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8.zip
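coco8 (4 train / 4 val images) is intended for quick smoke tests; a minimal Python sketch of the example usage above, with the checkpoint and epoch count illustrative:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
model.train(data="coco8.yaml", epochs=3, imgsz=640)  # tiny 8-image run, finishes in minutes
metrics = model.val()                                # validate on the val split defined above
print(metrics.box.map)                               # mAP50-95
```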

View File

@ -0,0 +1,21 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Crack-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/crack-seg/
# Example usage: yolo train data=crack-seg.yaml
# parent
# ├── ultralytics
# └── datasets
# └── crack-seg ← downloads here (91.2 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/crack-seg # dataset root dir
train: train/images # train images (relative to 'path') 3717 images
val: valid/images # val images (relative to 'path') 112 images
test: test/images # test images (relative to 'path') 200 images
# Classes
names:
0: crack
# Download script/URL (optional)
download: https://ultralytics.com/assets/crack-seg.zip

View File

@ -0,0 +1,34 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA8 dataset 8 images from split DOTAv1 dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/obb/dota8/
# Example usage: yolo train model=yolov8n-obb.pt data=dota8.yaml
# parent
# ├── ultralytics
# └── datasets
# └── dota8 ← downloads here (1MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/dota8 # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
# Classes for DOTA 1.0
names:
0: plane
1: ship
2: storage tank
3: baseball diamond
4: tennis court
5: basketball court
6: ground track field
7: harbor
8: bridge
9: large vehicle
10: small vehicle
11: helicopter
12: roundabout
13: soccer ball field
14: swimming pool
# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/dota8.zip

View File

@ -1,17 +1,17 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Open Images v7 dataset https://storage.googleapis.com/openimages/web/index.html by Google
# Documentation: https://docs.ultralytics.com/datasets/detect/open-images-v7/
# Example usage: yolo train data=open-images-v7.yaml
# parent
# ├── ultralytics
# └── datasets
# └── open-images-v7 ← downloads here (561 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/open-images-v7 # dataset root dir
train: images/train # train images (relative to 'path') 1743042 images
val: images/val # val images (relative to 'path') 41620 images
test: # test images (optional)
# Classes
names:
@ -617,7 +617,6 @@ names:
599: Zebra
600: Zucchini
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  from ultralytics.utils import LOGGER, SETTINGS, Path, is_ubuntu, get_ubuntu_version

View File

@ -0,0 +1,21 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Package-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/package-seg/
# Example usage: yolo train data=package-seg.yaml
# parent
# ├── ultralytics
# └── datasets
# └── package-seg ← downloads here (102 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/package-seg # dataset root dir
train: images/train # train images (relative to 'path') 1920 images
val: images/val # val images (relative to 'path') 89 images
test: test/images # test images (relative to 'path') 188 images
# Classes
names:
0: package
# Download script/URL (optional)
download: https://ultralytics.com/assets/package-seg.zip

View File

@ -0,0 +1,24 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Tiger Pose dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/pose/tiger-pose/
# Example usage: yolo train data=tiger-pose.yaml
# parent
# ├── ultralytics
# └── datasets
# └── tiger-pose ← downloads here (75.3 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/tiger-pose # dataset root dir
train: train # train images (relative to 'path') 210 images
val: val # val images (relative to 'path') 53 images
# Keypoints
kpt_shape: [12, 2] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
# Classes
names:
0: tiger
# Download script/URL (optional)
download: https://ultralytics.com/assets/tiger-pose.zip
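`kpt_shape: [12, 2]` above means each tiger instance is annotated with 12 (x, y) keypoints and no visibility flag. A hedged sketch of reading keypoints back from a prediction, assuming a pose model trained on this layout (the checkpoint and image names are illustrative):

```python
from ultralytics import YOLO

model = YOLO("tiger-pose-best.pt")  # illustrative: a pose checkpoint trained on tiger-pose.yaml
results = model.predict("tiger.jpg")
kpts = results[0].keypoints.xy      # tensor shaped (num_instances, 12, 2) for this dataset
print(kpts.shape)
```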

View File

@ -1,17 +1,17 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
# --------  DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command!  --------
# Documentation: https://docs.ultralytics.com/datasets/detect/xview/
# Example usage: yolo train data=xView.yaml
# parent
# ├── ultralytics
# └── datasets
# └── xView ← downloads here (20.7 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/xView # dataset root dir
train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
val: images/autosplit_val.txt # train images (relative to 'path') 10% of 847 train images
# Classes
names:
@ -76,7 +76,6 @@ names:
58: Pylon
59: Tower
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import json

View File

@ -1,116 +1,127 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license # Ultralytics YOLO 🚀, AGPL-3.0 license
# Default training settings and hyperparameters for medium-augmentation COCO training # Default training settings and hyperparameters for medium-augmentation COCO training
task: detect # (str) YOLO task, i.e. detect, segment, classify, pose task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
# Train settings ------------------------------------------------------------------------------------------------------- # Train settings -------------------------------------------------------------------------------------------------------
model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
data: # (str, optional) path to data file, i.e. coco128.yaml data: # (str, optional) path to data file, i.e. coco128.yaml
epochs: 100 # (int) number of epochs to train for epochs: 100 # (int) number of epochs to train for
patience: 50 # (int) epochs to wait for no observable improvement for early stopping of training time: # (float, optional) number of hours to train for, overrides epochs if supplied
batch: 16 # (int) number of images per batch (-1 for AutoBatch) patience: 100 # (int) epochs to wait for no observable improvement for early stopping of training
imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes batch: 16 # (int) number of images per batch (-1 for AutoBatch)
save: True # (bool) save train checkpoints and predict results imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
save: True # (bool) save train checkpoints and predict results
save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1) save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
cache: False # (bool) True/ram, disk or False. Use cache for data loading val_period: 1 # (int) Validation every x epochs
device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu cache: False # (bool) True/ram, disk or False. Use cache for data loading
workers: 8 # (int) number of worker threads for data loading (per RANK if DDP) device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
project: # (str, optional) project name workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
name: # (str, optional) experiment name, results saved to 'project/name' directory project: # (str, optional) project name
exist_ok: False # (bool) whether to overwrite existing experiment name: # (str, optional) experiment name, results saved to 'project/name' directory
pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str) exist_ok: False # (bool) whether to overwrite existing experiment
optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto] pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
verbose: True # (bool) whether to print verbose output optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
seed: 0 # (int) random seed for reproducibility verbose: True # (bool) whether to print verbose output
deterministic: True # (bool) whether to enable deterministic mode seed: 0 # (int) random seed for reproducibility
single_cls: False # (bool) train multi-class data as single-class deterministic: True # (bool) whether to enable deterministic mode
rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val' single_cls: False # (bool) train multi-class data as single-class
cos_lr: False # (bool) use cosine learning rate scheduler rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable) cos_lr: False # (bool) use cosine learning rate scheduler
resume: False # (bool) resume training from last checkpoint close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check resume: False # (bool) resume training from last checkpoint
fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set) amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
multi_scale: False # (bool) Whether to use multiscale during training
# Segmentation # Segmentation
overlap_mask: True # (bool) masks should overlap during training (segment train only) overlap_mask: True # (bool) masks should overlap during training (segment train only)
mask_ratio: 4 # (int) mask downsample ratio (segment train only) mask_ratio: 4 # (int) mask downsample ratio (segment train only)
# Classification # Classification
dropout: 0.0 # (float) use dropout regularization (classify train only) dropout: 0.0 # (float) use dropout regularization (classify train only)
# Val/Test settings ---------------------------------------------------------------------------------------------------- # Val/Test settings ----------------------------------------------------------------------------------------------------
val: True # (bool) validate/test during training val: True # (bool) validate/test during training
split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train' split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
save_json: False # (bool) save results to JSON file save_json: False # (bool) save results to JSON file
save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions) save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val) conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
iou: 0.7 # (float) intersection over union (IoU) threshold for NMS iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
max_det: 300 # (int) maximum number of detections per image max_det: 300 # (int) maximum number of detections per image
half: False # (bool) use half precision (FP16) half: False # (bool) use half precision (FP16)
dnn: False # (bool) use OpenCV DNN for ONNX inference dnn: False # (bool) use OpenCV DNN for ONNX inference
plots: True # (bool) save plots during train/val plots: True # (bool) save plots and images during train/val
# Prediction settings -------------------------------------------------------------------------------------------------- # Predict settings -----------------------------------------------------------------------------------------------------
source: # (str, optional) source directory for images or videos source: # (str, optional) source directory for images or videos
show: False # (bool) show results if possible vid_stride: 1 # (int) video frame-rate stride
save_txt: False # (bool) save results as .txt file stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
save_conf: False # (bool) save results with confidence scores visualize: False # (bool) visualize model features
save_crop: False # (bool) save cropped images with results augment: False # (bool) apply image augmentation to prediction sources
show_labels: True # (bool) show object labels in plots agnostic_nms: False # (bool) class-agnostic NMS
show_conf: True # (bool) show object confidence scores in plots classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
retina_masks: False # (bool) use high-resolution segmentation masks
embed: # (list[int], optional) return feature vectors/embeddings from given layers

# Visualize settings ---------------------------------------------------------------------------------------------------
show: False # (bool) show predicted images and videos if environment allows
save_frames: False # (bool) save predicted individual video frames
save_txt: False # (bool) save results as .txt file
save_conf: False # (bool) save results with confidence scores
save_crop: False # (bool) save cropped images with results
show_labels: True # (bool) show prediction labels, i.e. 'person'
show_conf: True # (bool) show prediction confidence, i.e. '0.99'
show_boxes: True # (bool) show prediction boxes
line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.

# Export settings ------------------------------------------------------------------------------------------------------
format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
keras: False # (bool) use Keras
optimize: False # (bool) TorchScript: optimize for mobile
int8: False # (bool) CoreML/TF INT8 quantization
dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
simplify: False # (bool) ONNX: simplify model using `onnxslim`
opset: # (int, optional) ONNX: opset version
workspace: 4 # (int) TensorRT: workspace size (GB)
nms: False # (bool) CoreML: add NMS

# Hyperparameters ------------------------------------------------------------------------------------------------------
lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
lrf: 0.01 # (float) final learning rate (lr0 * lrf)
momentum: 0.937 # (float) SGD momentum/Adam beta1
weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
warmup_momentum: 0.8 # (float) warmup initial momentum
warmup_bias_lr: 0.1 # (float) warmup initial bias lr
box: 7.5 # (float) box loss gain
cls: 0.5 # (float) cls loss gain (scale with pixels)
dfl: 1.5 # (float) dfl loss gain
pose: 12.0 # (float) pose loss gain
kobj: 1.0 # (float) keypoint obj loss gain
label_smoothing: 0.0 # (float) label smoothing (fraction)
nbs: 64 # (int) nominal batch size
hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
degrees: 0.0 # (float) image rotation (+/- deg)
translate: 0.1 # (float) image translation (+/- fraction)
scale: 0.5 # (float) image scale (+/- gain)
shear: 0.0 # (float) image shear (+/- deg)
perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # (float) image flip up-down (probability)
fliplr: 0.5 # (float) image flip left-right (probability)
bgr: 0.0 # (float) image channel BGR (probability)
mosaic: 1.0 # (float) image mosaic (probability)
mixup: 0.0 # (float) image mixup (probability)
copy_paste: 0.0 # (float) segment copy-paste (probability)
auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
erasing: 0.4 # (float) probability of random erasing during classification training (0-1)
crop_fraction: 1.0 # (float) image crop fraction for classification evaluation/inference (0-1)

# Custom config.yaml ---------------------------------------------------------------------------------------------------
cfg: # (str, optional) for overriding defaults.yaml

# Tracker settings ------------------------------------------------------------------------------------------------------
tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
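These prediction, export and tracker defaults are rarely edited in place; they are normally overridden per call. A minimal sketch, assuming the standard `ultralytics` Python API and a generic `yolov8n.pt` checkpoint (both assumptions, not part of this commit):

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # any detector weights; the path here is illustrative only

# Prediction settings such as vid_stride, agnostic_nms and line_width map
# one-to-one onto keyword arguments of predict()/track().
results = model.predict(source="video.mp4", vid_stride=1, agnostic_nms=False, line_width=2)

# The tracker key selects botsort.yaml or bytetrack.yaml at track() time.
results = model.track(source="video.mp4", tracker="botsort.yaml")

# Export settings (format, optimize, dynamic, int8, ...) are passed to export().
model.export(format="torchscript", optimize=False)
```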

@@ -14,8 +14,7 @@ Model `*.yaml` files may be used directly in the Command Line Interface (CLI) wi
yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100
```
They may also be used directly in a Python environment, and accept the same [arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above:
```python
from ultralytics import YOLO
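# A minimal sketch of how this example typically continues; the remaining lines
# are truncated in this hunk, so the exact wording below is assumed:
model = YOLO("yolov8n.yaml")  # build a new model from the YAML definition
model.train(data="coco128.yaml", epochs=100)  # same arguments as the CLI call above
```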

@@ -2,49 +2,49 @@
# RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
# [depth, width, max_channels]
l: [1.00, 1.00, 1024]
backbone:
# [from, repeats, module, args]
- [-1, 1, HGStem, [32, 48]] # 0-P2/4
- [-1, 6, HGBlock, [48, 128, 3]] # stage 1
- [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
- [-1, 6, HGBlock, [96, 512, 3]] # stage 2
- [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P3/16
- [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut
- [-1, 6, HGBlock, [192, 1024, 5, True, True]]
- [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3
- [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P4/32
- [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4
head:
- [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2
- [-1, 1, AIFI, [1024, 8]]
- [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1
- [[-2, -1], 1, Concat, [1]]
- [-1, 3, RepC3, [256]] # 16, fpn_blocks.0
- [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0
- [[-2, -1], 1, Concat, [1]] # cat backbone P4
- [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1
- [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0
- [[-1, 17], 1, Concat, [1]] # cat Y4
- [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0
- [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1
- [[-1, 12], 1, Concat, [1]] # cat Y5
- [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1
- [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)

@@ -0,0 +1,42 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# RT-DETR-ResNet101 object detection model with P3-P5 outputs.
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
# [depth, width, max_channels]
l: [1.00, 1.00, 1024]
backbone:
# [from, repeats, module, args]
- [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
- [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
- [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
- [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3
- [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
head:
- [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
- [-1, 1, AIFI, [1024, 8]]
- [-1, 1, Conv, [256, 1, 1]] # 7
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
- [[-2, -1], 1, Concat, [1]]
- [-1, 3, RepC3, [256]] # 11
- [-1, 1, Conv, [256, 1, 1]] # 12
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
- [[-2, -1], 1, Concat, [1]] # cat backbone P4
- [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
- [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
- [[-1, 12], 1, Concat, [1]] # cat Y4
- [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
- [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
- [[-1, 7], 1, Concat, [1]] # cat Y5
- [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
- [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)

@@ -0,0 +1,42 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# RT-DETR-ResNet50 object detection model with P3-P5 outputs.
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
# [depth, width, max_channels]
l: [1.00, 1.00, 1024]
backbone:
# [from, repeats, module, args]
- [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
- [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
- [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
- [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3
- [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
head:
- [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
- [-1, 1, AIFI, [1024, 8]]
- [-1, 1, Conv, [256, 1, 1]] # 7
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
- [[-2, -1], 1, Concat, [1]]
- [-1, 3, RepC3, [256]] # 11
- [-1, 1, Conv, [256, 1, 1]] # 12
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
- [[-2, -1], 1, Concat, [1]] # cat backbone P4
- [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
- [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
- [[-1, 12], 1, Concat, [1]] # cat Y4
- [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
- [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
- [[-1, 7], 1, Concat, [1]] # cat Y5
- [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
- [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)

@@ -2,53 +2,53 @@
# RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
# [depth, width, max_channels]
x: [1.00, 1.00, 2048]
backbone:
# [from, repeats, module, args]
- [-1, 1, HGStem, [32, 64]] # 0-P2/4
- [-1, 6, HGBlock, [64, 128, 3]] # stage 1
- [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
- [-1, 6, HGBlock, [128, 512, 3]]
- [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2
- [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P3/16
- [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut
- [-1, 6, HGBlock, [256, 1024, 5, True, True]]
- [-1, 6, HGBlock, [256, 1024, 5, True, True]]
- [-1, 6, HGBlock, [256, 1024, 5, True, True]]
- [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3
- [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P4/32
- [-1, 6, HGBlock, [512, 2048, 5, True, False]]
- [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4
head:
- [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2
- [-1, 1, AIFI, [2048, 8]]
- [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1
- [[-2, -1], 1, Concat, [1]]
- [-1, 3, RepC3, [384]] # 20, fpn_blocks.0
- [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0
- [[-2, -1], 1, Concat, [1]] # cat backbone P4
- [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1
- [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0
- [[-1, 21], 1, Concat, [1]] # cat Y4
- [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0
- [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1
- [[-1, 16], 1, Concat, [1]] # cat Y5
- [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1
- [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)

@@ -0,0 +1,40 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
b: [0.67, 1.00, 512]
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2fCIB, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
- [-1, 1, PSA, [1024]] # 10
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2fCIB, [512, True]] # 13
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 16 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 13], 1, Concat, [1]] # cat head P4
- [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
- [-1, 1, SCDown, [512, 3, 2]]
- [[-1, 10], 1, Concat, [1]] # cat head P5
- [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
- [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)

@@ -0,0 +1,40 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2fCIB, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
- [-1, 1, PSA, [1024]] # 10
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2fCIB, [512, True]] # 13
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 16 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 13], 1, Concat, [1]] # cat head P4
- [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
- [-1, 1, SCDown, [512, 3, 2]]
- [[-1, 10], 1, Concat, [1]] # cat head P5
- [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
- [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)

@@ -0,0 +1,43 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2fCIB, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
- [-1, 1, PSA, [1024]] # 10
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2f, [512]] # 13
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 16 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 13], 1, Concat, [1]] # cat head P4
- [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
- [-1, 1, SCDown, [512, 3, 2]]
- [[-1, 10], 1, Concat, [1]] # cat head P5
- [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
- [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)

@@ -0,0 +1,40 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2f, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
- [-1, 1, PSA, [1024]] # 10
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2f, [512]] # 13
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 16 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 13], 1, Concat, [1]] # cat head P4
- [-1, 3, C2f, [512]] # 19 (P4/16-medium)
- [-1, 1, SCDown, [512, 3, 2]]
- [[-1, 10], 1, Concat, [1]] # cat head P5
- [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
- [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)

@@ -0,0 +1,39 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
s: [0.33, 0.50, 1024]
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2fCIB, [1024, True, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
- [-1, 1, PSA, [1024]] # 10
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2f, [512]] # 13
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 16 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 13], 1, Concat, [1]] # cat head P4
- [-1, 3, C2f, [512]] # 19 (P4/16-medium)
- [-1, 1, SCDown, [512, 3, 2]]
- [[-1, 10], 1, Concat, [1]] # cat head P5
- [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
- [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)

@@ -0,0 +1,40 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
x: [1.00, 1.25, 512]
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2fCIB, [512, True]]
- [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2fCIB, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
- [-1, 1, PSA, [1024]] # 10
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2fCIB, [512, True]] # 13
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 16 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 13], 1, Concat, [1]] # cat head P4
- [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
- [-1, 1, SCDown, [512, 3, 2]]
- [[-1, 10], 1, Concat, [1]] # cat head P5
- [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
- [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
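The YOLOv10 scale variants above (n, s, m, b, l, x) share the same layout and differ mainly in their scale constants and in where C2fCIB, SCDown and PSA replace the plain YOLOv8 blocks; the v10Detect head is what supports NMS-free inference. A minimal sketch of building a model from one of these definitions, assuming the Ultralytics-style `YOLO` loader bundled with this repository resolves the file name `yolov10s.yaml`:

```python
from ultralytics import YOLO

# Assemble an untrained YOLOv10-s detector from its YAML definition; the trailing
# 's' in the name selects the [0.33, 0.50, 1024] entry from the scales block.
model = YOLO("yolov10s.yaml")
model.info()  # print the layer/parameter summary of the assembled network

# Custom weights trained from this definition can be loaded the same way:
# model = YOLO("path/to/yolov10s_custom.pt")
```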

@@ -2,47 +2,45 @@
# YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# darknet53 backbone
backbone:
# [from, number, module, args]
- [-1, 1, Conv, [32, 3, 1]] # 0
- [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
- [-1, 1, Bottleneck, [64]]
- [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
- [-1, 2, Bottleneck, [128]]
- [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
- [-1, 8, Bottleneck, [256]]
- [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
- [-1, 8, Bottleneck, [512]]
- [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
- [-1, 4, Bottleneck, [1024]] # 10
# YOLOv3-SPP head
head:
- [-1, 1, Bottleneck, [1024, False]]
- [-1, 1, SPP, [512, [5, 9, 13]]]
- [-1, 1, Conv, [1024, 3, 1]]
- [-1, 1, Conv, [512, 1, 1]]
- [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)
- [-2, 1, Conv, [256, 1, 1]]
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 8], 1, Concat, [1]] # cat backbone P4
- [-1, 1, Bottleneck, [512, False]]
- [-1, 1, Bottleneck, [512, False]]
- [-1, 1, Conv, [256, 1, 1]]
- [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)
- [-2, 1, Conv, [128, 1, 1]]
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P3
- [-1, 1, Bottleneck, [256, False]]
- [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)
- [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)

@@ -2,38 +2,36 @@
# YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# YOLOv3-tiny backbone
backbone:
# [from, number, module, args]
- [-1, 1, Conv, [16, 3, 1]] # 0
- [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 1-P1/2
- [-1, 1, Conv, [32, 3, 1]]
- [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 3-P2/4
- [-1, 1, Conv, [64, 3, 1]]
- [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 5-P3/8
- [-1, 1, Conv, [128, 3, 1]]
- [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 7-P4/16
- [-1, 1, Conv, [256, 3, 1]]
- [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 9-P5/32
- [-1, 1, Conv, [512, 3, 1]]
- [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]] # 11
- [-1, 1, nn.MaxPool2d, [2, 1, 0]] # 12
# YOLOv3-tiny head
head:
- [-1, 1, Conv, [1024, 3, 1]]
- [-1, 1, Conv, [256, 1, 1]]
- [-1, 1, Conv, [512, 3, 1]] # 15 (P5/32-large)
- [-2, 1, Conv, [128, 1, 1]]
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 8], 1, Concat, [1]] # cat backbone P4
- [-1, 1, Conv, [256, 3, 1]] # 19 (P4/16-medium)
- [[19, 15], 1, Detect, [nc]] # Detect(P4, P5)

@@ -2,47 +2,45 @@
# YOLOv3 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
# darknet53 backbone
backbone:
# [from, number, module, args]
- [-1, 1, Conv, [32, 3, 1]] # 0
- [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
- [-1, 1, Bottleneck, [64]]
- [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
- [-1, 2, Bottleneck, [128]]
- [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
- [-1, 8, Bottleneck, [256]]
- [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
- [-1, 8, Bottleneck, [512]]
- [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
- [-1, 4, Bottleneck, [1024]] # 10
# YOLOv3 head
head:
- [-1, 1, Bottleneck, [1024, False]]
- [-1, 1, Conv, [512, 1, 1]]
- [-1, 1, Conv, [1024, 3, 1]]
- [-1, 1, Conv, [512, 1, 1]]
- [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)
- [-2, 1, Conv, [256, 1, 1]]
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 8], 1, Concat, [1]] # cat backbone P4
- [-1, 1, Bottleneck, [512, False]]
- [-1, 1, Bottleneck, [512, False]]
- [-1, 1, Conv, [256, 1, 1]]
- [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)
- [-2, 1, Conv, [128, 1, 1]]
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P3
- [-1, 1, Bottleneck, [256, False]]
- [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)
- [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)

@@ -2,7 +2,7 @@
# YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.ultralytics.com/models/yolov5
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov5n-p6.yaml' will call yolov5-p6.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
@@ -14,48 +14,46 @@ scales: # model compound scaling constants, i.e. 'model=yolov5n-p6.yaml' will ca
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
- [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C3, [128]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C3, [256]]
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 9, C3, [512]]
- [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
- [-1, 3, C3, [768]]
- [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
- [-1, 3, C3, [1024]]
- [-1, 1, SPPF, [1024, 5]] # 11
# YOLOv5 v6.0 head
head:
- [-1, 1, Conv, [768, 1, 1]]
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 8], 1, Concat, [1]] # cat backbone P5
- [-1, 3, C3, [768, False]] # 15
- [-1, 1, Conv, [512, 1, 1]]
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C3, [512, False]] # 19
- [-1, 1, Conv, [256, 1, 1]]
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C3, [256, False]] # 23 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 20], 1, Concat, [1]] # cat head P4
- [-1, 3, C3, [512, False]] # 26 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]]
- [[-1, 16], 1, Concat, [1]] # cat head P5
- [-1, 3, C3, [768, False]] # 29 (P5/32-large)
- [-1, 1, Conv, [768, 3, 2]]
- [[-1, 12], 1, Concat, [1]] # cat head P6
- [-1, 3, C3, [1024, False]] # 32 (P6/64-xlarge)
- [[23, 26, 29, 32], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)

@@ -2,7 +2,7 @@
# YOLOv5 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov5
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call yolov5.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
@@ -14,37 +14,35 @@ scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
- [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C3, [128]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C3, [256]]
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 9, C3, [512]]
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C3, [1024]]
- [-1, 1, SPPF, [1024, 5]] # 9
# YOLOv5 v6.0 head
head:
- [-1, 1, Conv, [512, 1, 1]]
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C3, [512, False]] # 13
- [-1, 1, Conv, [256, 1, 1]]
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C3, [256, False]] # 17 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 14], 1, Concat, [1]] # cat head P4
- [-1, 3, C3, [512, False]] # 20 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]]
- [[-1, 10], 1, Concat, [1]] # cat head P5
- [-1, 3, C3, [1024, False]] # 23 (P5/32-large)
- [[17, 20, 23], 1, Detect, [nc]] # Detect(P3, P4, P5)

@@ -2,8 +2,8 @@
# YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6
# Parameters
nc: 80 # number of classes
activation: nn.ReLU() # (optional) model default activation function
scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
@@ -15,39 +15,39 @@ scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call
# YOLOv6-3.0s backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 6, Conv, [128, 3, 1]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 12, Conv, [256, 3, 1]]
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 18, Conv, [512, 3, 1]]
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
- [-1, 6, Conv, [1024, 3, 1]]
- [-1, 1, SPPF, [1024, 5]] # 9
# YOLOv6-3.0s head
head:
- [-1, 1, Conv, [256, 1, 1]]
- [-1, 1, nn.ConvTranspose2d, [256, 2, 2, 0]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 1, Conv, [256, 3, 1]]
- [-1, 9, Conv, [256, 3, 1]] # 14
- [-1, 1, Conv, [128, 1, 1]]
- [-1, 1, nn.ConvTranspose2d, [128, 2, 2, 0]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 1, Conv, [128, 3, 1]]
- [-1, 9, Conv, [128, 3, 1]] # 19
- [-1, 1, Conv, [128, 3, 2]]
- [[-1, 15], 1, Concat, [1]] # cat head P4
- [-1, 1, Conv, [256, 3, 1]]
- [-1, 9, Conv, [256, 3, 1]] # 23
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 10], 1, Concat, [1]] # cat head P5
- [-1, 1, Conv, [512, 3, 1]]
- [-1, 9, Conv, [512, 3, 1]] # 27
- [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5)

@@ -0,0 +1,25 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify
# Parameters
nc: 1000 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
s: [0.33, 0.50, 1024]
m: [0.67, 0.75, 1024]
l: [1.00, 1.00, 1024]
x: [1.00, 1.25, 1024]
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
- [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
- [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
- [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3-P4/16
- [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32
# YOLOv8.0n head
head:
- [-1, 1, Classify, [nc]] # Classify

@@ -0,0 +1,25 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify
# Parameters
nc: 1000 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
s: [0.33, 0.50, 1024]
m: [0.67, 0.75, 1024]
l: [1.00, 1.00, 1024]
x: [1.00, 1.25, 1024]
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
- [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
- [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
- [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3-P4/16
- [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32
# YOLOv8.0n head
head:
- [-1, 1, Classify, [nc]] # Classify

@@ -2,7 +2,7 @@
# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify
# Parameters
nc: 1000 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
@@ -14,16 +14,16 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will c
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2f, [1024, True]]
# YOLOv8.0n head
head:
- [-1, 1, Classify, [nc]] # Classify

@@ -0,0 +1,54 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p2 summary: 491 layers, 2033944 parameters, 2033928 gradients, 13.8 GFLOPs
s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p2 summary: 491 layers, 5562080 parameters, 5562064 gradients, 25.1 GFLOPs
m: [0.67, 0.75, 768] # YOLOv8m-ghost-p2 summary: 731 layers, 9031728 parameters, 9031712 gradients, 42.8 GFLOPs
l: [1.00, 1.00, 512] # YOLOv8l-ghost-p2 summary: 971 layers, 12214448 parameters, 12214432 gradients, 69.1 GFLOPs
x: [1.00, 1.25, 512] # YOLOv8x-ghost-p2 summary: 971 layers, 18664776 parameters, 18664760 gradients, 103.3 GFLOPs
# YOLOv8.0-ghost backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C3Ghost, [128, True]]
- [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C3Ghost, [256, True]]
- [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
- [-1, 6, C3Ghost, [512, True]]
- [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C3Ghost, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
# YOLOv8.0-ghost-p2 head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C3Ghost, [512]] # 12
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 2], 1, Concat, [1]] # cat backbone P2
- [-1, 3, C3Ghost, [128]] # 18 (P2/4-xsmall)
- [-1, 1, GhostConv, [128, 3, 2]]
- [[-1, 15], 1, Concat, [1]] # cat head P3
- [-1, 3, C3Ghost, [256]] # 21 (P3/8-small)
- [-1, 1, GhostConv, [256, 3, 2]]
- [[-1, 12], 1, Concat, [1]] # cat head P4
- [-1, 3, C3Ghost, [512]] # 24 (P4/16-medium)
- [-1, 1, GhostConv, [512, 3, 2]]
- [[-1, 9], 1, Concat, [1]] # cat head P5
- [-1, 3, C3Ghost, [1024]] # 27 (P5/32-large)
- [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)

@@ -0,0 +1,56 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p6 summary: 529 layers, 2901100 parameters, 2901084 gradients, 5.8 GFLOPs
s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p6 summary: 529 layers, 9520008 parameters, 9519992 gradients, 16.4 GFLOPs
m: [0.67, 0.75, 768] # YOLOv8m-ghost-p6 summary: 789 layers, 18002904 parameters, 18002888 gradients, 34.4 GFLOPs
l: [1.00, 1.00, 512] # YOLOv8l-ghost-p6 summary: 1049 layers, 21227584 parameters, 21227568 gradients, 55.3 GFLOPs
x: [1.00, 1.25, 512] # YOLOv8x-ghost-p6 summary: 1049 layers, 33057852 parameters, 33057836 gradients, 85.7 GFLOPs
# YOLOv8.0-ghost backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C3Ghost, [128, True]]
- [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C3Ghost, [256, True]]
- [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
- [-1, 6, C3Ghost, [512, True]]
- [-1, 1, GhostConv, [768, 3, 2]] # 7-P5/32
- [-1, 3, C3Ghost, [768, True]]
- [-1, 1, GhostConv, [1024, 3, 2]] # 9-P6/64
- [-1, 3, C3Ghost, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 11
# YOLOv8.0-ghost-p6 head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 8], 1, Concat, [1]] # cat backbone P5
- [-1, 3, C3Ghost, [768]] # 14
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C3Ghost, [512]] # 17
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C3Ghost, [256]] # 20 (P3/8-small)
- [-1, 1, GhostConv, [256, 3, 2]]
- [[-1, 17], 1, Concat, [1]] # cat head P4
- [-1, 3, C3Ghost, [512]] # 23 (P4/16-medium)
- [-1, 1, GhostConv, [512, 3, 2]]
- [[-1, 14], 1, Concat, [1]] # cat head P5
- [-1, 3, C3Ghost, [768]] # 26 (P5/32-large)
- [-1, 1, GhostConv, [768, 3, 2]]
- [[-1, 11], 1, Concat, [1]] # cat head P6
- [-1, 3, C3Ghost, [1024]] # 29 (P6/64-xlarge)
- [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)

@@ -0,0 +1,47 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024] # YOLOv8n-ghost summary: 403 layers, 1865316 parameters, 1865300 gradients, 5.8 GFLOPs
s: [0.33, 0.50, 1024] # YOLOv8s-ghost summary: 403 layers, 5960072 parameters, 5960056 gradients, 16.4 GFLOPs
m: [0.67, 0.75, 768] # YOLOv8m-ghost summary: 603 layers, 10336312 parameters, 10336296 gradients, 32.7 GFLOPs
l: [1.00, 1.00, 512] # YOLOv8l-ghost summary: 803 layers, 14277872 parameters, 14277856 gradients, 53.7 GFLOPs
x: [1.00, 1.25, 512] # YOLOv8x-ghost summary: 803 layers, 22229308 parameters, 22229292 gradients, 83.3 GFLOPs
# YOLOv8.0n-ghost backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C3Ghost, [128, True]]
- [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C3Ghost, [256, True]]
- [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
- [-1, 6, C3Ghost, [512, True]]
- [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C3Ghost, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C3Ghost, [512]] # 12
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
- [-1, 1, GhostConv, [256, 3, 2]]
- [[-1, 12], 1, Concat, [1]] # cat head P4
- [-1, 3, C3Ghost, [512]] # 18 (P4/16-medium)
- [-1, 1, GhostConv, [512, 3, 2]]
- [[-1, 9], 1, Concat, [1]] # cat head P5
- [-1, 3, C3Ghost, [1024]] # 21 (P5/32-large)
- [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)

@@ -0,0 +1,46 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
# YOLOv8.0n backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2f, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
# YOLOv8.0n head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2f, [512]] # 12
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 15 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 12], 1, Concat, [1]] # cat head P4
- [-1, 3, C2f, [512]] # 18 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]]
- [[-1, 9], 1, Concat, [1]] # cat head P5
- [-1, 3, C2f, [1024]] # 21 (P5/32-large)
- [[15, 18, 21], 1, OBB, [nc, 1]] # OBB(P3, P4, P5)

@@ -2,7 +2,7 @@
# YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
# [depth, width, max_channels]
n: [0.33, 0.25, 1024]
@@ -14,41 +14,41 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call
# YOLOv8.0 backbone
backbone:
# [from, repeats, module, args]
- [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
- [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
- [-1, 3, C2f, [128, True]]
- [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
- [-1, 6, C2f, [256, True]]
- [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
- [-1, 6, C2f, [512, True]]
- [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
- [-1, 3, C2f, [1024, True]]
- [-1, 1, SPPF, [1024, 5]] # 9
# YOLOv8.0-p2 head
head:
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 6], 1, Concat, [1]] # cat backbone P4
- [-1, 3, C2f, [512]] # 12
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 4], 1, Concat, [1]] # cat backbone P3
- [-1, 3, C2f, [256]] # 15 (P3/8-small)
- [-1, 1, nn.Upsample, [None, 2, "nearest"]]
- [[-1, 2], 1, Concat, [1]] # cat backbone P2
- [-1, 3, C2f, [128]] # 18 (P2/4-xsmall)
- [-1, 1, Conv, [128, 3, 2]]
- [[-1, 15], 1, Concat, [1]] # cat head P3
- [-1, 3, C2f, [256]] # 21 (P3/8-small)
- [-1, 1, Conv, [256, 3, 2]]
- [[-1, 12], 1, Concat, [1]] # cat head P4
- [-1, 3, C2f, [512]] # 24 (P4/16-medium)
- [-1, 1, Conv, [512, 3, 2]]
- [[-1, 9], 1, Concat, [1]] # cat head P5
- [-1, 3, C2f, [1024]] # 27 (P5/32-large)
- [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)

@@ -2,7 +2,7 @@
# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@@ -14,43 +14,43 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will ca
# YOLOv8.0x6 backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [768, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 11
# YOLOv8.0x6 head
head:
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C2, [768, False]] # 14
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2, [512, False]] # 17
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 17], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
  - [-1, 1, Conv, [768, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P6
  - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
  - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
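As a rough sketch of what the "n" scale row above implies (the arithmetic is simplified here; the real logic lives in the ultralytics model parser, and the variable names are assumptions):

    # Approximate effect of n: [depth=0.33, width=0.25, max_channels=1024]
    depth, width, max_channels = 0.33, 0.25, 1024

    repeats = max(round(6 * depth), 1)               # a 6-repeat C2f stage -> 2 repeats at "n"
    channels = int(min(1024, max_channels) * width)  # a 1024-channel layer -> 256 channels at "n"
    print(repeats, channels)                         # 2 256

So requesting "yolov8n-p6.yaml" builds the P6 layout above with roughly a third of the depth and a quarter of the width of the base definition.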

@@ -2,8 +2,8 @@
# YOLOv8-pose-p6 keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
# Parameters
nc: 1 # number of classes
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@@ -15,43 +15,43 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will ca
# YOLOv8.0x6 backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [768, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 11
# YOLOv8.0x6 head
head:
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C2, [768, False]] # 14
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2, [512, False]] # 17
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 17], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
  - [-1, 1, Conv, [768, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P6
  - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
  - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6)
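A hedged sketch of instantiating this pose-p6 layout (assumes the ultralytics package; the n-prefixed name follows the scale-resolution convention described in the scales comment above):

    # Build the P3-P6 pose model at the "n" scale.
    from ultralytics import YOLO

    model = YOLO("yolov8n-pose-p6.yaml")
    # kpt_shape [17, 3]: 17 keypoints per person, each predicted as (x, y, visible).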

@@ -2,8 +2,8 @@
# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
# Parameters
nc: 1 # number of classes
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@@ -15,33 +15,33 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will
# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
# YOLOv8.0n head
head:
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)
  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
  - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5)
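A short usage sketch for the pose head defined above (the weight file and image path are placeholders; it assumes pretrained yolov8n-pose weights are available):

    # Run pose estimation and read keypoints shaped per kpt_shape [17, 3].
    from ultralytics import YOLO

    model = YOLO("yolov8n-pose.pt")      # pretrained pose weights (assumed available)
    results = model("test_person.jpg")   # placeholder image path
    kpts = results[0].keypoints.data     # tensor (num_persons, 17, 3): x, y, visibility score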

Some files were not shown because too many files have changed in this diff.