add yolo v10 and modify pipeline

2025-03-28 13:19:54 +08:00
parent 183299c06b
commit 798c596acc
471 changed files with 19109 additions and 7342 deletions
--- a/ultralytics/init.py
+++ b/ultralytics/init.py
@ -1,12 +1,27 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = '8.0.173'
+__version__ = "8.1.34"

-from ultralytics.models import RTDETR, SAM, YOLO
+from ultralytics.data.explorer.explorer import Explorer
+from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10
 from ultralytics.models.fastsam import FastSAM
 from ultralytics.models.nas import NAS
-from ultralytics.utils import SETTINGS as settings
+from ultralytics.utils import ASSETS, SETTINGS as settings
 from ultralytics.utils.checks import check_yolo as checks
 from ultralytics.utils.downloads import download

-__all__ = '__version__', 'YOLO', 'NAS', 'SAM', 'FastSAM', 'RTDETR', 'checks', 'download', 'settings'
+__all__ = (
+    "__version__",
+    "ASSETS",
+    "YOLO",
+    "YOLOWorld",
+    "NAS",
+    "SAM",
+    "FastSAM",
+    "RTDETR",
+    "checks",
+    "download",
+    "settings",
+    "Explorer",
+    "YOLOv10"
+)
--- a/ultralytics/pycache/init.cpython-312.pyc
+++ b/ultralytics/pycache/init.cpython-312.pyc
--- a/ultralytics/pycache/init.cpython-39.pyc
+++ b/ultralytics/pycache/init.cpython-39.pyc
--- a/ultralytics/cfg/init.py
+++ b/ultralytics/cfg/init.py
@ -1,34 +1,62 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

 import contextlib
-import re
+import os
 import shutil
+import subprocess
 import sys
 from pathlib import Path
 from types import SimpleNamespace
 from typing import Dict, List, Union
+import re

-from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_PATH, LOGGER, RANK, SETTINGS,
-                               SETTINGS_YAML, IterableSimpleNamespace, __version__, checks, colorstr, deprecation_warn,
-                               yaml_load, yaml_print)
+from ultralytics.utils import (
+    ASSETS,
+    DEFAULT_CFG,
+    DEFAULT_CFG_DICT,
+    DEFAULT_CFG_PATH,
+    LOGGER,
+    RANK,
+    ROOT,
+    RUNS_DIR,
+    SETTINGS,
+    SETTINGS_YAML,
+    TESTS_RUNNING,
+    IterableSimpleNamespace,
+    __version__,
+    checks,
+    colorstr,
+    deprecation_warn,
+    yaml_load,
+    yaml_print,
+)

 # Define valid tasks and modes
-MODES = 'train', 'val', 'predict', 'export', 'track', 'benchmark'
-TASKS = 'detect', 'segment', 'classify', 'pose'
-TASK2DATA = {'detect': 'coco8.yaml', 'segment': 'coco8-seg.yaml', 'classify': 'imagenet10', 'pose': 'coco8-pose.yaml'}
+MODES = {"train", "val", "predict", "export", "track", "benchmark"}
+TASKS = {"detect", "segment", "classify", "pose", "obb"}
+TASK2DATA = {
+    "detect": "coco8.yaml",
+    "segment": "coco8-seg.yaml",
+    "classify": "imagenet10",
+    "pose": "coco8-pose.yaml",
+    "obb": "dota8.yaml",
+}
 TASK2MODEL = {
-    'detect': 'yolov8n.pt',
-    'segment': 'yolov8n-seg.pt',
-    'classify': 'yolov8n-cls.pt',
-    'pose': 'yolov8n-pose.pt'}
+    "detect": "yolov8n.pt",
+    "segment": "yolov8n-seg.pt",
+    "classify": "yolov8n-cls.pt",
+    "pose": "yolov8n-pose.pt",
+    "obb": "yolov8n-obb.pt",
+}
 TASK2METRIC = {
-    'detect': 'metrics/mAP50-95(B)',
-    'segment': 'metrics/mAP50-95(M)',
-    'classify': 'metrics/accuracy_top1',
-    'pose': 'metrics/mAP50-95(P)'}
+    "detect": "metrics/mAP50-95(B)",
+    "segment": "metrics/mAP50-95(M)",
+    "classify": "metrics/accuracy_top1",
+    "pose": "metrics/mAP50-95(P)",
+    "obb": "metrics/mAP50-95(B)",
+}

-CLI_HELP_MSG = \
-    f"""
+CLI_HELP_MSG = f"""
    Arguments received: {str(['yolo'] + sys.argv[1:])}. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS
@ -42,7 +70,7 @@ CLI_HELP_MSG = \
        yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01

    2. Predict a YouTube video using a pretrained segmentation model at image size 320:
-        yolo predict model=yolov8n-seg.pt source='https://youtu.be/Zgi9g1ksQHc' imgsz=320
+        yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320

    3. Val a pretrained detection model at batch-size 1 and image size 640:
        yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640
@ -50,6 +78,9 @@ CLI_HELP_MSG = \
    4. Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required)
        yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128

+    6. Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API
+        yolo explorer
+
    5. Run special commands:
        yolo help
        yolo checks
@ -64,16 +95,84 @@ CLI_HELP_MSG = \
    """

 # Define keys for arg type checks
-CFG_FLOAT_KEYS = 'warmup_epochs', 'box', 'cls', 'dfl', 'degrees', 'shear'
-CFG_FRACTION_KEYS = ('dropout', 'iou', 'lr0', 'lrf', 'momentum', 'weight_decay', 'warmup_momentum', 'warmup_bias_lr',
-                     'label_smoothing', 'hsv_h', 'hsv_s', 'hsv_v', 'translate', 'scale', 'perspective', 'flipud',
-                     'fliplr', 'mosaic', 'mixup', 'copy_paste', 'conf', 'iou', 'fraction')  # fraction floats 0.0 - 1.0
-CFG_INT_KEYS = ('epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic', 'mask_ratio', 'max_det', 'vid_stride',
-                'line_width', 'workspace', 'nbs', 'save_period')
-CFG_BOOL_KEYS = ('save', 'exist_ok', 'verbose', 'deterministic', 'single_cls', 'rect', 'cos_lr', 'overlap_mask', 'val',
-                 'save_json', 'save_hybrid', 'half', 'dnn', 'plots', 'show', 'save_txt', 'save_conf', 'save_crop',
-                 'show_labels', 'show_conf', 'visualize', 'augment', 'agnostic_nms', 'retina_masks', 'boxes', 'keras',
-                 'optimize', 'int8', 'dynamic', 'simplify', 'nms', 'profile')
+CFG_FLOAT_KEYS = {"warmup_epochs", "box", "cls", "dfl", "degrees", "shear", "time"}
+CFG_FRACTION_KEYS = {
+    "dropout",
+    "iou",
+    "lr0",
+    "lrf",
+    "momentum",
+    "weight_decay",
+    "warmup_momentum",
+    "warmup_bias_lr",
+    "label_smoothing",
+    "hsv_h",
+    "hsv_s",
+    "hsv_v",
+    "translate",
+    "scale",
+    "perspective",
+    "flipud",
+    "fliplr",
+    "bgr",
+    "mosaic",
+    "mixup",
+    "copy_paste",
+    "conf",
+    "iou",
+    "fraction",
+}  # fraction floats 0.0 - 1.0
+CFG_INT_KEYS = {
+    "epochs",
+    "patience",
+    "batch",
+    "workers",
+    "seed",
+    "close_mosaic",
+    "mask_ratio",
+    "max_det",
+    "vid_stride",
+    "line_width",
+    "workspace",
+    "nbs",
+    "save_period",
+}
+CFG_BOOL_KEYS = {
+    "save",
+    "exist_ok",
+    "verbose",
+    "deterministic",
+    "single_cls",
+    "rect",
+    "cos_lr",
+    "overlap_mask",
+    "val",
+    "save_json",
+    "save_hybrid",
+    "half",
+    "dnn",
+    "plots",
+    "show",
+    "save_txt",
+    "save_conf",
+    "save_crop",
+    "save_frames",
+    "show_labels",
+    "show_conf",
+    "visualize",
+    "augment",
+    "agnostic_nms",
+    "retina_masks",
+    "show_boxes",
+    "keras",
+    "optimize",
+    "int8",
+    "dynamic",
+    "simplify",
+    "nms",
+    "profile",
+    "multi_scale",
+}


 def cfg2dict(cfg):
@ -109,53 +208,72 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove
    # Merge overrides
    if overrides:
        overrides = cfg2dict(overrides)
-        if 'save_dir' not in cfg:
-            overrides.pop('save_dir', None)  # special override keys to ignore
+        if "save_dir" not in cfg:
+            overrides.pop("save_dir", None)  # special override keys to ignore
        check_dict_alignment(cfg, overrides)
        cfg = {**cfg, **overrides}  # merge cfg and overrides dicts (prefer overrides)

    # Special handling for numeric project/name
-    for k in 'project', 'name':
+    for k in "project", "name":
        if k in cfg and isinstance(cfg[k], (int, float)):
            cfg[k] = str(cfg[k])
-    if cfg.get('name') == 'model':  # assign model to 'name' arg
-        cfg['name'] = cfg.get('model', '').split('.')[0]
+    if cfg.get("name") == "model":  # assign model to 'name' arg
+        cfg["name"] = cfg.get("model", "").split(".")[0]
        LOGGER.warning(f"WARNING ⚠️ 'name=model' automatically updated to 'name={cfg['name']}'.")

    # Type and Value checks
-    for k, v in cfg.items():
-        if v is not None:  # None values may be from optional args
-            if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)):
-                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
-                                f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')")
-            elif k in CFG_FRACTION_KEYS:
-                if not isinstance(v, (int, float)):
-                    raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
-                                    f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')")
-                if not (0.0 <= v <= 1.0):
-                    raise ValueError(f"'{k}={v}' is an invalid value. "
-                                     f"Valid '{k}' values are between 0.0 and 1.0.")
-            elif k in CFG_INT_KEYS and not isinstance(v, int):
-                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
-                                f"'{k}' must be an int (i.e. '{k}=8')")
-            elif k in CFG_BOOL_KEYS and not isinstance(v, bool):
-                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
-                                f"'{k}' must be a bool (i.e. '{k}=True' or '{k}=False')")
+    check_cfg(cfg)

    # Return instance
    return IterableSimpleNamespace(**cfg)


+def check_cfg(cfg, hard=True):
+    """
+    Check Ultralytics configuration argument types and values.
+
+    Args:
+        cfg (dict): Configuration arguments keyed by name. With `hard=False`, mistyped values are replaced in place
+            by their coerced equivalents.
+        hard (bool): If True, raise a TypeError on any type mismatch. If False, try to coerce the value to the
+            expected type instead of raising.
+
+    Raises:
+        TypeError: If `hard=True` and a value is not of the type required for its key.
+        ValueError: If a fraction key holds a value outside 0.0 - 1.0 (checked in both modes).
+
+    Note:
+        With `hard=False`, errors raised by float() or int() on values that cannot be converted propagate unchanged.
+    """
+    for k, v in cfg.items():  # only existing keys are reassigned below, so the dict size never changes mid-loop
+        if v is None:  # None values may be from optional args
+            continue
+        if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)):
+            if hard:
+                raise TypeError(
+                    f"'{k}={v}' is of invalid type {type(v).__name__}. "
+                    f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')"
+                )
+            cfg[k] = float(v)
+        elif k in CFG_FRACTION_KEYS:
+            if not isinstance(v, (int, float)):
+                if hard:
+                    raise TypeError(
+                        f"'{k}={v}' is of invalid type {type(v).__name__}. "
+                        f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')"
+                    )
+                cfg[k] = v = float(v)  # rebind v so the range check below sees the coerced value
+            if not (0.0 <= v <= 1.0):
+                raise ValueError(f"'{k}={v}' is an invalid value. Valid '{k}' values are between 0.0 and 1.0.")
+        elif k in CFG_INT_KEYS and not isinstance(v, int):
+            if hard:
+                raise TypeError(
+                    f"'{k}={v}' is of invalid type {type(v).__name__}. '{k}' must be an int (i.e. '{k}=8')"
+                )
+            cfg[k] = int(v)
+        elif k in CFG_BOOL_KEYS and not isinstance(v, bool):
+            if hard:
+                raise TypeError(
+                    f"'{k}={v}' is of invalid type {type(v).__name__}. "
+                    f"'{k}' must be a bool (i.e. '{k}=True' or '{k}=False')"
+                )
+            # bool("False") is True because any non-empty string is truthy, so 'true'/'false' text is parsed
+            # explicitly (case-insensitively, like smart_value()); every other value keeps plain truthiness
+            text = v.strip().lower() if isinstance(v, str) else None
+            cfg[k] = text == "true" if text in {"true", "false"} else bool(v)
+
+
 def get_save_dir(args, name=None):
    """Return save_dir as created from train/val/predict arguments."""

-    if getattr(args, 'save_dir', None):
+    if getattr(args, "save_dir", None):
        save_dir = args.save_dir
    else:
        from ultralytics.utils.files import increment_path

-        project = args.project or Path(SETTINGS['runs_dir']) / args.task
-        name = name or args.name or f'{args.mode}'
+        # '/' binds tighter than 'or': a truthy args.project is used as-is, otherwise the runs root
+        # (ROOT.parent / "tests/tmp/runs" when TESTS_RUNNING, else RUNS_DIR) is joined with args.task
+        project = args.project or (ROOT.parent / "tests/tmp/runs" if TESTS_RUNNING else RUNS_DIR) / args.task
+        name = name or args.name or f"{args.mode}"
+        # exist_ok is taken from args only when RANK is -1 or 0; for any other RANK it is forced to True
        save_dir = increment_path(Path(project) / name, exist_ok=args.exist_ok if RANK in (-1, 0) else True)

    return Path(save_dir)
@ -165,23 +283,26 @@ def _handle_deprecation(custom):
    """Hardcoded function to handle deprecated config keys."""

    for key in custom.copy().keys():
-        if key == 'hide_labels':
-            deprecation_warn(key, 'show_labels')
-            custom['show_labels'] = custom.pop('hide_labels') == 'False'
-        if key == 'hide_conf':
-            deprecation_warn(key, 'show_conf')
-            custom['show_conf'] = custom.pop('hide_conf') == 'False'
-        if key == 'line_thickness':
-            deprecation_warn(key, 'line_width')
-            custom['line_width'] = custom.pop('line_thickness')
+        if key == "boxes":
+            deprecation_warn(key, "show_boxes")
+            custom["show_boxes"] = custom.pop("boxes")
+        if key == "hide_labels":
+            deprecation_warn(key, "show_labels")
+            custom["show_labels"] = custom.pop("hide_labels") == "False"
+        if key == "hide_conf":
+            deprecation_warn(key, "show_conf")
+            custom["show_conf"] = custom.pop("hide_conf") == "False"
+        if key == "line_thickness":
+            deprecation_warn(key, "line_width")
+            custom["line_width"] = custom.pop("line_thickness")

    return custom


 def check_dict_alignment(base: Dict, custom: Dict, e=None):
    """
-    This function checks for any mismatched keys between a custom configuration list and a base configuration list.
-    If any mismatched keys are found, the function prints out similar keys from the base list and exits the program.
+    This function checks for any mismatched keys between a custom configuration list and a base configuration list. If
+    any mismatched keys are found, the function prints out similar keys from the base list and exits the program.

    Args:
        custom (dict): a dictionary of custom configuration options
@ -194,36 +315,35 @@ def check_dict_alignment(base: Dict, custom: Dict, e=None):
    if mismatched:
        from difflib import get_close_matches

-        string = ''
+        string = ""
        for x in mismatched:
            matches = get_close_matches(x, base_keys)  # key list
-            matches = [f'{k}={base[k]}' if base.get(k) is not None else k for k in matches]
-            match_str = f'Similar arguments are i.e. {matches}.' if matches else ''
+            matches = [f"{k}={base[k]}" if base.get(k) is not None else k for k in matches]
+            match_str = f"Similar arguments are i.e. {matches}." if matches else ""
            string += f"'{colorstr('red', 'bold', x)}' is not a valid YOLO argument. {match_str}\n"
        raise SyntaxError(string + CLI_HELP_MSG) from e


 def merge_equals_args(args: List[str]) -> List[str]:
    """
-    Merges arguments around isolated '=' args in a list of strings.
-    The function considers cases where the first argument ends with '=' or the second starts with '=',
-    as well as when the middle one is an equals sign.
+    Merges arguments around isolated '=' args in a list of strings. The function considers cases where the first
+    argument ends with '=' or the second starts with '=', as well as when the middle one is an equals sign.

    Args:
        args (List[str]): A list of strings where each element is an argument.

    Returns:
-        List[str]: A list of strings where the arguments around isolated '=' are merged.
+        (List[str]): A list of strings where the arguments around isolated '=' are merged.
    """
    new_args = []
    for i, arg in enumerate(args):
-        if arg == '=' and 0 < i < len(args) - 1:  # merge ['arg', '=', 'val']
-            new_args[-1] += f'={args[i + 1]}'
+        if arg == "=" and 0 < i < len(args) - 1:  # merge ['arg', '=', 'val']
+            new_args[-1] += f"={args[i + 1]}"
            del args[i + 1]
-        elif arg.endswith('=') and i < len(args) - 1 and '=' not in args[i + 1]:  # merge ['arg=', 'val']
-            new_args.append(f'{arg}{args[i + 1]}')
+        elif arg.endswith("=") and i < len(args) - 1 and "=" not in args[i + 1]:  # merge ['arg=', 'val']
+            new_args.append(f"{arg}{args[i + 1]}")
            del args[i + 1]
-        elif arg.startswith('=') and i > 0:  # merge ['arg', '=val']
+        elif arg.startswith("=") and i > 0:  # merge ['arg', '=val']
            new_args[-1] += arg
        else:
            new_args.append(arg)
@ -247,11 +367,11 @@ def handle_yolo_hub(args: List[str]) -> None:
    """
    from ultralytics import hub

-    if args[0] == 'login':
-        key = args[1] if len(args) > 1 else ''
+    if args[0] == "login":
+        key = args[1] if len(args) > 1 else ""
        # Log in to Ultralytics HUB using the provided API key
        hub.login(key)
-    elif args[0] == 'logout':
+    elif args[0] == "logout":
        # Log out from Ultralytics HUB
        hub.logout()

@ -271,39 +391,47 @@ def handle_yolo_settings(args: List[str]) -> None:
        python my_script.py yolo settings reset
        ```
    """
-    url = 'https://docs.ultralytics.com/quickstart/#ultralytics-settings'  # help URL
+    url = "https://docs.ultralytics.com/quickstart/#ultralytics-settings"  # help URL
    try:
        if any(args):
-            if args[0] == 'reset':
+            if args[0] == "reset":
                SETTINGS_YAML.unlink()  # delete the settings file
                SETTINGS.reset()  # create new settings
-                LOGGER.info('Settings reset successfully')  # inform the user that settings have been reset
+                LOGGER.info("Settings reset successfully")  # inform the user that settings have been reset
            else:  # save a new setting
                new = dict(parse_key_value_pair(a) for a in args)
                check_dict_alignment(SETTINGS, new)
                SETTINGS.update(new)

-        LOGGER.info(f'💡 Learn about settings at {url}')
+        LOGGER.info(f"💡 Learn about settings at {url}")
        yaml_print(SETTINGS_YAML)  # print the current settings
    except Exception as e:
        LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.")


+def handle_explorer():
+    """Launch the Ultralytics Explorer GUI by running its Streamlit dashboard script in a subprocess."""
+    checks.check_requirements("streamlit")
+    LOGGER.info("💡 Loading Explorer dashboard...")
+    dashboard_script = ROOT / "data/explorer/gui/dash.py"
+    command = ["streamlit", "run", dashboard_script, "--server.maxMessageSize", "2048"]
+    subprocess.run(command)
+
+
 def parse_key_value_pair(pair):
    """Parse one 'key=value' pair and return key and value."""
-    re.sub(r' *= *', '=', pair)  # remove spaces around equals sign
-    k, v = pair.split('=', 1)  # split on first '=' sign
+    k, v = pair.split("=", 1)  # split on first '=' sign
+    k, v = k.strip(), v.strip()  # remove spaces
    assert v, f"missing '{k}' value"
    return k, smart_value(v)


 def smart_value(v):
    """Convert a string to an underlying type such as int, float, bool, etc."""
-    if v.lower() == 'none':
+    v_lower = v.lower()
+    if v_lower == "none":
        return None
-    elif v.lower() == 'true':
+    elif v_lower == "true":
        return True
-    elif v.lower() == 'false':
+    elif v_lower == "false":
        return False
    else:
        with contextlib.suppress(Exception):
@ -311,7 +439,7 @@ def smart_value(v):
        return v


-def entrypoint(debug=''):
+def entrypoint(debug=""):
    """
    This function is the ultralytics package entrypoint, it's responsible for parsing the command line arguments passed
    to the package.
@ -326,135 +454,160 @@ def entrypoint(debug=''):
    It uses the package's default cfg and initializes it using the passed overrides.
    Then it calls the CLI function with the composed cfg
    """
-    args = (debug.split(' ') if debug else sys.argv)[1:]
+    args = (debug.split(" ") if debug else sys.argv)[1:]
    if not args:  # no arguments passed
        LOGGER.info(CLI_HELP_MSG)
        return

    special = {
-        'help': lambda: LOGGER.info(CLI_HELP_MSG),
-        'checks': checks.check_yolo,
-        'version': lambda: LOGGER.info(__version__),
-        'settings': lambda: handle_yolo_settings(args[1:]),
-        'cfg': lambda: yaml_print(DEFAULT_CFG_PATH),
-        'hub': lambda: handle_yolo_hub(args[1:]),
-        'login': lambda: handle_yolo_hub(args),
-        'copy-cfg': copy_default_cfg}
+        "help": lambda: LOGGER.info(CLI_HELP_MSG),
+        "checks": checks.collect_system_info,
+        "version": lambda: LOGGER.info(__version__),
+        "settings": lambda: handle_yolo_settings(args[1:]),
+        "cfg": lambda: yaml_print(DEFAULT_CFG_PATH),
+        "hub": lambda: handle_yolo_hub(args[1:]),
+        "login": lambda: handle_yolo_hub(args),
+        "copy-cfg": copy_default_cfg,
+        "explorer": lambda: handle_explorer(),
+    }
    full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special}

-    # Define common mis-uses of special commands, i.e. -h, -help, --help
+    # Define common misuses of special commands, i.e. -h, -help, --help
    special.update({k[0]: v for k, v in special.items()})  # singular
-    special.update({k[:-1]: v for k, v in special.items() if len(k) > 1 and k.endswith('s')})  # singular
-    special = {**special, **{f'-{k}': v for k, v in special.items()}, **{f'--{k}': v for k, v in special.items()}}
+    special.update({k[:-1]: v for k, v in special.items() if len(k) > 1 and k.endswith("s")})  # singular
+    special = {**special, **{f"-{k}": v for k, v in special.items()}, **{f"--{k}": v for k, v in special.items()}}

    overrides = {}  # basic overrides, i.e. imgsz=320
    for a in merge_equals_args(args):  # merge spaces around '=' sign
-        if a.startswith('--'):
-            LOGGER.warning(f"WARNING ⚠️ '{a}' does not require leading dashes '--', updating to '{a[2:]}'.")
+        if a.startswith("--"):
+            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require leading dashes '--', updating to '{a[2:]}'.")
            a = a[2:]
-        if a.endswith(','):
-            LOGGER.warning(f"WARNING ⚠️ '{a}' does not require trailing comma ',', updating to '{a[:-1]}'.")
+        if a.endswith(","):
+            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require trailing comma ',', updating to '{a[:-1]}'.")
            a = a[:-1]
-        if '=' in a:
+        if "=" in a:
            try:
                k, v = parse_key_value_pair(a)
-                if k == 'cfg':  # custom.yaml passed
-                    LOGGER.info(f'Overriding {DEFAULT_CFG_PATH} with {v}')
-                    overrides = {k: val for k, val in yaml_load(checks.check_yaml(v)).items() if k != 'cfg'}
+                if k == "cfg" and v is not None:  # custom.yaml passed
+                    LOGGER.info(f"Overriding {DEFAULT_CFG_PATH} with {v}")
+                    overrides = {k: val for k, val in yaml_load(checks.check_yaml(v)).items() if k != "cfg"}
                else:
                    overrides[k] = v
            except (NameError, SyntaxError, ValueError, AssertionError) as e:
-                check_dict_alignment(full_args_dict, {a: ''}, e)
+                check_dict_alignment(full_args_dict, {a: ""}, e)

        elif a in TASKS:
-            overrides['task'] = a
+            overrides["task"] = a
        elif a in MODES:
-            overrides['mode'] = a
+            overrides["mode"] = a
        elif a.lower() in special:
            special[a.lower()]()
            return
        elif a in DEFAULT_CFG_DICT and isinstance(DEFAULT_CFG_DICT[a], bool):
            overrides[a] = True  # auto-True for default bool args, i.e. 'yolo show' sets show=True
        elif a in DEFAULT_CFG_DICT:
-            raise SyntaxError(f"'{colorstr('red', 'bold', a)}' is a valid YOLO argument but is missing an '=' sign "
-                              f"to set its value, i.e. try '{a}={DEFAULT_CFG_DICT[a]}'\n{CLI_HELP_MSG}")
+            raise SyntaxError(
+                f"'{colorstr('red', 'bold', a)}' is a valid YOLO argument but is missing an '=' sign "
+                f"to set its value, i.e. try '{a}={DEFAULT_CFG_DICT[a]}'\n{CLI_HELP_MSG}"
+            )
        else:
-            check_dict_alignment(full_args_dict, {a: ''})
+            check_dict_alignment(full_args_dict, {a: ""})

    # Check keys
    check_dict_alignment(full_args_dict, overrides)

    # Mode
-    mode = overrides.get('mode')
+    mode = overrides.get("mode")
    if mode is None:
-        mode = DEFAULT_CFG.mode or 'predict'
-        LOGGER.warning(f"WARNING ⚠️ 'mode' is missing. Valid modes are {MODES}. Using default 'mode={mode}'.")
+        mode = DEFAULT_CFG.mode or "predict"
+        LOGGER.warning(f"WARNING ⚠️ 'mode' argument is missing. Valid modes are {MODES}. Using default 'mode={mode}'.")
    elif mode not in MODES:
        raise ValueError(f"Invalid 'mode={mode}'. Valid modes are {MODES}.\n{CLI_HELP_MSG}")

    # Task
-    task = overrides.pop('task', None)
+    task = overrides.pop("task", None)
    if task:
        if task not in TASKS:
            raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}")
-        if 'model' not in overrides:
-            overrides['model'] = TASK2MODEL[task]
+        if "model" not in overrides:
+            overrides["model"] = TASK2MODEL[task]

    # Model
-    model = overrides.pop('model', DEFAULT_CFG.model)
+    model = overrides.pop("model", DEFAULT_CFG.model)
    if model is None:
-        model = 'yolov8n.pt'
-        LOGGER.warning(f"WARNING ⚠️ 'model' is missing. Using default 'model={model}'.")
-    overrides['model'] = model
-    if 'rtdetr' in model.lower():  # guess architecture
+        model = "yolov8n.pt"
+        LOGGER.warning(f"WARNING ⚠️ 'model' argument is missing. Using default 'model={model}'.")
+    overrides["model"] = model
+    # stem = Path(model).stem.lower()
+    stem = model.lower()
+    if "rtdetr" in stem:  # guess architecture
        from ultralytics import RTDETR
+
        model = RTDETR(model)  # no task argument
-    elif 'fastsam' in model.lower():
+    elif "fastsam" in stem:
        from ultralytics import FastSAM
+
        model = FastSAM(model)
-    elif 'sam' in model.lower():
+    elif "sam" in stem:
        from ultralytics import SAM
+
        model = SAM(model)
-    else:
+    elif re.search("v3|v5|v6|v8|v9", stem):
        from ultralytics import YOLO
+
        model = YOLO(model, task=task)
-    if isinstance(overrides.get('pretrained'), str):
-        model.load(overrides['pretrained'])
+    else:
+        from ultralytics import YOLOv10
+
+        # Special case for the HuggingFace Hub
+        split_path = model.split('/')
+        if len(split_path) == 2 and (not os.path.exists(model)):
+            model = YOLOv10.from_pretrained(model)
+        else:
+            model = YOLOv10(model)
+    if isinstance(overrides.get("pretrained"), str):
+        model.load(overrides["pretrained"])

    # Task Update
    if task != model.task:
        if task:
-            LOGGER.warning(f"WARNING ⚠️ conflicting 'task={task}' passed with 'task={model.task}' model. "
-                           f"Ignoring 'task={task}' and updating to 'task={model.task}' to match model.")
+            LOGGER.warning(
+                f"WARNING ⚠️ conflicting 'task={task}' passed with 'task={model.task}' model. "
+                f"Ignoring 'task={task}' and updating to 'task={model.task}' to match model."
+            )
        task = model.task

    # Mode
-    if mode in ('predict', 'track') and 'source' not in overrides:
-        overrides['source'] = DEFAULT_CFG.source or ASSETS
-        LOGGER.warning(f"WARNING ⚠️ 'source' is missing. Using default 'source={overrides['source']}'.")
-    elif mode in ('train', 'val'):
-        if 'data' not in overrides and 'resume' not in overrides:
-            overrides['data'] = TASK2DATA.get(task or DEFAULT_CFG.task, DEFAULT_CFG.data)
-            LOGGER.warning(f"WARNING ⚠️ 'data' is missing. Using default 'data={overrides['data']}'.")
-    elif mode == 'export':
-        if 'format' not in overrides:
-            overrides['format'] = DEFAULT_CFG.format or 'torchscript'
-            LOGGER.warning(f"WARNING ⚠️ 'format' is missing. Using default 'format={overrides['format']}'.")
+    if mode in ("predict", "track") and "source" not in overrides:
+        overrides["source"] = DEFAULT_CFG.source or ASSETS
+        LOGGER.warning(f"WARNING ⚠️ 'source' argument is missing. Using default 'source={overrides['source']}'.")
+    elif mode in ("train", "val"):
+        if "data" not in overrides and "resume" not in overrides:
+            overrides["data"] = DEFAULT_CFG.data or TASK2DATA.get(task or DEFAULT_CFG.task, DEFAULT_CFG.data)
+            LOGGER.warning(f"WARNING ⚠️ 'data' argument is missing. Using default 'data={overrides['data']}'.")
+    elif mode == "export":
+        if "format" not in overrides:
+            overrides["format"] = DEFAULT_CFG.format or "torchscript"
+            LOGGER.warning(f"WARNING ⚠️ 'format' argument is missing. Using default 'format={overrides['format']}'.")

    # Run command in python
-    # getattr(model, mode)(**vars(get_cfg(overrides=overrides)))  # default args using default.yaml
    getattr(model, mode)(**overrides)  # default args from model

+    # Show help
+    LOGGER.info(f"💡 Learn more at https://docs.ultralytics.com/modes/{mode}")
+

 # Special modes --------------------------------------------------------------------------------------------------------
 def copy_default_cfg():
    """Copy and create a new default configuration file with '_copy' appended to its name."""
-    new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace('.yaml', '_copy.yaml')
+    new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace(".yaml", "_copy.yaml")
    shutil.copy2(DEFAULT_CFG_PATH, new_file)
-    LOGGER.info(f'{DEFAULT_CFG_PATH} copied to {new_file}\n'
-                f"Example YOLO command with this new custom cfg:\n    yolo cfg='{new_file}' imgsz=320 batch=8")
+    LOGGER.info(
+        f"{DEFAULT_CFG_PATH} copied to {new_file}\n"
+        f"Example YOLO command with this new custom cfg:\n    yolo cfg='{new_file}' imgsz=320 batch=8"
+    )


-if __name__ == '__main__':
+if __name__ == "__main__":
    # Example: entrypoint(debug='yolo predict model=yolov8n.pt')
-    entrypoint(debug='')
+    entrypoint(debug="")
--- a/ultralytics/cfg/pycache/init.cpython-312.pyc
+++ b/ultralytics/cfg/pycache/init.cpython-312.pyc
--- a/ultralytics/cfg/pycache/init.cpython-39.pyc
+++ b/ultralytics/cfg/pycache/init.cpython-39.pyc
--- a/ultralytics/cfg/datasets/Argoverse.yaml
+++ b/ultralytics/cfg/datasets/Argoverse.yaml
@ -1,17 +1,17 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
+# Argoverse-HD dataset (ring-front-center camera) https://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
+# Documentation: https://docs.ultralytics.com/datasets/detect/argoverse/
 # Example usage: yolo train data=Argoverse.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── Argoverse  ← downloads here (31.5 GB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/Argoverse  # dataset root dir
-train: Argoverse-1.1/images/train/  # train images (relative to 'path') 39384 images
-val: Argoverse-1.1/images/val/  # val images (relative to 'path') 15062 images
-test: Argoverse-1.1/images/test/  # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
+path: ../datasets/Argoverse # dataset root dir
+train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
+val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
+test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview

 # Classes
 names:
@ -24,7 +24,6 @@ names:
  6: traffic_light
  7: stop_sign

-
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  import json
@ -64,7 +63,9 @@ download: |
  # Download 'https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip' (deprecated S3 link)
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link']
-  download(urls, dir=dir)
+  print("\n\nWARNING: Argoverse dataset MUST be downloaded manually, autodownload will NOT work.")
+  print(f"WARNING: Manually download Argoverse dataset '{urls[0]}' to '{dir}' and re-run your command.\n\n")
+  # download(urls, dir=dir)

  # Convert
  annotations_dir = 'Argoverse-HD/annotations/'
--- a/ultralytics/cfg/datasets/DOTAv1.5.yaml
+++ b/ultralytics/cfg/datasets/DOTAv1.5.yaml
@ -1,18 +1,19 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-# DOTA 2.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
-# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv2.yaml
+# DOTA 1.5 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
+# Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
+# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.5.yaml
 # parent
 # ├── ultralytics
 # └── datasets
-#     └── dota2  ← downloads here (2GB)
+#     └── dota1.5  ← downloads here (2GB)

 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/DOTAv2  # dataset root dir
-train: images/train  # train images (relative to 'path') 1411 images
-val: images/val  # val images (relative to 'path') 458 images
-test: images/test  # test images (optional) 937 images
+path: ../datasets/DOTAv1.5 # dataset root dir
+train: images/train # train images (relative to 'path') 1411 images
+val: images/val # val images (relative to 'path') 458 images
+test: images/test # test images (optional) 937 images

-# Classes for DOTA 2.0
+# Classes for DOTA 1.5
 names:
  0: plane
  1: ship
@ -30,8 +31,6 @@ names:
  13: soccer ball field
  14: swimming pool
  15: container crane
-  16: airport
-  17: helipad

 # Download script/URL (optional)
-download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv2.zip
+download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.5.zip
--- a/ultralytics/cfg/datasets/DOTAv1.yaml
+++ b/ultralytics/cfg/datasets/DOTAv1.yaml
@ -0,0 +1,35 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# DOTA 1.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
+# Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
+# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── dota1  ← downloads here (2GB)
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/DOTAv1 # dataset root dir
+train: images/train # train images (relative to 'path') 1411 images
+val: images/val # val images (relative to 'path') 458 images
+test: images/test # test images (optional) 937 images
+
+# Classes for DOTA 1.0
+names:
+  0: plane
+  1: ship
+  2: storage tank
+  3: baseball diamond
+  4: tennis court
+  5: basketball court
+  6: ground track field
+  7: harbor
+  8: bridge
+  9: large vehicle
+  10: small vehicle
+  11: helicopter
+  12: roundabout
+  13: soccer ball field
+  14: swimming pool
+
+# Download script/URL (optional)
+download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.zip
--- a/ultralytics/cfg/datasets/GlobalWheat2020.yaml
+++ b/ultralytics/cfg/datasets/GlobalWheat2020.yaml
@ -1,14 +1,14 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-# Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatchewan
+# Global Wheat 2020 dataset https://www.global-wheat.com/ by University of Saskatchewan
+# Documentation: https://docs.ultralytics.com/datasets/detect/globalwheat2020/
 # Example usage: yolo train data=GlobalWheat2020.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── GlobalWheat2020  ← downloads here (7.0 GB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/GlobalWheat2020  # dataset root dir
+path: ../datasets/GlobalWheat2020 # dataset root dir
 train: # train images (relative to 'path') 3422 images
  - images/arvalis_1
  - images/arvalis_2
@ -29,7 +29,6 @@ test: # test images (optional) 1276 images
 names:
  0: wheat_head

-
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  from ultralytics.utils.downloads import download
--- a/ultralytics/cfg/datasets/ImageNet.yaml
+++ b/ultralytics/cfg/datasets/ImageNet.yaml
@ -1,18 +1,18 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
 # Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
+# Documentation: https://docs.ultralytics.com/datasets/classify/imagenet/
 # Example usage: yolo train task=classify data=imagenet
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── imagenet  ← downloads here (144 GB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/imagenet  # dataset root dir
-train: train  # train images (relative to 'path') 1281167 images
-val: val  # val images (relative to 'path') 50000 images
-test:  # test images (optional)
+path: ../datasets/imagenet # dataset root dir
+train: train # train images (relative to 'path') 1281167 images
+val: val # val images (relative to 'path') 50000 images
+test: # test images (optional)

 # Classes
 names:
@ -2020,6 +2020,5 @@ map:
  n13133613: ear
  n15075141: toilet_tissue

-
 # Download script/URL (optional)
 download: yolo/data/scripts/get_imagenet.sh
--- a/ultralytics/cfg/datasets/Objects365.yaml
+++ b/ultralytics/cfg/datasets/Objects365.yaml
@ -1,17 +1,17 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Objects365 dataset https://www.objects365.org/ by Megvii
+# Documentation: https://docs.ultralytics.com/datasets/detect/objects365/
 # Example usage: yolo train data=Objects365.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── Objects365  ← downloads here (712 GB = 367G data + 345G zips)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/Objects365  # dataset root dir
-train: images/train  # train images (relative to 'path') 1742289 images
+path: ../datasets/Objects365 # dataset root dir
+train: images/train # train images (relative to 'path') 1742289 images
 val: images/val # val images (relative to 'path') 80000 images
-test:  # test images (optional)
+test: # test images (optional)

 # Classes
 names:
@ -381,7 +381,6 @@ names:
  363: Curling
  364: Table Tennis

-
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  from tqdm import tqdm
--- a/ultralytics/cfg/datasets/SKU-110K.yaml
+++ b/ultralytics/cfg/datasets/SKU-110K.yaml
@ -1,23 +1,22 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
+# Documentation: https://docs.ultralytics.com/datasets/detect/sku-110k/
 # Example usage: yolo train data=SKU-110K.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── SKU-110K  ← downloads here (13.6 GB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/SKU-110K  # dataset root dir
-train: train.txt  # train images (relative to 'path')  8219 images
-val: val.txt  # val images (relative to 'path')  588 images
-test: test.txt  # test images (optional)  2936 images
+path: ../datasets/SKU-110K # dataset root dir
+train: train.txt # train images (relative to 'path')  8219 images
+val: val.txt # val images (relative to 'path')  588 images
+test: test.txt # test images (optional)  2936 images

 # Classes
 names:
  0: object

-
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  import shutil
--- a/ultralytics/cfg/datasets/VOC.yaml
+++ b/ultralytics/cfg/datasets/VOC.yaml
@ -1,12 +1,12 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
+# Documentation: https://docs.ultralytics.com/datasets/detect/voc/
 # Example usage: yolo train data=VOC.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── VOC  ← downloads here (2.8 GB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
 path: ../datasets/VOC
 train: # train images (relative to 'path')  16551 images
@ -42,7 +42,6 @@ names:
  18: train
  19: tvmonitor

-
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  import xml.etree.ElementTree as ET
@ -81,7 +80,7 @@ download: |
  urls = [f'{url}VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
          f'{url}VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
          f'{url}VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
-  download(urls, dir=dir / 'images', curl=True, threads=3)
+  download(urls, dir=dir / 'images', curl=True, threads=3, exist_ok=True)  # download and unzip over existing paths (required)

  # Convert
  path = dir / 'images/VOCdevkit'
--- a/ultralytics/cfg/datasets/VisDrone.yaml
+++ b/ultralytics/cfg/datasets/VisDrone.yaml
@ -1,17 +1,17 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
+# Documentation: https://docs.ultralytics.com/datasets/detect/visdrone/
 # Example usage: yolo train data=VisDrone.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── VisDrone  ← downloads here (2.3 GB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/VisDrone  # dataset root dir
-train: VisDrone2019-DET-train/images  # train images (relative to 'path')  6471 images
-val: VisDrone2019-DET-val/images  # val images (relative to 'path')  548 images
-test: VisDrone2019-DET-test-dev/images  # test images (optional)  1610 images
+path: ../datasets/VisDrone # dataset root dir
+train: VisDrone2019-DET-train/images # train images (relative to 'path')  6471 images
+val: VisDrone2019-DET-val/images # val images (relative to 'path')  548 images
+test: VisDrone2019-DET-test-dev/images # test images (optional)  1610 images

 # Classes
 names:
@ -26,7 +26,6 @@ names:
  8: bus
  9: motor

-
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  import os
--- a/ultralytics/cfg/datasets/african-wildlife.yaml
+++ b/ultralytics/cfg/datasets/african-wildlife.yaml
@ -0,0 +1,24 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# African-wildlife dataset by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/detect/african-wildlife/
+# Example usage: yolo train data=african-wildlife.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── african-wildlife  ← downloads here (100 MB)
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/african-wildlife # dataset root dir
+train: train/images # train images (relative to 'path') 1052 images
+val: valid/images # val images (relative to 'path') 225 images
+test: test/images # test images (relative to 'path') 227 images
+
+# Classes
+names:
+  0: buffalo
+  1: elephant
+  2: rhino
+  3: zebra
+
+# Download script/URL (optional)
+download: https://ultralytics.com/assets/african-wildlife.zip
--- a/ultralytics/cfg/datasets/brain-tumor.yaml
+++ b/ultralytics/cfg/datasets/brain-tumor.yaml
@ -0,0 +1,22 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Brain-tumor dataset by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/detect/brain-tumor/
+# Example usage: yolo train data=brain-tumor.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── brain-tumor  ← downloads here (4.05 MB)
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/brain-tumor # dataset root dir
+train: train/images # train images (relative to 'path') 893 images
+val: valid/images # val images (relative to 'path') 223 images
+test: # test images (relative to 'path')
+
+# Classes
+names:
+  0: negative
+  1: positive
+
+# Download script/URL (optional)
+download: https://ultralytics.com/assets/brain-tumor.zip
--- a/ultralytics/cfg/datasets/carparts-seg.yaml
+++ b/ultralytics/cfg/datasets/carparts-seg.yaml
@ -0,0 +1,43 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Carparts-seg dataset by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/segment/carparts-seg/
+# Example usage: yolo train data=carparts-seg.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── carparts-seg  ← downloads here (132 MB)
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/carparts-seg # dataset root dir
+train: train/images # train images (relative to 'path') 3516 images
+val: valid/images # val images (relative to 'path') 276 images
+test: test/images # test images (relative to 'path') 401 images
+
+# Classes
+names:
+  0: back_bumper
+  1: back_door
+  2: back_glass
+  3: back_left_door
+  4: back_left_light
+  5: back_light
+  6: back_right_door
+  7: back_right_light
+  8: front_bumper
+  9: front_door
+  10: front_glass
+  11: front_left_door
+  12: front_left_light
+  13: front_light
+  14: front_right_door
+  15: front_right_light
+  16: hood
+  17: left_mirror
+  18: object
+  19: right_mirror
+  20: tailgate
+  21: trunk
+  22: wheel
+
+# Download script/URL (optional)
+download: https://ultralytics.com/assets/carparts-seg.zip
--- a/ultralytics/cfg/datasets/coco-pose.yaml
+++ b/ultralytics/cfg/datasets/coco-pose.yaml
@ -1,20 +1,20 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-# COCO 2017 dataset http://cocodataset.org by Microsoft
+# COCO 2017 dataset https://cocodataset.org by Microsoft
+# Documentation: https://docs.ultralytics.com/datasets/pose/coco/
 # Example usage: yolo train data=coco-pose.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco-pose  ← downloads here (20.1 GB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/coco-pose  # dataset root dir
-train: train2017.txt  # train images (relative to 'path') 118287 images
-val: val2017.txt  # val images (relative to 'path') 5000 images
-test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+path: ../datasets/coco-pose # dataset root dir
+train: train2017.txt # train images (relative to 'path') 118287 images
+val: val2017.txt # val images (relative to 'path') 5000 images
+test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794

 # Keypoints
-kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
 flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]

 # Classes
--- a/ultralytics/cfg/datasets/coco.yaml
+++ b/ultralytics/cfg/datasets/coco.yaml
@ -1,17 +1,17 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-# COCO 2017 dataset http://cocodataset.org by Microsoft
+# COCO 2017 dataset https://cocodataset.org by Microsoft
+# Documentation: https://docs.ultralytics.com/datasets/detect/coco/
 # Example usage: yolo train data=coco.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco  ← downloads here (20.1 GB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/coco  # dataset root dir
-train: train2017.txt  # train images (relative to 'path') 118287 images
-val: val2017.txt  # val images (relative to 'path') 5000 images
-test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+path: ../datasets/coco # dataset root dir
+train: train2017.txt # train images (relative to 'path') 118287 images
+val: val2017.txt # val images (relative to 'path') 5000 images
+test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794

 # Classes
 names:
@ -96,7 +96,6 @@ names:
  78: hair drier
  79: toothbrush

-
 # Download script/URL (optional)
 download: |
  from ultralytics.utils.downloads import download
--- a/ultralytics/cfg/datasets/coco128-seg.yaml
+++ b/ultralytics/cfg/datasets/coco128-seg.yaml
@ -1,17 +1,17 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/segment/coco/
 # Example usage: yolo train data=coco128-seg.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco128-seg  ← downloads here (7 MB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/coco128-seg  # dataset root dir
-train: images/train2017  # train images (relative to 'path') 128 images
-val: images/train2017  # val images (relative to 'path') 128 images
-test:  # test images (optional)
+path: ../datasets/coco128-seg # dataset root dir
+train: images/train2017 # train images (relative to 'path') 128 images
+val: images/train2017 # val images (relative to 'path') 128 images
+test: # test images (optional)

 # Classes
 names:
@ -96,6 +96,5 @@ names:
  78: hair drier
  79: toothbrush

-
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/coco128-seg.zip
--- a/ultralytics/cfg/datasets/coco128.yaml
+++ b/ultralytics/cfg/datasets/coco128.yaml
@ -1,17 +1,17 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/detect/coco/
 # Example usage: yolo train data=coco128.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco128  ← downloads here (7 MB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/coco128  # dataset root dir
-train: images/train2017  # train images (relative to 'path') 128 images
-val: images/train2017  # val images (relative to 'path') 128 images
-test:  # test images (optional)
+path: ../datasets/coco128 # dataset root dir
+train: images/train2017 # train images (relative to 'path') 128 images
+val: images/train2017 # val images (relative to 'path') 128 images
+test: # test images (optional)

 # Classes
 names:
@ -96,6 +96,5 @@ names:
  78: hair drier
  79: toothbrush

-
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/coco128.zip
--- a/ultralytics/cfg/datasets/coco8-pose.yaml
+++ b/ultralytics/cfg/datasets/coco8-pose.yaml
@ -1,20 +1,20 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/pose/coco8-pose/
 # Example usage: yolo train data=coco8-pose.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco8-pose  ← downloads here (1 MB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/coco8-pose  # dataset root dir
-train: images/train  # train images (relative to 'path') 4 images
-val: images/val  # val images (relative to 'path') 4 images
-test:  # test images (optional)
+path: ../datasets/coco8-pose # dataset root dir
+train: images/train # train images (relative to 'path') 4 images
+val: images/val # val images (relative to 'path') 4 images
+test: # test images (optional)

 # Keypoints
-kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
 flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]

 # Classes
--- a/ultralytics/cfg/datasets/coco8-seg.yaml
+++ b/ultralytics/cfg/datasets/coco8-seg.yaml
@ -1,17 +1,17 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/segment/coco8-seg/
 # Example usage: yolo train data=coco8-seg.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco8-seg  ← downloads here (1 MB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/coco8-seg  # dataset root dir
-train: images/train  # train images (relative to 'path') 4 images
-val: images/val  # val images (relative to 'path') 4 images
-test:  # test images (optional)
+path: ../datasets/coco8-seg # dataset root dir
+train: images/train # train images (relative to 'path') 4 images
+val: images/val # val images (relative to 'path') 4 images
+test: # test images (optional)

 # Classes
 names:
@ -96,6 +96,5 @@ names:
  78: hair drier
  79: toothbrush

-
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/coco8-seg.zip
--- a/ultralytics/cfg/datasets/coco8.yaml
+++ b/ultralytics/cfg/datasets/coco8.yaml
@ -1,17 +1,17 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # COCO8 dataset (first 8 images from COCO train2017) by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/detect/coco8/
 # Example usage: yolo train data=coco8.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── coco8  ← downloads here (1 MB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/coco8  # dataset root dir
-train: images/train  # train images (relative to 'path') 4 images
-val: images/val  # val images (relative to 'path') 4 images
-test:  # test images (optional)
+path: ../datasets/coco8 # dataset root dir
+train: images/train # train images (relative to 'path') 4 images
+val: images/val # val images (relative to 'path') 4 images
+test: # test images (optional)

 # Classes
 names:
@ -96,6 +96,5 @@ names:
  78: hair drier
  79: toothbrush

-
 # Download script/URL (optional)
 download: https://ultralytics.com/assets/coco8.zip
--- a/ultralytics/cfg/datasets/crack-seg.yaml
+++ b/ultralytics/cfg/datasets/crack-seg.yaml
@ -0,0 +1,21 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Crack-seg dataset by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/segment/crack-seg/
+# Example usage: yolo train data=crack-seg.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── crack-seg  ← downloads here (91.2 MB)
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/crack-seg # dataset root dir
+train: train/images # train images (relative to 'path') 3717 images
+val: valid/images # val images (relative to 'path') 112 images
+test: test/images # test images (relative to 'path') 200 images
+
+# Classes
+names:
+  0: crack
+
+# Download script/URL (optional)
+download: https://ultralytics.com/assets/crack-seg.zip
--- a/ultralytics/cfg/datasets/dota8.yaml
+++ b/ultralytics/cfg/datasets/dota8.yaml
@ -0,0 +1,34 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# DOTA8 dataset (8 images from the split DOTAv1 dataset) by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/obb/dota8/
+# Example usage: yolo train model=yolov8n-obb.pt data=dota8.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── dota8  ← downloads here (1MB)
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/dota8 # dataset root dir
+train: images/train # train images (relative to 'path') 4 images
+val: images/val # val images (relative to 'path') 4 images
+
+# Classes for DOTA 1.0
+names:
+  0: plane
+  1: ship
+  2: storage tank
+  3: baseball diamond
+  4: tennis court
+  5: basketball court
+  6: ground track field
+  7: harbor
+  8: bridge
+  9: large vehicle
+  10: small vehicle
+  11: helicopter
+  12: roundabout
+  13: soccer ball field
+  14: swimming pool
+
+# Download script/URL (optional)
+download: https://github.com/ultralytics/yolov5/releases/download/v1.0/dota8.zip
--- a/ultralytics/cfg/datasets/open-images-v7.yaml
+++ b/ultralytics/cfg/datasets/open-images-v7.yaml
@ -1,17 +1,17 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Open Images v7 dataset https://storage.googleapis.com/openimages/web/index.html by Google
+# Documentation: https://docs.ultralytics.com/datasets/detect/open-images-v7/
 # Example usage: yolo train data=open-images-v7.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── open-images-v7  ← downloads here (561 GB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/open-images-v7  # dataset root dir
-train: images/train  # train images (relative to 'path') 1743042 images
-val: images/val  # val images (relative to 'path') 41620 images
-test:  # test images (optional)
+path: ../datasets/open-images-v7 # dataset root dir
+train: images/train # train images (relative to 'path') 1743042 images
+val: images/val # val images (relative to 'path') 41620 images
+test: # test images (optional)

 # Classes
 names:
@ -617,7 +617,6 @@ names:
  599: Zebra
  600: Zucchini

-
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  from ultralytics.utils import LOGGER, SETTINGS, Path, is_ubuntu, get_ubuntu_version
--- a/ultralytics/cfg/datasets/package-seg.yaml
+++ b/ultralytics/cfg/datasets/package-seg.yaml
@ -0,0 +1,21 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Package-seg dataset by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/segment/package-seg/
+# Example usage: yolo train data=package-seg.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── package-seg  ← downloads here (102 MB)
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/package-seg # dataset root dir
+train: images/train # train images (relative to 'path') 1920 images
+val: images/val # val images (relative to 'path') 89 images
+test: test/images # test images (relative to 'path') 188 images
+
+# Classes
+names:
+  0: package
+
+# Download script/URL (optional)
+download: https://ultralytics.com/assets/package-seg.zip
--- a/ultralytics/cfg/datasets/tiger-pose.yaml
+++ b/ultralytics/cfg/datasets/tiger-pose.yaml
@ -0,0 +1,24 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Tiger Pose dataset by Ultralytics
+# Documentation: https://docs.ultralytics.com/datasets/pose/tiger-pose/
+# Example usage: yolo train data=tiger-pose.yaml
+# parent
+# ├── ultralytics
+# └── datasets
+#     └── tiger-pose  ← downloads here (75.3 MB)
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: ../datasets/tiger-pose # dataset root dir
+train: train # train images (relative to 'path') 210 images
+val: val # val images (relative to 'path') 53 images
+
+# Keypoints
+kpt_shape: [12, 2] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+flip_idx: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+
+# Classes
+names:
+  0: tiger
+
+# Download script/URL (optional)
+download: https://ultralytics.com/assets/tiger-pose.zip
--- a/ultralytics/cfg/datasets/xView.yaml
+++ b/ultralytics/cfg/datasets/xView.yaml
@ -1,17 +1,17 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
 # --------  DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command!  --------
+# Documentation: https://docs.ultralytics.com/datasets/detect/xview/
 # Example usage: yolo train data=xView.yaml
 # parent
 # ├── ultralytics
 # └── datasets
 #     └── xView  ← downloads here (20.7 GB)

-
 # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
-path: ../datasets/xView  # dataset root dir
-train: images/autosplit_train.txt  # train images (relative to 'path') 90% of 847 train images
-val: images/autosplit_val.txt  # train images (relative to 'path') 10% of 847 train images
+path: ../datasets/xView # dataset root dir
+train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
+val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images

 # Classes
 names:
@ -76,7 +76,6 @@ names:
  58: Pylon
  59: Tower

-
 # Download script/URL (optional) ---------------------------------------------------------------------------------------
 download: |
  import json
--- a/ultralytics/cfg/default.yaml
+++ b/ultralytics/cfg/default.yaml
@ -1,116 +1,127 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Default training settings and hyperparameters for medium-augmentation COCO training

-task: detect  # (str) YOLO task, i.e. detect, segment, classify, pose
-mode: train  # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
+task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
+mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark

 # Train settings -------------------------------------------------------------------------------------------------------
-model:  # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
-data:  # (str, optional) path to data file, i.e. coco128.yaml
-epochs: 100  # (int) number of epochs to train for
-patience: 50  # (int) epochs to wait for no observable improvement for early stopping of training
-batch: 16  # (int) number of images per batch (-1 for AutoBatch)
-imgsz: 640  # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
-save: True  # (bool) save train checkpoints and predict results
+model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
+data: # (str, optional) path to data file, i.e. coco128.yaml
+epochs: 100 # (int) number of epochs to train for
+time: # (float, optional) number of hours to train for, overrides epochs if supplied
+patience: 100 # (int) epochs to wait for no observable improvement for early stopping of training
+batch: 16 # (int) number of images per batch (-1 for AutoBatch)
+imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
+save: True # (bool) save train checkpoints and predict results
 save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
-cache: False  # (bool) True/ram, disk or False. Use cache for data loading
-device:  # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
-workers: 8  # (int) number of worker threads for data loading (per RANK if DDP)
-project:  # (str, optional) project name
-name:  # (str, optional) experiment name, results saved to 'project/name' directory
-exist_ok: False  # (bool) whether to overwrite existing experiment
-pretrained: True  # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
-optimizer: auto  # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
-verbose: True  # (bool) whether to print verbose output
-seed: 0  # (int) random seed for reproducibility
-deterministic: True  # (bool) whether to enable deterministic mode
-single_cls: False  # (bool) train multi-class data as single-class
-rect: False  # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
-cos_lr: False  # (bool) use cosine learning rate scheduler
-close_mosaic: 10  # (int) disable mosaic augmentation for final epochs (0 to disable)
-resume: False  # (bool) resume training from last checkpoint
-amp: True  # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
-fraction: 1.0  # (float) dataset fraction to train on (default is 1.0, all images in train set)
-profile: False  # (bool) profile ONNX and TensorRT speeds during training for loggers
-freeze: None  # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
+val_period: 1 # (int) Validation every x epochs
+cache: False # (bool) True/ram, disk or False. Use cache for data loading
+device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
+workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
+project: # (str, optional) project name
+name: # (str, optional) experiment name, results saved to 'project/name' directory
+exist_ok: False # (bool) whether to overwrite existing experiment
+pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
+optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
+verbose: True # (bool) whether to print verbose output
+seed: 0 # (int) random seed for reproducibility
+deterministic: True # (bool) whether to enable deterministic mode
+single_cls: False # (bool) train multi-class data as single-class
+rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
+cos_lr: False # (bool) use cosine learning rate scheduler
+close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
+resume: False # (bool) resume training from last checkpoint
+amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
+fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
+profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
+freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
+multi_scale: False # (bool) Whether to use multiscale during training
 # Segmentation
-overlap_mask: True  # (bool) masks should overlap during training (segment train only)
-mask_ratio: 4  # (int) mask downsample ratio (segment train only)
+overlap_mask: True # (bool) masks should overlap during training (segment train only)
+mask_ratio: 4 # (int) mask downsample ratio (segment train only)
 # Classification
-dropout: 0.0  # (float) use dropout regularization (classify train only)
+dropout: 0.0 # (float) use dropout regularization (classify train only)

 # Val/Test settings ----------------------------------------------------------------------------------------------------
-val: True  # (bool) validate/test during training
-split: val  # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
-save_json: False  # (bool) save results to JSON file
-save_hybrid: False  # (bool) save hybrid version of labels (labels + additional predictions)
-conf:  # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
-iou: 0.7  # (float) intersection over union (IoU) threshold for NMS
-max_det: 300  # (int) maximum number of detections per image
-half: False  # (bool) use half precision (FP16)
-dnn: False  # (bool) use OpenCV DNN for ONNX inference
-plots: True  # (bool) save plots during train/val
+val: True # (bool) validate/test during training
+split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
+save_json: False # (bool) save results to JSON file
+save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
+conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
+iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
+max_det: 300 # (int) maximum number of detections per image
+half: False # (bool) use half precision (FP16)
+dnn: False # (bool) use OpenCV DNN for ONNX inference
+plots: True # (bool) save plots and images during train/val

-# Prediction settings --------------------------------------------------------------------------------------------------
-source:  # (str, optional) source directory for images or videos
-show: False  # (bool) show results if possible
-save_txt: False  # (bool) save results as .txt file
-save_conf: False  # (bool) save results with confidence scores
-save_crop: False  # (bool) save cropped images with results
-show_labels: True  # (bool) show object labels in plots
-show_conf: True  # (bool) show object confidence scores in plots
-vid_stride: 1  # (int) video frame-rate stride
-stream_buffer: False  # (bool) buffer all streaming frames (True) or return the most recent frame (False)
-line_width:   # (int, optional) line width of the bounding boxes, auto if missing
-visualize: False  # (bool) visualize model features
-augment: False  # (bool) apply image augmentation to prediction sources
-agnostic_nms: False  # (bool) class-agnostic NMS
-classes:  # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
-retina_masks: False  # (bool) use high-resolution segmentation masks
-boxes: True  # (bool) Show boxes in segmentation predictions
+# Predict settings -----------------------------------------------------------------------------------------------------
+source: # (str, optional) source directory for images or videos
+vid_stride: 1 # (int) video frame-rate stride
+stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
+visualize: False # (bool) visualize model features
+augment: False # (bool) apply image augmentation to prediction sources
+agnostic_nms: False # (bool) class-agnostic NMS
+classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
+retina_masks: False # (bool) use high-resolution segmentation masks
+embed: # (list[int], optional) return feature vectors/embeddings from given layers
+
+# Visualize settings ---------------------------------------------------------------------------------------------------
+show: False # (bool) show predicted images and videos if environment allows
+save_frames: False # (bool) save predicted individual video frames
+save_txt: False # (bool) save results as .txt file
+save_conf: False # (bool) save results with confidence scores
+save_crop: False # (bool) save cropped images with results
+show_labels: True # (bool) show prediction labels, i.e. 'person'
+show_conf: True # (bool) show prediction confidence, i.e. '0.99'
+show_boxes: True # (bool) show prediction boxes
+line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.

 # Export settings ------------------------------------------------------------------------------------------------------
-format: torchscript  # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
-keras: False  # (bool) use Kera=s
-optimize: False  # (bool) TorchScript: optimize for mobile
-int8: False  # (bool) CoreML/TF INT8 quantization
-dynamic: False  # (bool) ONNX/TF/TensorRT: dynamic axes
-simplify: False  # (bool) ONNX: simplify model
-opset:  # (int, optional) ONNX: opset version
-workspace: 4  # (int) TensorRT: workspace size (GB)
-nms: False  # (bool) CoreML: add NMS
+format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
+keras: False # (bool) use Keras
+optimize: False # (bool) TorchScript: optimize for mobile
+int8: False # (bool) CoreML/TF INT8 quantization
+dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
+simplify: False # (bool) ONNX: simplify model using `onnxslim`
+opset: # (int, optional) ONNX: opset version
+workspace: 4 # (int) TensorRT: workspace size (GB)
+nms: False # (bool) CoreML: add NMS

 # Hyperparameters ------------------------------------------------------------------------------------------------------
-lr0: 0.01  # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
-lrf: 0.01  # (float) final learning rate (lr0 * lrf)
-momentum: 0.937  # (float) SGD momentum/Adam beta1
-weight_decay: 0.0005  # (float) optimizer weight decay 5e-4
-warmup_epochs: 3.0  # (float) warmup epochs (fractions ok)
-warmup_momentum: 0.8  # (float) warmup initial momentum
-warmup_bias_lr: 0.1  # (float) warmup initial bias lr
-box: 7.5  # (float) box loss gain
-cls: 0.5  # (float) cls loss gain (scale with pixels)
-dfl: 1.5  # (float) dfl loss gain
-pose: 12.0  # (float) pose loss gain
-kobj: 1.0  # (float) keypoint obj loss gain
-label_smoothing: 0.0  # (float) label smoothing (fraction)
-nbs: 64  # (int) nominal batch size
-hsv_h: 0.015  # (float) image HSV-Hue augmentation (fraction)
-hsv_s: 0.7  # (float) image HSV-Saturation augmentation (fraction)
-hsv_v: 0.4  # (float) image HSV-Value augmentation (fraction)
-degrees: 0.0  # (float) image rotation (+/- deg)
-translate: 0.1  # (float) image translation (+/- fraction)
-scale: 0.5  # (float) image scale (+/- gain)
-shear: 0.0  # (float) image shear (+/- deg)
-perspective: 0.0  # (float) image perspective (+/- fraction), range 0-0.001
-flipud: 0.0  # (float) image flip up-down (probability)
-fliplr: 0.5  # (float) image flip left-right (probability)
-mosaic: 1.0  # (float) image mosaic (probability)
-mixup: 0.0  # (float) image mixup (probability)
-copy_paste: 0.0  # (float) segment copy-paste (probability)
+lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
+lrf: 0.01 # (float) final learning rate (lr0 * lrf)
+momentum: 0.937 # (float) SGD momentum/Adam beta1
+weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
+warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
+warmup_momentum: 0.8 # (float) warmup initial momentum
+warmup_bias_lr: 0.1 # (float) warmup initial bias lr
+box: 7.5 # (float) box loss gain
+cls: 0.5 # (float) cls loss gain (scale with pixels)
+dfl: 1.5 # (float) dfl loss gain
+pose: 12.0 # (float) pose loss gain
+kobj: 1.0 # (float) keypoint obj loss gain
+label_smoothing: 0.0 # (float) label smoothing (fraction)
+nbs: 64 # (int) nominal batch size
+hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
+hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
+degrees: 0.0 # (float) image rotation (+/- deg)
+translate: 0.1 # (float) image translation (+/- fraction)
+scale: 0.5 # (float) image scale (+/- gain)
+shear: 0.0 # (float) image shear (+/- deg)
+perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
+flipud: 0.0 # (float) image flip up-down (probability)
+fliplr: 0.5 # (float) image flip left-right (probability)
+bgr: 0.0 # (float) image channel BGR (probability)
+mosaic: 1.0 # (float) image mosaic (probability)
+mixup: 0.0 # (float) image mixup (probability)
+copy_paste: 0.0 # (float) segment copy-paste (probability)
+auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
+erasing: 0.4 # (float) probability of random erasing during classification training (0-1)
+crop_fraction: 1.0 # (float) image crop fraction for classification evaluation/inference (0-1)

 # Custom config.yaml ---------------------------------------------------------------------------------------------------
-cfg:  # (str, optional) for overriding defaults.yaml
+cfg: # (str, optional) for overriding default.yaml

 # Tracker settings ------------------------------------------------------------------------------------------------------
-tracker: botsort.yaml  # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
+tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
--- a/ultralytics/cfg/models/README.md
+++ b/ultralytics/cfg/models/README.md
@ -14,8 +14,7 @@ Model `*.yaml` files may be used directly in the Command Line Interface (CLI) wi
 yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100
 ```

-They may also be used directly in a Python environment, and accepts the same
-[arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above:
+They may also be used directly in a Python environment, and accept the same [arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above:

 ```python
 from ultralytics import YOLO
--- a/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml
+++ b/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml
@ -2,49 +2,49 @@
 # RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr

 # Parameters
-nc: 80  # number of classes
+nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 1024]

 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, HGStem, [32, 48]]  # 0-P2/4
-  - [-1, 6, HGBlock, [48, 128, 3]]  # stage 1
+  - [-1, 1, HGStem, [32, 48]] # 0-P2/4
+  - [-1, 6, HGBlock, [48, 128, 3]] # stage 1

-  - [-1, 1, DWConv, [128, 3, 2, 1, False]]  # 2-P3/8
-  - [-1, 6, HGBlock, [96, 512, 3]]   # stage 2
+  - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
+  - [-1, 6, HGBlock, [96, 512, 3]] # stage 2

-  - [-1, 1, DWConv, [512, 3, 2, 1, False]]  # 4-P3/16
-  - [-1, 6, HGBlock, [192, 1024, 5, True, False]]  # cm, c2, k, light, shortcut
+  - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P4/16
+  - [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut
  - [-1, 6, HGBlock, [192, 1024, 5, True, True]]
-  - [-1, 6, HGBlock, [192, 1024, 5, True, True]]  # stage 3
+  - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3

-  - [-1, 1, DWConv, [1024, 3, 2, 1, False]]  # 8-P4/32
-  - [-1, 6, HGBlock, [384, 2048, 5, True, False]]  # stage 4
+  - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P5/32
+  - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4

 head:
-  - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 10 input_proj.2
+  - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2
  - [-1, 1, AIFI, [1024, 8]]
-  - [-1, 1, Conv, [256, 1, 1]]   # 12, Y5, lateral_convs.0
+  - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 14 input_proj.1
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1
  - [[-2, -1], 1, Concat, [1]]
-  - [-1, 3, RepC3, [256]]  # 16, fpn_blocks.0
-  - [-1, 1, Conv, [256, 1, 1]]   # 17, Y4, lateral_convs.1
+  - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0
+  - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]]  # 19 input_proj.0
-  - [[-2, -1], 1, Concat, [1]]  # cat backbone P4
-  - [-1, 3, RepC3, [256]]    # X3 (21), fpn_blocks.1
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0
+  - [[-2, -1], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1

-  - [-1, 1, Conv, [256, 3, 2]]   # 22, downsample_convs.0
-  - [[-1, 17], 1, Concat, [1]]  # cat Y4
-  - [-1, 3, RepC3, [256]]    # F4 (24), pan_blocks.0
+  - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0
+  - [[-1, 17], 1, Concat, [1]] # cat Y4
+  - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0

-  - [-1, 1, Conv, [256, 3, 2]]   # 25, downsample_convs.1
-  - [[-1, 12], 1, Concat, [1]]  # cat Y5
-  - [-1, 3, RepC3, [256]]    # F5 (27), pan_blocks.1
+  - [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1
+  - [[-1, 12], 1, Concat, [1]] # cat Y5
+  - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1

-  - [[21, 24, 27], 1, RTDETRDecoder, [nc]]  # Detect(P3, P4, P5)
+  - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml
+++ b/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml
@ -0,0 +1,42 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# RT-DETR-ResNet101 object detection model with P3-P5 outputs.
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
+  # [depth, width, max_channels]
+  l: [1.00, 1.00, 1024]
+
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
+  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
+  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
+  - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3
+  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
+
+head:
+  - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
+  - [-1, 1, AIFI, [1024, 8]]
+  - [-1, 1, Conv, [256, 1, 1]] # 7
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
+  - [[-2, -1], 1, Concat, [1]]
+  - [-1, 3, RepC3, [256]] # 11
+  - [-1, 1, Conv, [256, 1, 1]] # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
+  - [[-2, -1], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
+
+  - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
+  - [[-1, 12], 1, Concat, [1]] # cat Y4
+  - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
+
+  - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
+  - [[-1, 7], 1, Concat, [1]] # cat Y5
+  - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
+
+  - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml
+++ b/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml
@ -0,0 +1,42 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# RT-DETR-ResNet50 object detection model with P3-P5 outputs.
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
+  # [depth, width, max_channels]
+  l: [1.00, 1.00, 1024]
+
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
+  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
+  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
+  - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3
+  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
+
+head:
+  - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
+  - [-1, 1, AIFI, [1024, 8]]
+  - [-1, 1, Conv, [256, 1, 1]] # 7
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
+  - [[-2, -1], 1, Concat, [1]]
+  - [-1, 3, RepC3, [256]] # 11
+  - [-1, 1, Conv, [256, 1, 1]] # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
+  - [[-2, -1], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
+
+  - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
+  - [[-1, 12], 1, Concat, [1]] # cat Y4
+  - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
+
+  - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
+  - [[-1, 7], 1, Concat, [1]] # cat Y5
+  - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
+
+  - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml
+++ b/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml
@ -2,53 +2,53 @@
 # RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr

 # Parameters
-nc: 80  # number of classes
+nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  x: [1.00, 1.00, 2048]

 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, HGStem, [32, 64]]  # 0-P2/4
-  - [-1, 6, HGBlock, [64, 128, 3]]  # stage 1
+  - [-1, 1, HGStem, [32, 64]] # 0-P2/4
+  - [-1, 6, HGBlock, [64, 128, 3]] # stage 1

-  - [-1, 1, DWConv, [128, 3, 2, 1, False]]  # 2-P3/8
+  - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
  - [-1, 6, HGBlock, [128, 512, 3]]
-  - [-1, 6, HGBlock, [128, 512, 3, False, True]]   # 4-stage 2
+  - [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2

-  - [-1, 1, DWConv, [512, 3, 2, 1, False]]  # 5-P3/16
-  - [-1, 6, HGBlock, [256, 1024, 5, True, False]]  # cm, c2, k, light, shortcut
+  - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P4/16
+  - [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
-  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]  # 10-stage 3
+  - [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3

-  - [-1, 1, DWConv, [1024, 3, 2, 1, False]]  # 11-P4/32
+  - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P5/32
  - [-1, 6, HGBlock, [512, 2048, 5, True, False]]
-  - [-1, 6, HGBlock, [512, 2048, 5, True, True]]  # 13-stage 4
+  - [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4

 head:
-  - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]]  # 14 input_proj.2
+  - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2
  - [-1, 1, AIFI, [2048, 8]]
-  - [-1, 1, Conv, [384, 1, 1]]   # 16, Y5, lateral_convs.0
+  - [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]]  # 18 input_proj.1
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1
  - [[-2, -1], 1, Concat, [1]]
-  - [-1, 3, RepC3, [384]]  # 20, fpn_blocks.0
-  - [-1, 1, Conv, [384, 1, 1]]   # 21, Y4, lateral_convs.1
+  - [-1, 3, RepC3, [384]] # 20, fpn_blocks.0
+  - [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]]  # 23 input_proj.0
-  - [[-2, -1], 1, Concat, [1]]  # cat backbone P4
-  - [-1, 3, RepC3, [384]]    # X3 (25), fpn_blocks.1
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0
+  - [[-2, -1], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1

-  - [-1, 1, Conv, [384, 3, 2]]   # 26, downsample_convs.0
-  - [[-1, 21], 1, Concat, [1]]  # cat Y4
-  - [-1, 3, RepC3, [384]]    # F4 (28), pan_blocks.0
+  - [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0
+  - [[-1, 21], 1, Concat, [1]] # cat Y4
+  - [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0

-  - [-1, 1, Conv, [384, 3, 2]]   # 29, downsample_convs.1
-  - [[-1, 16], 1, Concat, [1]]  # cat Y5
-  - [-1, 3, RepC3, [384]]    # F5 (31), pan_blocks.1
+  - [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1
+  - [[-1, 16], 1, Concat, [1]] # cat Y5
+  - [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1

-  - [[25, 28, 31], 1, RTDETRDecoder, [nc]]  # Detect(P3, P4, P5)
+  - [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v10/yolov10b.yaml
+++ b/ultralytics/cfg/models/v10/yolov10b.yaml
@ -0,0 +1,40 @@
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  b: [0.67, 1.00, 512] 
+
+# YOLOv10 backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv10 head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2fCIB, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v10/yolov10l.yaml
+++ b/ultralytics/cfg/models/v10/yolov10l.yaml
@ -0,0 +1,40 @@
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
+
+# YOLOv10 backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv10 head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2fCIB, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v10/yolov10m.yaml
+++ b/ultralytics/cfg/models/v10/yolov10m.yaml
@ -0,0 +1,43 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv10 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs
+
+# YOLOv10 backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv10 head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v10/yolov10n.yaml
+++ b/ultralytics/cfg/models/v10/yolov10n.yaml
@ -0,0 +1,40 @@
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024] 
+
+# YOLOv10 backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv10 head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v10/yolov10s.yaml
+++ b/ultralytics/cfg/models/v10/yolov10s.yaml
@ -0,0 +1,39 @@
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants; this file defines only scale 's'
+  # [depth, width, max_channels]
+  s: [0.33, 0.50, 1024]
+
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv10s head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v10/yolov10x.yaml
+++ b/ultralytics/cfg/models/v10/yolov10x.yaml
@ -0,0 +1,40 @@
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants; this file defines only scale 'x'
+  # [depth, width, max_channels]
+  x: [1.00, 1.25, 512]
+
+# YOLOv10x backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2fCIB, [512, True]]
+  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2fCIB, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+  - [-1, 1, PSA, [1024]] # 10
+
+# YOLOv10x head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2fCIB, [512, True]] # 13
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 16 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 13], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
+
+  - [-1, 1, SCDown, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
+
+  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v3/yolov3-spp.yaml
+++ b/ultralytics/cfg/models/v3/yolov3-spp.yaml
@ -2,47 +2,45 @@
 # YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3

 # Parameters
-nc: 80  # number of classes
-depth_multiple: 1.0  # model depth multiple
-width_multiple: 1.0  # layer channel multiple
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple

 # darknet53 backbone
 backbone:
  # [from, number, module, args]
-  [[-1, 1, Conv, [32, 3, 1]],  # 0
-   [-1, 1, Conv, [64, 3, 2]],  # 1-P1/2
-   [-1, 1, Bottleneck, [64]],
-   [-1, 1, Conv, [128, 3, 2]],  # 3-P2/4
-   [-1, 2, Bottleneck, [128]],
-   [-1, 1, Conv, [256, 3, 2]],  # 5-P3/8
-   [-1, 8, Bottleneck, [256]],
-   [-1, 1, Conv, [512, 3, 2]],  # 7-P4/16
-   [-1, 8, Bottleneck, [512]],
-   [-1, 1, Conv, [1024, 3, 2]],  # 9-P5/32
-   [-1, 4, Bottleneck, [1024]],  # 10
-  ]
+  - [-1, 1, Conv, [32, 3, 1]] # 0
+  - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
+  - [-1, 1, Bottleneck, [64]]
+  - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
+  - [-1, 2, Bottleneck, [128]]
+  - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
+  - [-1, 8, Bottleneck, [256]]
+  - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
+  - [-1, 8, Bottleneck, [512]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
+  - [-1, 4, Bottleneck, [1024]] # 10

 # YOLOv3-SPP head
 head:
-  [[-1, 1, Bottleneck, [1024, False]],
-   [-1, 1, SPP, [512, [5, 9, 13]]],
-   [-1, 1, Conv, [1024, 3, 1]],
-   [-1, 1, Conv, [512, 1, 1]],
-   [-1, 1, Conv, [1024, 3, 1]],  # 15 (P5/32-large)
+  - [-1, 1, Bottleneck, [1024, False]]
+  - [-1, 1, SPP, [512, [5, 9, 13]]]
+  - [-1, 1, Conv, [1024, 3, 1]]
+  - [-1, 1, Conv, [512, 1, 1]]
+  - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)

-   [-2, 1, Conv, [256, 1, 1]],
-   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 8], 1, Concat, [1]],  # cat backbone P4
-   [-1, 1, Bottleneck, [512, False]],
-   [-1, 1, Bottleneck, [512, False]],
-   [-1, 1, Conv, [256, 1, 1]],
-   [-1, 1, Conv, [512, 3, 1]],  # 22 (P4/16-medium)
+  - [-2, 1, Conv, [256, 1, 1]]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 8], 1, Concat, [1]] # cat backbone P4
+  - [-1, 1, Bottleneck, [512, False]]
+  - [-1, 1, Bottleneck, [512, False]]
+  - [-1, 1, Conv, [256, 1, 1]]
+  - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)

-   [-2, 1, Conv, [128, 1, 1]],
-   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 6], 1, Concat, [1]],  # cat backbone P3
-   [-1, 1, Bottleneck, [256, False]],
-   [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)
+  - [-2, 1, Conv, [128, 1, 1]]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P3
+  - [-1, 1, Bottleneck, [256, False]]
+  - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)

-   [[27, 22, 15], 1, Detect, [nc]],   # Detect(P3, P4, P5)
-  ]
+  - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v3/yolov3-tiny.yaml
+++ b/ultralytics/cfg/models/v3/yolov3-tiny.yaml
@ -2,38 +2,36 @@
 # YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3

 # Parameters
-nc: 80  # number of classes
-depth_multiple: 1.0  # model depth multiple
-width_multiple: 1.0  # layer channel multiple
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple

 # YOLOv3-tiny backbone
 backbone:
  # [from, number, module, args]
-  [[-1, 1, Conv, [16, 3, 1]],  # 0
-   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 1-P1/2
-   [-1, 1, Conv, [32, 3, 1]],
-   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 3-P2/4
-   [-1, 1, Conv, [64, 3, 1]],
-   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 5-P3/8
-   [-1, 1, Conv, [128, 3, 1]],
-   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 7-P4/16
-   [-1, 1, Conv, [256, 3, 1]],
-   [-1, 1, nn.MaxPool2d, [2, 2, 0]],  # 9-P5/32
-   [-1, 1, Conv, [512, 3, 1]],
-   [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]],  # 11
-   [-1, 1, nn.MaxPool2d, [2, 1, 0]],  # 12
-  ]
+  - [-1, 1, Conv, [16, 3, 1]] # 0
+  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 1-P1/2
+  - [-1, 1, Conv, [32, 3, 1]]
+  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 3-P2/4
+  - [-1, 1, Conv, [64, 3, 1]]
+  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 5-P3/8
+  - [-1, 1, Conv, [128, 3, 1]]
+  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 7-P4/16
+  - [-1, 1, Conv, [256, 3, 1]]
+  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 9-P5/32
+  - [-1, 1, Conv, [512, 3, 1]]
+  - [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]] # 11
+  - [-1, 1, nn.MaxPool2d, [2, 1, 0]] # 12

 # YOLOv3-tiny head
 head:
-  [[-1, 1, Conv, [1024, 3, 1]],
-   [-1, 1, Conv, [256, 1, 1]],
-   [-1, 1, Conv, [512, 3, 1]],  # 15 (P5/32-large)
+  - [-1, 1, Conv, [1024, 3, 1]]
+  - [-1, 1, Conv, [256, 1, 1]]
+  - [-1, 1, Conv, [512, 3, 1]] # 15 (P5/32-large)

-   [-2, 1, Conv, [128, 1, 1]],
-   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 8], 1, Concat, [1]],  # cat backbone P4
-   [-1, 1, Conv, [256, 3, 1]],  # 19 (P4/16-medium)
+  - [-2, 1, Conv, [128, 1, 1]]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 8], 1, Concat, [1]] # cat backbone P4
+  - [-1, 1, Conv, [256, 3, 1]] # 19 (P4/16-medium)

-   [[19, 15], 1, Detect, [nc]],  # Detect(P4, P5)
-  ]
+  - [[19, 15], 1, Detect, [nc]] # Detect(P4, P5)
--- a/ultralytics/cfg/models/v3/yolov3.yaml
+++ b/ultralytics/cfg/models/v3/yolov3.yaml
@ -2,47 +2,45 @@
 # YOLOv3 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3

 # Parameters
-nc: 80  # number of classes
-depth_multiple: 1.0  # model depth multiple
-width_multiple: 1.0  # layer channel multiple
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple

 # darknet53 backbone
 backbone:
  # [from, number, module, args]
-  [[-1, 1, Conv, [32, 3, 1]],  # 0
-   [-1, 1, Conv, [64, 3, 2]],  # 1-P1/2
-   [-1, 1, Bottleneck, [64]],
-   [-1, 1, Conv, [128, 3, 2]],  # 3-P2/4
-   [-1, 2, Bottleneck, [128]],
-   [-1, 1, Conv, [256, 3, 2]],  # 5-P3/8
-   [-1, 8, Bottleneck, [256]],
-   [-1, 1, Conv, [512, 3, 2]],  # 7-P4/16
-   [-1, 8, Bottleneck, [512]],
-   [-1, 1, Conv, [1024, 3, 2]],  # 9-P5/32
-   [-1, 4, Bottleneck, [1024]],  # 10
-  ]
+  - [-1, 1, Conv, [32, 3, 1]] # 0
+  - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
+  - [-1, 1, Bottleneck, [64]]
+  - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
+  - [-1, 2, Bottleneck, [128]]
+  - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
+  - [-1, 8, Bottleneck, [256]]
+  - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
+  - [-1, 8, Bottleneck, [512]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
+  - [-1, 4, Bottleneck, [1024]] # 10

 # YOLOv3 head
 head:
-  [[-1, 1, Bottleneck, [1024, False]],
-   [-1, 1, Conv, [512, 1, 1]],
-   [-1, 1, Conv, [1024, 3, 1]],
-   [-1, 1, Conv, [512, 1, 1]],
-   [-1, 1, Conv, [1024, 3, 1]],  # 15 (P5/32-large)
+  - [-1, 1, Bottleneck, [1024, False]]
+  - [-1, 1, Conv, [512, 1, 1]]
+  - [-1, 1, Conv, [1024, 3, 1]]
+  - [-1, 1, Conv, [512, 1, 1]]
+  - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)

-   [-2, 1, Conv, [256, 1, 1]],
-   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 8], 1, Concat, [1]],  # cat backbone P4
-   [-1, 1, Bottleneck, [512, False]],
-   [-1, 1, Bottleneck, [512, False]],
-   [-1, 1, Conv, [256, 1, 1]],
-   [-1, 1, Conv, [512, 3, 1]],  # 22 (P4/16-medium)
+  - [-2, 1, Conv, [256, 1, 1]]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 8], 1, Concat, [1]] # cat backbone P4
+  - [-1, 1, Bottleneck, [512, False]]
+  - [-1, 1, Bottleneck, [512, False]]
+  - [-1, 1, Conv, [256, 1, 1]]
+  - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)

-   [-2, 1, Conv, [128, 1, 1]],
-   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 6], 1, Concat, [1]],  # cat backbone P3
-   [-1, 1, Bottleneck, [256, False]],
-   [-1, 2, Bottleneck, [256, False]],  # 27 (P3/8-small)
+  - [-2, 1, Conv, [128, 1, 1]]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P3
+  - [-1, 1, Bottleneck, [256, False]]
+  - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)

-   [[27, 22, 15], 1, Detect, [nc]],   # Detect(P3, P4, P5)
-  ]
+  - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v5/yolov5-p6.yaml
+++ b/ultralytics/cfg/models/v5/yolov5-p6.yaml
@ -2,7 +2,7 @@
 # YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.ultralytics.com/models/yolov5

 # Parameters
-nc: 80  # number of classes
+nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov5n-p6.yaml' will call yolov5-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@ -14,48 +14,46 @@ scales: # model compound scaling constants, i.e. 'model=yolov5n-p6.yaml' will ca
 # YOLOv5 v6.0 backbone
 backbone:
  # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
-   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-   [-1, 3, C3, [128]],
-   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 6, C3, [256]],
-   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-   [-1, 9, C3, [512]],
-   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
-   [-1, 3, C3, [768]],
-   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
-   [-1, 3, C3, [1024]],
-   [-1, 1, SPPF, [1024, 5]],  # 11
-  ]
+  - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C3, [128]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C3, [256]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 9, C3, [512]]
+  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
+  - [-1, 3, C3, [768]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
+  - [-1, 3, C3, [1024]]
+  - [-1, 1, SPPF, [1024, 5]] # 11

 # YOLOv5 v6.0 head
 head:
-  [[-1, 1, Conv, [768, 1, 1]],
-   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 8], 1, Concat, [1]],  # cat backbone P5
-   [-1, 3, C3, [768, False]],  # 15
+  - [-1, 1, Conv, [768, 1, 1]]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
+  - [-1, 3, C3, [768, False]] # 15

-   [-1, 1, Conv, [512, 1, 1]],
-   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
-   [-1, 3, C3, [512, False]],  # 19
+  - [-1, 1, Conv, [512, 1, 1]]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C3, [512, False]] # 19

-   [-1, 1, Conv, [256, 1, 1]],
-   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
-   [-1, 3, C3, [256, False]],  # 23 (P3/8-small)
+  - [-1, 1, Conv, [256, 1, 1]]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C3, [256, False]] # 23 (P3/8-small)

-   [-1, 1, Conv, [256, 3, 2]],
-   [[-1, 20], 1, Concat, [1]],  # cat head P4
-   [-1, 3, C3, [512, False]],  # 26 (P4/16-medium)
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 20], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C3, [512, False]] # 26 (P4/16-medium)

-   [-1, 1, Conv, [512, 3, 2]],
-   [[-1, 16], 1, Concat, [1]],  # cat head P5
-   [-1, 3, C3, [768, False]],  # 29 (P5/32-large)
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 16], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C3, [768, False]] # 29 (P5/32-large)

-   [-1, 1, Conv, [768, 3, 2]],
-   [[-1, 12], 1, Concat, [1]],  # cat head P6
-   [-1, 3, C3, [1024, False]],  # 32 (P6/64-xlarge)
+  - [-1, 1, Conv, [768, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]] # cat head P6
+  - [-1, 3, C3, [1024, False]] # 32 (P6/64-xlarge)

-   [[23, 26, 29, 32], 1, Detect, [nc]],  # Detect(P3, P4, P5, P6)
-  ]
+  - [[23, 26, 29, 32], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
--- a/ultralytics/cfg/models/v5/yolov5.yaml
+++ b/ultralytics/cfg/models/v5/yolov5.yaml
@ -2,7 +2,7 @@
 # YOLOv5 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov5

 # Parameters
-nc: 80  # number of classes
+nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call yolov5.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@ -14,37 +14,35 @@ scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call
 # YOLOv5 v6.0 backbone
 backbone:
  # [from, number, module, args]
-  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
-   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-   [-1, 3, C3, [128]],
-   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 6, C3, [256]],
-   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-   [-1, 9, C3, [512]],
-   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 3, C3, [1024]],
-   [-1, 1, SPPF, [1024, 5]],  # 9
-  ]
+  - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C3, [128]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C3, [256]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 9, C3, [512]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C3, [1024]]
+  - [-1, 1, SPPF, [1024, 5]] # 9

 # YOLOv5 v6.0 head
 head:
-  [[-1, 1, Conv, [512, 1, 1]],
-   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
-   [-1, 3, C3, [512, False]],  # 13
+  - [-1, 1, Conv, [512, 1, 1]]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C3, [512, False]] # 13

-   [-1, 1, Conv, [256, 1, 1]],
-   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
-   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
-   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
+  - [-1, 1, Conv, [256, 1, 1]]
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C3, [256, False]] # 17 (P3/8-small)

-   [-1, 1, Conv, [256, 3, 2]],
-   [[-1, 14], 1, Concat, [1]],  # cat head P4
-   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 14], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C3, [512, False]] # 20 (P4/16-medium)

-   [-1, 1, Conv, [512, 3, 2]],
-   [[-1, 10], 1, Concat, [1]],  # cat head P5
-   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C3, [1024, False]] # 23 (P5/32-large)

-   [[17, 20, 23], 1, Detect, [nc]],  # Detect(P3, P4, P5)
-  ]
+  - [[17, 20, 23], 1, Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v6/yolov6.yaml
+++ b/ultralytics/cfg/models/v6/yolov6.yaml
@ -2,8 +2,8 @@
 # YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6

 # Parameters
-nc: 80  # number of classes
-activation: nn.ReLU()  # (optional) model default activation function
+nc: 80 # number of classes
+activation: nn.ReLU() # (optional) model default activation function
 scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@ -15,39 +15,39 @@ scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call
 # YOLOv6-3.0s backbone
 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
-  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 6, Conv, [128, 3, 1]]
-  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 12, Conv, [256, 3, 1]]
-  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 18, Conv, [512, 3, 1]]
-  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 6, Conv, [1024, 3, 1]]
-  - [-1, 1, SPPF, [1024, 5]]  # 9
+  - [-1, 1, SPPF, [1024, 5]] # 9

 # YOLOv6-3.0s head
 head:
  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.ConvTranspose2d, [256, 2, 2, 0]]
-  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, Conv, [256, 3, 1]]
-  - [-1, 9, Conv, [256, 3, 1]]  # 14
+  - [-1, 9, Conv, [256, 3, 1]] # 14

  - [-1, 1, Conv, [128, 1, 1]]
  - [-1, 1, nn.ConvTranspose2d, [128, 2, 2, 0]]
-  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, Conv, [128, 3, 1]]
-  - [-1, 9, Conv, [128, 3, 1]]  # 19
+  - [-1, 9, Conv, [128, 3, 1]] # 19

  - [-1, 1, Conv, [128, 3, 2]]
-  - [[-1, 15], 1, Concat, [1]]  # cat head P4
+  - [[-1, 15], 1, Concat, [1]] # cat head P4
  - [-1, 1, Conv, [256, 3, 1]]
-  - [-1, 9, Conv, [256, 3, 1]]  # 23
+  - [-1, 9, Conv, [256, 3, 1]] # 23

  - [-1, 1, Conv, [256, 3, 2]]
-  - [[-1, 10], 1, Concat, [1]]  # cat head P5
+  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 1, Conv, [512, 3, 1]]
-  - [-1, 9, Conv, [512, 3, 1]]  # 27
+  - [-1, 9, Conv, [512, 3, 1]] # 27

-  - [[19, 23, 27], 1, Detect, [nc]]  # Detect(P3, P4, P5)
+  - [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml
@ -0,0 +1,25 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8-cls image classification model with ResNet101 backbone. For Usage examples see https://docs.ultralytics.com/tasks/classify
+
+# Parameters
+nc: 1000 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 1024]
+  l: [1.00, 1.00, 1024]
+  x: [1.00, 1.25, 1024]
+
+# ResNet101 backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
+  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
+  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
+  - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3-P4/16
+  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, Classify, [nc]] # Classify
--- a/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml
@ -0,0 +1,25 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8-cls image classification model with ResNet50 backbone. For Usage examples see https://docs.ultralytics.com/tasks/classify
+
+# Parameters
+nc: 1000 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024]
+  s: [0.33, 0.50, 1024]
+  m: [0.67, 0.75, 1024]
+  l: [1.00, 1.00, 1024]
+  x: [1.00, 1.25, 1024]
+
+# ResNet50 backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
+  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
+  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
+  - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3-P4/16
+  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, Classify, [nc]] # Classify
--- a/ultralytics/cfg/models/v8/yolov8-cls.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-cls.yaml
@ -2,7 +2,7 @@
 # YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify

 # Parameters
-nc: 1000  # number of classes
+nc: 1000 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@ -14,16 +14,16 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will c
 # YOLOv8.0n backbone
 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
-  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
-  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
-  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
-  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]

 # YOLOv8.0n head
 head:
-  - [-1, 1, Classify, [nc]]  # Classify
+  - [-1, 1, Classify, [nc]] # Classify
--- a/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml
@ -0,0 +1,54 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-ghost-p2.yaml' will call yolov8-ghost-p2.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p2 summary: 491 layers, 2033944 parameters,   2033928 gradients,  13.8 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p2 summary: 491 layers, 5562080 parameters,   5562064 gradients,  25.1 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOv8m-ghost-p2 summary: 731 layers, 9031728 parameters,   9031712 gradients,  42.8 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOv8l-ghost-p2 summary: 971 layers, 12214448 parameters, 12214432 gradients,  69.1 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOv8x-ghost-p2 summary: 971 layers, 18664776 parameters, 18664760 gradients, 103.3 GFLOPs
+
+# YOLOv8.0-ghost backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C3Ghost, [128, True]]
+  - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C3Ghost, [256, True]]
+  - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C3Ghost, [512, True]]
+  - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C3Ghost, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+
+# YOLOv8.0-ghost-p2 head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C3Ghost, [512]] # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 2], 1, Concat, [1]] # cat backbone P2
+  - [-1, 3, C3Ghost, [128]] # 18 (P2/4-xsmall)
+
+  - [-1, 1, GhostConv, [128, 3, 2]]
+  - [[-1, 15], 1, Concat, [1]] # cat head P3
+  - [-1, 3, C3Ghost, [256]] # 21 (P3/8-small)
+
+  - [-1, 1, GhostConv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C3Ghost, [512]] # 24 (P4/16-medium)
+
+  - [-1, 1, GhostConv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C3Ghost, [1024]] # 27 (P5/32-large)
+
+  - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
--- a/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml
@ -0,0 +1,56 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-ghost-p6.yaml' will call yolov8-ghost-p6.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p6 summary: 529 layers, 2901100 parameters, 2901084 gradients, 5.8 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p6 summary: 529 layers, 9520008 parameters, 9519992 gradients, 16.4 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOv8m-ghost-p6 summary: 789 layers, 18002904 parameters, 18002888 gradients, 34.4 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOv8l-ghost-p6 summary: 1049 layers, 21227584 parameters, 21227568 gradients, 55.3 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOv8x-ghost-p6 summary: 1049 layers, 33057852 parameters, 33057836 gradients, 85.7 GFLOPs
+
+# YOLOv8.0-ghost backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C3Ghost, [128, True]]
+  - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C3Ghost, [256, True]]
+  - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C3Ghost, [512, True]]
+  - [-1, 1, GhostConv, [768, 3, 2]] # 7-P5/32
+  - [-1, 3, C3Ghost, [768, True]]
+  - [-1, 1, GhostConv, [1024, 3, 2]] # 9-P6/64
+  - [-1, 3, C3Ghost, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 11
+
+# YOLOv8.0-ghost-p6 head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
+  - [-1, 3, C3Ghost, [768]] # 14
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C3Ghost, [512]] # 17
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C3Ghost, [256]] # 20 (P3/8-small)
+
+  - [-1, 1, GhostConv, [256, 3, 2]]
+  - [[-1, 17], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C3Ghost, [512]] # 23 (P4/16-medium)
+
+  - [-1, 1, GhostConv, [512, 3, 2]]
+  - [[-1, 14], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C3Ghost, [768]] # 26 (P5/32-large)
+
+  - [-1, 1, GhostConv, [768, 3, 2]]
+  - [[-1, 11], 1, Concat, [1]] # cat head P6
+  - [-1, 3, C3Ghost, [1024]] # 29 (P6/64-xlarge)
+
+  - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
--- a/ultralytics/cfg/models/v8/yolov8-ghost.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-ghost.yaml
@ -0,0 +1,47 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
+# Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-ghost.yaml' will call yolov8-ghost.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024] # YOLOv8n-ghost summary: 403 layers,  1865316 parameters,  1865300 gradients,   5.8 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOv8s-ghost summary: 403 layers,  5960072 parameters,  5960056 gradients,  16.4 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOv8m-ghost summary: 603 layers, 10336312 parameters, 10336296 gradients,  32.7 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOv8l-ghost summary: 803 layers, 14277872 parameters, 14277856 gradients,  53.7 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOv8x-ghost summary: 803 layers, 22229308 parameters, 22229292 gradients,  83.3 GFLOPs
+
+# YOLOv8.0n-ghost backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C3Ghost, [128, True]]
+  - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C3Ghost, [256, True]]
+  - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C3Ghost, [512, True]]
+  - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C3Ghost, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C3Ghost, [512]] # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
+
+  - [-1, 1, GhostConv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C3Ghost, [512]] # 18 (P4/16-medium)
+
+  - [-1, 1, GhostConv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C3Ghost, [1024]] # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v8/yolov8-obb.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-obb.yaml
@ -0,0 +1,46 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/obb
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-obb.yaml' will call yolov8-obb.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers,  3157200 parameters,  3157184 gradients,   8.9 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 15 (P3/8-small)
+
+  - [-1, 1, Conv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, OBB, [nc, 1]] # OBB(P3, P4, P5)
--- a/ultralytics/cfg/models/v8/yolov8-p2.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-p2.yaml
@ -2,7 +2,7 @@
 # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

 # Parameters
-nc: 80  # number of classes
+nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@ -14,41 +14,41 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call
 # YOLOv8.0 backbone
 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
-  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
-  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
-  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
-  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
-  - [-1, 1, SPPF, [1024, 5]]  # 9
+  - [-1, 1, SPPF, [1024, 5]] # 9

 # YOLOv8.0-p2 head
 head:
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
-  - [-1, 3, C2f, [512]]  # 12
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 12

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
-  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 2], 1, Concat, [1]]  # cat backbone P2
-  - [-1, 3, C2f, [128]]  # 18 (P2/4-xsmall)
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 2], 1, Concat, [1]] # cat backbone P2
+  - [-1, 3, C2f, [128]] # 18 (P2/4-xsmall)

  - [-1, 1, Conv, [128, 3, 2]]
-  - [[-1, 15], 1, Concat, [1]]  # cat head P3
-  - [-1, 3, C2f, [256]]  # 21 (P3/8-small)
+  - [[-1, 15], 1, Concat, [1]] # cat head P3
+  - [-1, 3, C2f, [256]] # 21 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
-  - [[-1, 12], 1, Concat, [1]]  # cat head P4
-  - [-1, 3, C2f, [512]]  # 24 (P4/16-medium)
+  - [[-1, 12], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 24 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
-  - [[-1, 9], 1, Concat, [1]]  # cat head P5
-  - [-1, 3, C2f, [1024]]  # 27 (P5/32-large)
+  - [[-1, 9], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2f, [1024]] # 27 (P5/32-large)

-  - [[18, 21, 24, 27], 1, Detect, [nc]]  # Detect(P2, P3, P4, P5)
+  - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
--- a/ultralytics/cfg/models/v8/yolov8-p6.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-p6.yaml
@ -2,7 +2,7 @@
 # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

 # Parameters
-nc: 80  # number of classes
+nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@ -14,43 +14,43 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will ca
 # YOLOv8.0x6 backbone
 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
-  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
-  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
-  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
-  - [-1, 1, Conv, [768, 3, 2]]  # 7-P5/32
+  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [768, True]]
-  - [-1, 1, Conv, [1024, 3, 2]]  # 9-P6/64
+  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C2f, [1024, True]]
-  - [-1, 1, SPPF, [1024, 5]]  # 11
+  - [-1, 1, SPPF, [1024, 5]] # 11

 # YOLOv8.0x6 head
 head:
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 8], 1, Concat, [1]]  # cat backbone P5
-  - [-1, 3, C2, [768, False]]  # 14
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
+  - [-1, 3, C2, [768, False]] # 14

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
-  - [-1, 3, C2, [512, False]]  # 17
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2, [512, False]] # 17

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
-  - [-1, 3, C2, [256, False]]  # 20 (P3/8-small)
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2, [256, False]] # 20 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
-  - [[-1, 17], 1, Concat, [1]]  # cat head P4
-  - [-1, 3, C2, [512, False]]  # 23 (P4/16-medium)
+  - [[-1, 17], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
-  - [[-1, 14], 1, Concat, [1]]  # cat head P5
-  - [-1, 3, C2, [768, False]]  # 26 (P5/32-large)
+  - [[-1, 14], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2, [768, False]] # 26 (P5/32-large)

  - [-1, 1, Conv, [768, 3, 2]]
-  - [[-1, 11], 1, Concat, [1]]  # cat head P6
-  - [-1, 3, C2, [1024, False]]  # 29 (P6/64-xlarge)
+  - [[-1, 11], 1, Concat, [1]] # cat head P6
+  - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)

-  - [[20, 23, 26, 29], 1, Detect, [nc]]  # Detect(P3, P4, P5, P6)
+  - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
--- a/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml
@ -2,8 +2,8 @@
 # YOLOv8-pose-p6 keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose

 # Parameters
-nc: 1  # number of classes
-kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+nc: 1 # number of classes
+kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
 scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@ -15,43 +15,43 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will ca
 # YOLOv8.0x6 backbone
 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
-  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
-  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
-  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
-  - [-1, 1, Conv, [768, 3, 2]]  # 7-P5/32
+  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [768, True]]
-  - [-1, 1, Conv, [1024, 3, 2]]  # 9-P6/64
+  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C2f, [1024, True]]
-  - [-1, 1, SPPF, [1024, 5]]  # 11
+  - [-1, 1, SPPF, [1024, 5]] # 11

 # YOLOv8.0x6 head
 head:
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 8], 1, Concat, [1]]  # cat backbone P5
-  - [-1, 3, C2, [768, False]]  # 14
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
+  - [-1, 3, C2, [768, False]] # 14

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
-  - [-1, 3, C2, [512, False]]  # 17
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2, [512, False]] # 17

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
-  - [-1, 3, C2, [256, False]]  # 20 (P3/8-small)
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2, [256, False]] # 20 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
-  - [[-1, 17], 1, Concat, [1]]  # cat head P4
-  - [-1, 3, C2, [512, False]]  # 23 (P4/16-medium)
+  - [[-1, 17], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
-  - [[-1, 14], 1, Concat, [1]]  # cat head P5
-  - [-1, 3, C2, [768, False]]  # 26 (P5/32-large)
+  - [[-1, 14], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2, [768, False]] # 26 (P5/32-large)

  - [-1, 1, Conv, [768, 3, 2]]
-  - [[-1, 11], 1, Concat, [1]]  # cat head P6
-  - [-1, 3, C2, [1024, False]]  # 29 (P6/64-xlarge)
+  - [[-1, 11], 1, Concat, [1]] # cat head P6
+  - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)

-  - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]]  # Pose(P3, P4, P5, P6)
+  - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6)
--- a/ultralytics/cfg/models/v8/yolov8-pose.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-pose.yaml
@ -2,8 +2,8 @@
 # YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose

 # Parameters
-nc: 1  # number of classes
-kpt_shape: [17, 3]  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+nc: 1 # number of classes
+kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
 scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@ -15,33 +15,33 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will
 # YOLOv8.0n backbone
 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
-  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
-  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
-  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
-  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
-  - [-1, 1, SPPF, [1024, 5]]  # 9
+  - [-1, 1, SPPF, [1024, 5]] # 9

 # YOLOv8.0n head
 head:
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
-  - [-1, 3, C2f, [512]]  # 12
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 12

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
-  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
-  - [[-1, 12], 1, Concat, [1]]  # cat head P4
-  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+  - [[-1, 12], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
-  - [[-1, 9], 1, Concat, [1]]  # cat head P5
-  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+  - [[-1, 9], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

-  - [[15, 18, 21], 1, Pose, [nc, kpt_shape]]  # Pose(P3, P4, P5)
+  - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5)
--- a/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml
@ -2,45 +2,45 @@
 # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

 # Parameters
-nc: 80  # number of classes
+nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
-  n: [0.33, 0.25, 1024]  # YOLOv8n summary: 225 layers,  3157200 parameters,  3157184 gradients,   8.9 GFLOPs
-  s: [0.33, 0.50, 1024]  # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
-  m: [0.67, 0.75, 768]   # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs
-  l: [1.00, 1.00, 512]   # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
-  x: [1.00, 1.25, 512]   # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
+  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers,  3157200 parameters,  3157184 gradients,   8.9 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

 # YOLOv8.0n backbone
 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
-  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
-  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
-  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
-  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
-  - [-1, 1, SPPF, [1024, 5]]  # 9
+  - [-1, 1, SPPF, [1024, 5]] # 9

 # YOLOv8.0n head
 head:
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
-  - [-1, 3, C2f, [512]]  # 12
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 12

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
-  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
-  - [[-1, 12], 1, Concat, [1]]  # cat head P4
-  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+  - [[-1, 12], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
-  - [[-1, 9], 1, Concat, [1]]  # cat head P5
-  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+  - [[-1, 9], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

-  - [[15, 18, 21], 1, RTDETRDecoder, [nc]]  # Detect(P3, P4, P5)
+  - [[15, 18, 21], 1, RTDETRDecoder, [nc]] # RTDETRDecoder(P3, P4, P5)
--- a/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml
@ -2,7 +2,7 @@
 # YOLOv8-seg-p6 instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment

 # Parameters
-nc: 80  # number of classes
+nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n-seg-p6.yaml' will call yolov8-seg-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@ -14,43 +14,43 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-seg-p6.yaml' wil
 # YOLOv8.0x6 backbone
 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
-  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
-  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
-  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
-  - [-1, 1, Conv, [768, 3, 2]]  # 7-P5/32
+  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [768, True]]
-  - [-1, 1, Conv, [1024, 3, 2]]  # 9-P6/64
+  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C2f, [1024, True]]
-  - [-1, 1, SPPF, [1024, 5]]  # 11
+  - [-1, 1, SPPF, [1024, 5]] # 11

 # YOLOv8.0x6 head
 head:
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 8], 1, Concat, [1]]  # cat backbone P5
-  - [-1, 3, C2, [768, False]]  # 14
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
+  - [-1, 3, C2, [768, False]] # 14

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
-  - [-1, 3, C2, [512, False]]  # 17
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2, [512, False]] # 17

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
-  - [-1, 3, C2, [256, False]]  # 20 (P3/8-small)
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2, [256, False]] # 20 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
-  - [[-1, 17], 1, Concat, [1]]  # cat head P4
-  - [-1, 3, C2, [512, False]]  # 23 (P4/16-medium)
+  - [[-1, 17], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
-  - [[-1, 14], 1, Concat, [1]]  # cat head P5
-  - [-1, 3, C2, [768, False]]  # 26 (P5/32-large)
+  - [[-1, 14], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2, [768, False]] # 26 (P5/32-large)

  - [-1, 1, Conv, [768, 3, 2]]
-  - [[-1, 11], 1, Concat, [1]]  # cat head P6
-  - [-1, 3, C2, [1024, False]]  # 29 (P6/64-xlarge)
+  - [[-1, 11], 1, Concat, [1]] # cat head P6
+  - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)

-  - [[20, 23, 26, 29], 1, Segment, [nc, 32, 256]]  # Pose(P3, P4, P5, P6)
+  - [[20, 23, 26, 29], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5, P6)
--- a/ultralytics/cfg/models/v8/yolov8-seg.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-seg.yaml
@ -2,7 +2,7 @@
 # YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment

 # Parameters
-nc: 80  # number of classes
+nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
@ -14,33 +14,33 @@ scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will c
 # YOLOv8.0n backbone
 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
-  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
-  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
-  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
-  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
-  - [-1, 1, SPPF, [1024, 5]]  # 9
+  - [-1, 1, SPPF, [1024, 5]] # 9

 # YOLOv8.0n head
 head:
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
-  - [-1, 3, C2f, [512]]  # 12
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 12

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
-  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
-  - [[-1, 12], 1, Concat, [1]]  # cat head P4
-  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+  - [[-1, 12], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
-  - [[-1, 9], 1, Concat, [1]]  # cat head P5
-  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+  - [[-1, 9], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

-  - [[15, 18, 21], 1, Segment, [nc, 32, 256]]  # Segment(P3, P4, P5)
+  - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5)
--- a/ultralytics/cfg/models/v8/yolov8-world.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-world.yaml
@ -0,0 +1,48 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8-World object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-world.yaml' will call yolov8-world.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers,  3157200 parameters,  3157184 gradients,   8.9 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2fAttn, [512, 256, 8]] # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2fAttn, [256, 128, 4]] # 15 (P3/8-small)
+
+  - [[15, 12, 9], 1, ImagePoolingAttn, [256]] # 16 (P3/8-small)
+
+  - [15, 1, Conv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fAttn, [512, 256, 8]] # 19 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fAttn, [1024, 512, 16]] # 22 (P5/32-large)
+
+  - [[15, 19, 22], 1, WorldDetect, [nc, 512, False]] # WorldDetect(P3, P4, P5)
--- a/ultralytics/cfg/models/v8/yolov8-worldv2.yaml
+++ b/ultralytics/cfg/models/v8/yolov8-worldv2.yaml
@ -0,0 +1,46 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+# YOLOv8-World-v2 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/tasks/detect
+
+# Parameters
+nc: 80 # number of classes
+scales: # model compound scaling constants, i.e. 'model=yolov8n-worldv2.yaml' will call yolov8-worldv2.yaml with scale 'n'
+  # [depth, width, max_channels]
+  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers,  3157200 parameters,  3157184 gradients,   8.9 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
+
+# YOLOv8.0n backbone
+backbone:
+  # [from, repeats, module, args]
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
+  - [-1, 3, C2f, [128, True]]
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
+  - [-1, 6, C2f, [256, True]]
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
+  - [-1, 6, C2f, [512, True]]
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
+  - [-1, 3, C2f, [1024, True]]
+  - [-1, 1, SPPF, [1024, 5]] # 9
+
+# YOLOv8.0n head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2fAttn, [512, 256, 8]] # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2fAttn, [256, 128, 4]] # 15 (P3/8-small)
+
+  - [15, 1, Conv, [256, 3, 2]]
+  - [[-1, 12], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2fAttn, [512, 256, 8]] # 18 (P4/16-medium)
+
+  - [-1, 1, Conv, [512, 3, 2]]
+  - [[-1, 9], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2fAttn, [1024, 512, 16]] # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, WorldDetect, [nc, 512, True]] # WorldDetect(P3, P4, P5)
--- a/ultralytics/cfg/models/v8/yolov8.yaml
+++ b/ultralytics/cfg/models/v8/yolov8.yaml
@ -2,45 +2,45 @@
 # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

 # Parameters
-nc: 80  # number of classes
+nc: 80 # number of classes
 scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
-  n: [0.33, 0.25, 1024]  # YOLOv8n summary: 225 layers,  3157200 parameters,  3157184 gradients,   8.9 GFLOPs
-  s: [0.33, 0.50, 1024]  # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
-  m: [0.67, 0.75, 768]   # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs
-  l: [1.00, 1.00, 512]   # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
-  x: [1.00, 1.25, 512]   # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
+  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers,  3157200 parameters,  3157184 gradients,   8.9 GFLOPs
+  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients,  28.8 GFLOPs
+  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients,  79.3 GFLOPs
+  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
+  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

 # YOLOv8.0n backbone
 backbone:
  # [from, repeats, module, args]
-  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
-  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
-  - [-1, 1, Conv, [256, 3, 2]]  # 3-P3/8
+  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
-  - [-1, 1, Conv, [512, 3, 2]]  # 5-P4/16
+  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
-  - [-1, 1, Conv, [1024, 3, 2]]  # 7-P5/32
+  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
-  - [-1, 1, SPPF, [1024, 5]]  # 9
+  - [-1, 1, SPPF, [1024, 5]] # 9

 # YOLOv8.0n head
 head:
-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
-  - [-1, 3, C2f, [512]]  # 12
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
+  - [-1, 3, C2f, [512]] # 12

-  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
-  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
-  - [-1, 3, C2f, [256]]  # 15 (P3/8-small)
+  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
+  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
+  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
-  - [[-1, 12], 1, Concat, [1]]  # cat head P4
-  - [-1, 3, C2f, [512]]  # 18 (P4/16-medium)
+  - [[-1, 12], 1, Concat, [1]] # cat head P4
+  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
-  - [[-1, 9], 1, Concat, [1]]  # cat head P5
-  - [-1, 3, C2f, [1024]]  # 21 (P5/32-large)
+  - [[-1, 9], 1, Concat, [1]] # cat head P5
+  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

-  - [[15, 18, 21], 1, Detect, [nc]]  # Detect(P3, P4, P5)
+  - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v9/yolov9c.yaml
+++ b/ultralytics/cfg/models/v9/yolov9c.yaml
@ -0,0 +1,36 @@
+# YOLOv9
+
+# parameters
+nc: 80  # number of classes
+
+# gelan backbone
+backbone:
+  - [-1, 1, Conv, [64, 3, 2]]  # 0-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 1-P2/4
+  - [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]]  # 2
+  - [-1, 1, ADown, [256]]  # 3-P3/8
+  - [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]]  # 4
+  - [-1, 1, ADown, [512]]  # 5-P4/16
+  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]]  # 6
+  - [-1, 1, ADown, [512]]  # 7-P5/32
+  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]]  # 8
+  - [-1, 1, SPPELAN, [512, 256]]  # 9
+
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 6], 1, Concat, [1]]  # cat backbone P4
+  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]]  # 12
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 4], 1, Concat, [1]]  # cat backbone P3
+  - [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]]  # 15 (P3/8-small)
+
+  - [-1, 1, ADown, [256]]
+  - [[-1, 12], 1, Concat, [1]]  # cat head P4
+  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]]  # 18 (P4/16-medium)
+
+  - [-1, 1, ADown, [512]]
+  - [[-1, 9], 1, Concat, [1]]  # cat head P5
+  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]]  # 21 (P5/32-large)
+
+  - [[15, 18, 21], 1, Detect, [nc]]  # Detect(P3, P4, P5)
--- a/ultralytics/cfg/models/v9/yolov9e.yaml
+++ b/ultralytics/cfg/models/v9/yolov9e.yaml
@ -0,0 +1,60 @@
+# YOLOv9
+
+# parameters
+nc: 80  # number of classes
+
+# gelan backbone
+backbone:
+  - [-1, 1, Silence, []]
+  - [-1, 1, Conv, [64, 3, 2]]  # 1-P1/2
+  - [-1, 1, Conv, [128, 3, 2]]  # 2-P2/4
+  - [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]]  # 3
+  - [-1, 1, ADown, [256]]  # 4-P3/8
+  - [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]]  # 5
+  - [-1, 1, ADown, [512]]  # 6-P4/16
+  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]]  # 7
+  - [-1, 1, ADown, [1024]]  # 8-P5/32
+  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]]  # 9
+   
+  - [1, 1, CBLinear, [[64]]] # 10
+  - [3, 1, CBLinear, [[64, 128]]] # 11
+  - [5, 1, CBLinear, [[64, 128, 256]]] # 12
+  - [7, 1, CBLinear, [[64, 128, 256, 512]]] # 13
+  - [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]] # 14
+  
+  - [0, 1, Conv, [64, 3, 2]]  # 15-P1/2
+  - [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]] # 16
+  - [-1, 1, Conv, [128, 3, 2]]  # 17-P2/4
+  - [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]] # 18  
+  - [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]]  # 19
+  - [-1, 1, ADown, [256]]  # 20-P3/8
+  - [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]] # 21  
+  - [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]]  # 22
+  - [-1, 1, ADown, [512]]  # 23-P4/16
+  - [[13, 14, -1], 1, CBFuse, [[3, 3]]] # 24 
+  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]]  # 25
+  - [-1, 1, ADown, [1024]]  # 26-P5/32
+  - [[14, -1], 1, CBFuse, [[4]]] # 27
+  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]]  # 28
+  - [-1, 1, SPPELAN, [512, 256]]  # 29
+
+# gelan head
+head:
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 25], 1, Concat, [1]]  # cat backbone P4
+  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]]  # 32
+
+  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
+  - [[-1, 22], 1, Concat, [1]]  # cat backbone P3
+  - [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]]  # 35 (P3/8-small)
+
+  - [-1, 1, ADown, [256]]
+  - [[-1, 32], 1, Concat, [1]]  # cat head P4
+  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]]  # 38 (P4/16-medium)
+
+  - [-1, 1, ADown, [512]]
+  - [[-1, 29], 1, Concat, [1]]  # cat head P5
+  - [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]]  # 41 (P5/32-large)
+
+   # detect
+  - [[35, 38, 41], 1, Detect, [nc]]  # Detect(P3, P4, P5)
--- a/ultralytics/cfg/trackers/botsort.yaml
+++ b/ultralytics/cfg/trackers/botsort.yaml
@ -1,17 +1,17 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT

-tracker_type: botsort  # tracker type, ['botsort', 'bytetrack']
-track_high_thresh: 0.5  # threshold for the first association
-track_low_thresh: 0.1  # threshold for the second association
-new_track_thresh: 0.6  # threshold for init new track if the detection does not match any tracks
-track_buffer: 30  # buffer to calculate the time when to remove tracks
-match_thresh: 0.8  # threshold for matching tracks
+tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
+track_high_thresh: 0.5 # threshold for the first association
+track_low_thresh: 0.1 # threshold for the second association
+new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
+track_buffer: 30 # buffer to calculate the time when to remove tracks
+match_thresh: 0.8 # threshold for matching tracks
 # min_box_area: 10  # threshold for min box areas(for tracker evaluation, not used for now)
 # mot20: False  # for tracker evaluation(not used for now)

 # BoT-SORT settings
-gmc_method: sparseOptFlow  # method of global motion compensation
+gmc_method: sparseOptFlow # method of global motion compensation
 # ReID model related thresh (not supported yet)
 proximity_thresh: 0.5
 appearance_thresh: 0.25
--- a/ultralytics/cfg/trackers/bytetrack.yaml
+++ b/ultralytics/cfg/trackers/bytetrack.yaml
@ -1,11 +1,11 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack

-tracker_type: bytetrack  # tracker type, ['botsort', 'bytetrack']
-track_high_thresh: 0.5  # threshold for the first association
-track_low_thresh: 0.1  # threshold for the second association
-new_track_thresh: 0.6  # threshold for init new track if the detection does not match any tracks
-track_buffer: 30  # buffer to calculate the time when to remove tracks
-match_thresh: 0.8  # threshold for matching tracks
+tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
+track_high_thresh: 0.5 # threshold for the first association
+track_low_thresh: 0.1 # threshold for the second association
+new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
+track_buffer: 30 # buffer to calculate the time when to remove tracks
+match_thresh: 0.8 # threshold for matching tracks
 # min_box_area: 10  # threshold for min box areas(for tracker evaluation, not used for now)
 # mot20: False  # for tracker evaluation(not used for now)
--- a/ultralytics/data/init.py
+++ b/ultralytics/data/init.py
@ -4,5 +4,12 @@ from .base import BaseDataset
 from .build import build_dataloader, build_yolo_dataset, load_inference_source
 from .dataset import ClassificationDataset, SemanticDataset, YOLODataset

-__all__ = ('BaseDataset', 'ClassificationDataset', 'SemanticDataset', 'YOLODataset', 'build_yolo_dataset',
-           'build_dataloader', 'load_inference_source')
+__all__ = (
+    "BaseDataset",
+    "ClassificationDataset",
+    "SemanticDataset",
+    "YOLODataset",
+    "build_yolo_dataset",
+    "build_dataloader",
+    "load_inference_source",
+)
--- a/ultralytics/data/pycache/init.cpython-312.pyc
+++ b/ultralytics/data/pycache/init.cpython-312.pyc
--- a/ultralytics/data/pycache/init.cpython-39.pyc
+++ b/ultralytics/data/pycache/init.cpython-39.pyc
--- a/ultralytics/data/pycache/augment.cpython-312.pyc
+++ b/ultralytics/data/pycache/augment.cpython-312.pyc
--- a/ultralytics/data/pycache/augment.cpython-39.pyc
+++ b/ultralytics/data/pycache/augment.cpython-39.pyc
--- a/ultralytics/data/pycache/base.cpython-312.pyc
+++ b/ultralytics/data/pycache/base.cpython-312.pyc
--- a/ultralytics/data/pycache/base.cpython-39.pyc
+++ b/ultralytics/data/pycache/base.cpython-39.pyc
--- a/ultralytics/data/pycache/build.cpython-312.pyc
+++ b/ultralytics/data/pycache/build.cpython-312.pyc
--- a/ultralytics/data/pycache/build.cpython-39.pyc
+++ b/ultralytics/data/pycache/build.cpython-39.pyc
--- a/ultralytics/data/pycache/converter.cpython-312.pyc
+++ b/ultralytics/data/pycache/converter.cpython-312.pyc
--- a/ultralytics/data/pycache/converter.cpython-39.pyc
+++ b/ultralytics/data/pycache/converter.cpython-39.pyc
--- a/ultralytics/data/pycache/dataset.cpython-312.pyc
+++ b/ultralytics/data/pycache/dataset.cpython-312.pyc
--- a/ultralytics/data/pycache/dataset.cpython-39.pyc
+++ b/ultralytics/data/pycache/dataset.cpython-39.pyc
--- a/ultralytics/data/pycache/loaders.cpython-312.pyc
+++ b/ultralytics/data/pycache/loaders.cpython-312.pyc
--- a/ultralytics/data/pycache/loaders.cpython-39.pyc
+++ b/ultralytics/data/pycache/loaders.cpython-39.pyc
--- a/ultralytics/data/pycache/utils.cpython-312.pyc
+++ b/ultralytics/data/pycache/utils.cpython-312.pyc
--- a/ultralytics/data/pycache/utils.cpython-39.pyc
+++ b/ultralytics/data/pycache/utils.cpython-39.pyc
--- a/ultralytics/data/annotator.py
+++ b/ultralytics/data/annotator.py
@ -5,7 +5,7 @@ from pathlib import Path
 from ultralytics import SAM, YOLO


-def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='', output_dir=None):
+def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None):
    """
    Automatically annotates images using a YOLO object detection model and a SAM segmentation model.

@ -29,7 +29,7 @@ def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='',

    data = Path(data)
    if not output_dir:
-        output_dir = data.parent / f'{data.stem}_auto_annotate_labels'
+        output_dir = data.parent / f"{data.stem}_auto_annotate_labels"
    Path(output_dir).mkdir(exist_ok=True, parents=True)

    det_results = det_model(data, stream=True, device=device)
@ -41,10 +41,10 @@ def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='',
            sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device)
            segments = sam_results[0].masks.xyn  # noqa

-            with open(f'{str(Path(output_dir) / Path(result.path).stem)}.txt', 'w') as f:
+            with open(f"{Path(output_dir) / Path(result.path).stem}.txt", "w") as f:
                for i in range(len(segments)):
                    s = segments[i]
                    if len(s) == 0:
                        continue
                    segment = map(str, segments[i].reshape(-1).tolist())
-                    f.write(f'{class_ids[i]} ' + ' '.join(segment) + '\n')
+                    f.write(f"{class_ids[i]} " + " ".join(segment) + "\n")
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
--- a/ultralytics/data/base.py
+++ b/ultralytics/data/base.py
@ -15,7 +15,6 @@ import psutil
 from torch.utils.data import Dataset

 from ultralytics.utils import DEFAULT_CFG, LOCAL_RANK, LOGGER, NUM_THREADS, TQDM
-
 from .utils import HELP_URL, IMG_FORMATS


@ -47,20 +46,23 @@ class BaseDataset(Dataset):
        transforms (callable): Image transformation function.
    """

-    def __init__(self,
-                 img_path,
-                 imgsz=640,
-                 cache=False,
-                 augment=True,
-                 hyp=DEFAULT_CFG,
-                 prefix='',
-                 rect=False,
-                 batch_size=16,
-                 stride=32,
-                 pad=0.5,
-                 single_cls=False,
-                 classes=None,
-                 fraction=1.0):
+    def __init__(
+        self,
+        img_path,
+        imgsz=640,
+        cache=False,
+        augment=True,
+        hyp=DEFAULT_CFG,
+        prefix="",
+        rect=False,
+        batch_size=16,
+        stride=32,
+        pad=0.5,
+        single_cls=False,
+        classes=None,
+        fraction=1.0,
+    ):
+        """Initialize BaseDataset with given configuration and options."""
        super().__init__()
        self.img_path = img_path
        self.imgsz = imgsz
@ -84,11 +86,11 @@ class BaseDataset(Dataset):
        self.buffer = []  # buffer size = batch size
        self.max_buffer_length = min((self.ni, self.batch_size * 8, 1000)) if self.augment else 0

-        # Cache stuff
-        if cache == 'ram' and not self.check_cache_ram():
+        # Cache images
+        if cache == "ram" and not self.check_cache_ram():
            cache = False
        self.ims, self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni, [None] * self.ni
-        self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
+        self.npy_files = [Path(f).with_suffix(".npy") for f in self.im_files]
        if cache:
            self.cache_images(cache)

@ -102,54 +104,62 @@ class BaseDataset(Dataset):
            for p in img_path if isinstance(img_path, list) else [img_path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
-                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
+                    f += glob.glob(str(p / "**" / "*.*"), recursive=True)
                    # F = list(p.rglob('*.*'))  # pathlib
                elif p.is_file():  # file
                    with open(p) as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
-                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
+                        f += [x.replace("./", parent) if x.startswith("./") else x for x in t]  # local to global path
                        # F += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                else:
-                    raise FileNotFoundError(f'{self.prefix}{p} does not exist')
-            im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
+                    raise FileNotFoundError(f"{self.prefix}{p} does not exist")
+            im_files = sorted(x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS)
            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS])  # pathlib
-            assert im_files, f'{self.prefix}No images found in {img_path}'
+            assert im_files, f"{self.prefix}No images found in {img_path}"
        except Exception as e:
-            raise FileNotFoundError(f'{self.prefix}Error loading data from {img_path}\n{HELP_URL}') from e
+            raise FileNotFoundError(f"{self.prefix}Error loading data from {img_path}\n{HELP_URL}") from e
        if self.fraction < 1:
-            im_files = im_files[:round(len(im_files) * self.fraction)]
+            # im_files = im_files[: round(len(im_files) * self.fraction)]
+            num_elements_to_select = round(len(im_files) * self.fraction)
+            im_files = random.sample(im_files, num_elements_to_select)
        return im_files

    def update_labels(self, include_class: Optional[list]):
-        """include_class, filter labels to include only these classes (optional)."""
+        """Update labels to include only these classes (optional)."""
        include_class_array = np.array(include_class).reshape(1, -1)
        for i in range(len(self.labels)):
            if include_class is not None:
-                cls = self.labels[i]['cls']
-                bboxes = self.labels[i]['bboxes']
-                segments = self.labels[i]['segments']
-                keypoints = self.labels[i]['keypoints']
+                cls = self.labels[i]["cls"]
+                bboxes = self.labels[i]["bboxes"]
+                segments = self.labels[i]["segments"]
+                keypoints = self.labels[i]["keypoints"]
                j = (cls == include_class_array).any(1)
-                self.labels[i]['cls'] = cls[j]
-                self.labels[i]['bboxes'] = bboxes[j]
+                self.labels[i]["cls"] = cls[j]
+                self.labels[i]["bboxes"] = bboxes[j]
                if segments:
-                    self.labels[i]['segments'] = [segments[si] for si, idx in enumerate(j) if idx]
+                    self.labels[i]["segments"] = [segments[si] for si, idx in enumerate(j) if idx]
                if keypoints is not None:
-                    self.labels[i]['keypoints'] = keypoints[j]
+                    self.labels[i]["keypoints"] = keypoints[j]
            if self.single_cls:
-                self.labels[i]['cls'][:, 0] = 0
+                self.labels[i]["cls"][:, 0] = 0

    def load_image(self, i, rect_mode=True):
        """Loads 1 image from dataset index 'i', returns (im, resized hw)."""
        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
        if im is None:  # not cached in RAM
            if fn.exists():  # load npy
-                im = np.load(fn)
+                try:
+                    im = np.load(fn)
+                except Exception as e:
+                    LOGGER.warning(f"{self.prefix}WARNING ⚠️ Removing corrupt *.npy image file {fn} due to: {e}")
+                    Path(fn).unlink(missing_ok=True)
+                    im = cv2.imread(f)  # BGR
            else:  # read image
                im = cv2.imread(f)  # BGR
-                if im is None:
-                    raise FileNotFoundError(f'Image Not Found {f}')
+            if im is None:
+                raise FileNotFoundError(f"Image Not Found {f}")
+
            h0, w0 = im.shape[:2]  # orig hw
            if rect_mode:  # resize long side to imgsz while maintaining aspect ratio
                r = self.imgsz / max(h0, w0)  # ratio
@ -174,17 +184,17 @@ class BaseDataset(Dataset):
    def cache_images(self, cache):
        """Cache images to memory or disk."""
        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabytes
-        fcn = self.cache_images_to_disk if cache == 'disk' else self.load_image
+        fcn = self.cache_images_to_disk if cache == "disk" else self.load_image
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(fcn, range(self.ni))
            pbar = TQDM(enumerate(results), total=self.ni, disable=LOCAL_RANK > 0)
            for i, x in pbar:
-                if cache == 'disk':
+                if cache == "disk":
                    b += self.npy_files[i].stat().st_size
                else:  # 'ram'
                    self.ims[i], self.im_hw0[i], self.im_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
                    b += self.ims[i].nbytes
-                pbar.desc = f'{self.prefix}Caching images ({b / gb:.1f}GB {cache})'
+                pbar.desc = f"{self.prefix}Caching images ({b / gb:.1f}GB {cache})"
            pbar.close()

    def cache_images_to_disk(self, i):
@ -200,15 +210,17 @@ class BaseDataset(Dataset):
        for _ in range(n):
            im = cv2.imread(random.choice(self.im_files))  # sample image
            ratio = self.imgsz / max(im.shape[0], im.shape[1])  # max(h, w)  # ratio
-            b += im.nbytes * ratio ** 2
+            b += im.nbytes * ratio**2
         mem_required = b * self.ni / n * (1 + safety_margin)  # bytes required to cache dataset into RAM
        mem = psutil.virtual_memory()
        cache = mem_required < mem.available  # to cache or not to cache, that is the question
        if not cache:
-            LOGGER.info(f'{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images '
-                        f'with {int(safety_margin * 100)}% safety margin but only '
-                        f'{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, '
-                        f"{'caching images ✅' if cache else 'not caching images ⚠️'}")
+            LOGGER.info(
+                f'{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images '
+                f'with {int(safety_margin * 100)}% safety margin but only '
+                f'{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, '
+                f"{'caching images ✅' if cache else 'not caching images ⚠️'}"
+            )
        return cache

    def set_rectangle(self):
@ -216,7 +228,7 @@ class BaseDataset(Dataset):
        bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches

-        s = np.array([x.pop('shape') for x in self.labels])  # hw
+        s = np.array([x.pop("shape") for x in self.labels])  # hw
        ar = s[:, 0] / s[:, 1]  # aspect ratio
        irect = ar.argsort()
        self.im_files = [self.im_files[i] for i in irect]
@ -243,12 +255,14 @@ class BaseDataset(Dataset):
    def get_image_and_label(self, index):
        """Get and return label information from the dataset."""
        label = deepcopy(self.labels[index])  # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
-        label.pop('shape', None)  # shape is for rect, remove it
-        label['img'], label['ori_shape'], label['resized_shape'] = self.load_image(index)
-        label['ratio_pad'] = (label['resized_shape'][0] / label['ori_shape'][0],
-                              label['resized_shape'][1] / label['ori_shape'][1])  # for evaluation
+        label.pop("shape", None)  # shape is for rect, remove it
+        label["img"], label["ori_shape"], label["resized_shape"] = self.load_image(index)
+        label["ratio_pad"] = (
+            label["resized_shape"][0] / label["ori_shape"][0],
+            label["resized_shape"][1] / label["ori_shape"][1],
+        )  # for evaluation
        if self.rect:
-            label['rect_shape'] = self.batch_shapes[self.batch[index]]
+            label["rect_shape"] = self.batch_shapes[self.batch[index]]
        return self.update_labels_info(label)

    def __len__(self):
@ -256,24 +270,32 @@ class BaseDataset(Dataset):
        return len(self.labels)

    def update_labels_info(self, label):
-        """custom your label format here."""
+        """Custom your label format here."""
        return label

    def build_transforms(self, hyp=None):
-        """Users can custom augmentations here
-        like:
+        """
+        Users can customize augmentations here.
+
+        Example:
+            ```python
            if self.augment:
                # Training transforms
                return Compose([])
            else:
                # Val transforms
                return Compose([])
+            ```
        """
        raise NotImplementedError

    def get_labels(self):
-        """Users can custom their own format here.
-        Make sure your output is a list with each element like below:
+        """
+        Users can customize their own format here.
+
+        Note:
+            Ensure output is a dictionary with the following keys:
+            ```python
            dict(
                im_file=im_file,
                shape=shape,  # format: (height, width)
@ -284,5 +306,6 @@ class BaseDataset(Dataset):
                normalized=True, # or False
                bbox_format="xyxy",  # or xywh, ltwh
            )
+            ```
        """
        raise NotImplementedError
--- a/ultralytics/data/build.py
+++ b/ultralytics/data/build.py
@ -9,23 +9,34 @@ import torch
 from PIL import Image
 from torch.utils.data import dataloader, distributed

-from ultralytics.data.loaders import (LOADERS, LoadImages, LoadPilAndNumpy, LoadScreenshots, LoadStreams, LoadTensor,
-                                      SourceTypes, autocast_list)
+from ultralytics.data.loaders import (
+    LOADERS,
+    LoadImagesAndVideos,
+    LoadPilAndNumpy,
+    LoadScreenshots,
+    LoadStreams,
+    LoadTensor,
+    SourceTypes,
+    autocast_list,
+)
 from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
 from ultralytics.utils import RANK, colorstr
 from ultralytics.utils.checks import check_file
-
 from .dataset import YOLODataset
 from .utils import PIN_MEMORY


 class InfiniteDataLoader(dataloader.DataLoader):
-    """Dataloader that reuses workers. Uses same syntax as vanilla DataLoader."""
+    """
+    Dataloader that reuses workers.
+
+    Uses same syntax as vanilla DataLoader.
+    """

    def __init__(self, *args, **kwargs):
        """Dataloader that infinitely recycles workers, inherits from DataLoader."""
        super().__init__(*args, **kwargs)
-        object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
+        object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
@ -38,7 +49,9 @@ class InfiniteDataLoader(dataloader.DataLoader):
            yield next(self.iterator)

    def reset(self):
-        """Reset iterator.
+        """
+        Reset iterator.
+
        This is useful when we want to modify settings of dataset while training.
        """
        self.iterator = self._get_iterator()
@ -64,49 +77,51 @@ class _RepeatSampler:

 def seed_worker(worker_id):  # noqa
    """Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader."""
-    worker_seed = torch.initial_seed() % 2 ** 32
+    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


-def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32):
-    """Build YOLO Dataset"""
+def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, stride=32):
+    """Build YOLO Dataset."""
    return YOLODataset(
        img_path=img_path,
        imgsz=cfg.imgsz,
        batch_size=batch,
-        augment=mode == 'train',  # augmentation
+        augment=mode == "train",  # augmentation
        hyp=cfg,  # TODO: probably add a get_hyps_from_cfg function
        rect=cfg.rect or rect,  # rectangular batches
        cache=cfg.cache or None,
        single_cls=cfg.single_cls or False,
        stride=int(stride),
-        pad=0.0 if mode == 'train' else 0.5,
-        prefix=colorstr(f'{mode}: '),
-        use_segments=cfg.task == 'segment',
-        use_keypoints=cfg.task == 'pose',
+        pad=0.0 if mode == "train" else 0.5,
+        prefix=colorstr(f"{mode}: "),
+        task=cfg.task,
        classes=cfg.classes,
        data=data,
-        fraction=cfg.fraction if mode == 'train' else 1.0)
+        fraction=cfg.fraction if mode == "train" else 1.0,
+    )


 def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
    """Return an InfiniteDataLoader or DataLoader for training or validation set."""
    batch = min(batch, len(dataset))
    nd = torch.cuda.device_count()  # number of CUDA devices
-    nw = min([os.cpu_count() // max(nd, 1), batch if batch > 1 else 0, workers])  # number of workers
+    nw = min([os.cpu_count() // max(nd, 1), workers])  # number of workers
    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
    generator = torch.Generator()
    generator.manual_seed(6148914691236517205 + RANK)
-    return InfiniteDataLoader(dataset=dataset,
-                              batch_size=batch,
-                              shuffle=shuffle and sampler is None,
-                              num_workers=nw,
-                              sampler=sampler,
-                              pin_memory=PIN_MEMORY,
-                              collate_fn=getattr(dataset, 'collate_fn', None),
-                              worker_init_fn=seed_worker,
-                              generator=generator)
+    return InfiniteDataLoader(
+        dataset=dataset,
+        batch_size=batch,
+        shuffle=shuffle and sampler is None,
+        num_workers=nw,
+        sampler=sampler,
+        pin_memory=PIN_MEMORY,
+        collate_fn=getattr(dataset, "collate_fn", None),
+        worker_init_fn=seed_worker,
+        generator=generator,
+    )


 def check_source(source):
@ -114,10 +129,10 @@ def check_source(source):
    webcam, screenshot, from_img, in_memory, tensor = False, False, False, False, False
    if isinstance(source, (str, int, Path)):  # int for local usb camera
        source = str(source)
-        is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
-        is_url = source.lower().startswith(('https://', 'http://', 'rtsp://', 'rtmp://'))
-        webcam = source.isnumeric() or source.endswith('.streams') or (is_url and not is_file)
-        screenshot = source.lower() == 'screen'
+        is_file = Path(source).suffix[1:] in (IMG_FORMATS | VID_FORMATS)
+        is_url = source.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://"))
+        webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
+        screenshot = source.lower() == "screen"
        if is_url and is_file:
            source = check_file(source)  # download
    elif isinstance(source, LOADERS):
@ -130,42 +145,42 @@ def check_source(source):
    elif isinstance(source, torch.Tensor):
        tensor = True
    else:
-        raise TypeError('Unsupported image type. For supported types see https://docs.ultralytics.com/modes/predict')
+        raise TypeError("Unsupported image type. For supported types see https://docs.ultralytics.com/modes/predict")

    return source, webcam, screenshot, from_img, in_memory, tensor


-def load_inference_source(source=None, imgsz=640, vid_stride=1, stream_buffer=False):
+def load_inference_source(source=None, batch=1, vid_stride=1, buffer=False):
    """
    Loads an inference source for object detection and applies necessary transformations.

    Args:
        source (str, Path, Tensor, PIL.Image, np.ndarray): The input source for inference.
-        imgsz (int, optional): The size of the image for inference. Default is 640.
+        batch (int, optional): Batch size for dataloaders. Default is 1.
        vid_stride (int, optional): The frame interval for video sources. Default is 1.
-        stream_buffer (bool, optional): Determined whether stream frames will be buffered. Default is False.
+        buffer (bool, optional): Determines whether stream frames will be buffered. Default is False.

    Returns:
        dataset (Dataset): A dataset object for the specified input source.
    """
-    source, webcam, screenshot, from_img, in_memory, tensor = check_source(source)
-    source_type = source.source_type if in_memory else SourceTypes(webcam, screenshot, from_img, tensor)
+    source, stream, screenshot, from_img, in_memory, tensor = check_source(source)
+    source_type = source.source_type if in_memory else SourceTypes(stream, screenshot, from_img, tensor)

    # Dataloader
    if tensor:
        dataset = LoadTensor(source)
    elif in_memory:
        dataset = source
-    elif webcam:
-        dataset = LoadStreams(source, imgsz=imgsz, vid_stride=vid_stride, stream_buffer=stream_buffer)
+    elif stream:
+        dataset = LoadStreams(source, vid_stride=vid_stride, buffer=buffer)
    elif screenshot:
-        dataset = LoadScreenshots(source, imgsz=imgsz)
+        dataset = LoadScreenshots(source)
    elif from_img:
-        dataset = LoadPilAndNumpy(source, imgsz=imgsz)
+        dataset = LoadPilAndNumpy(source)
    else:
-        dataset = LoadImages(source, imgsz=imgsz, vid_stride=vid_stride)
+        dataset = LoadImagesAndVideos(source, batch=batch, vid_stride=vid_stride)

    # Attach source types to the dataset
-    setattr(dataset, 'source_type', source_type)
+    setattr(dataset, "source_type", source_type)

    return dataset
--- a/ultralytics/data/converter.py
+++ b/ultralytics/data/converter.py
@ -1,31 +1,120 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

 import json
-import shutil
 from collections import defaultdict
 from pathlib import Path

 import cv2
 import numpy as np

-from ultralytics.utils import TQDM
+from ultralytics.utils import LOGGER, TQDM
+from ultralytics.utils.files import increment_path


 def coco91_to_coco80_class():
-    """Converts 91-index COCO class IDs to 80-index COCO class IDs.
+    """
+    Converts 91-index COCO class IDs to 80-index COCO class IDs.

    Returns:
         (list): A list of 91 entries where the index is the zero-based 91-index class ID and the value is the
             corresponding 80-index class ID, or None if that class has no 80-index equivalent.
    """
    return [
-        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, None, 24, 25, None,
-        None, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, None, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
-        51, 52, 53, 54, 55, 56, 57, 58, 59, None, 60, None, None, 61, None, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
-        None, 73, 74, 75, 76, 77, 78, 79, None]
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        None,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        None,
+        24,
+        25,
+        None,
+        None,
+        26,
+        27,
+        28,
+        29,
+        30,
+        31,
+        32,
+        33,
+        34,
+        35,
+        36,
+        37,
+        38,
+        39,
+        None,
+        40,
+        41,
+        42,
+        43,
+        44,
+        45,
+        46,
+        47,
+        48,
+        49,
+        50,
+        51,
+        52,
+        53,
+        54,
+        55,
+        56,
+        57,
+        58,
+        59,
+        None,
+        60,
+        None,
+        None,
+        61,
+        None,
+        62,
+        63,
+        64,
+        65,
+        66,
+        67,
+        68,
+        69,
+        70,
+        71,
+        72,
+        None,
+        73,
+        74,
+        75,
+        76,
+        77,
+        78,
+        79,
+        None,
+    ]


-def coco80_to_coco91_class():  #
+def coco80_to_coco91_class():
    """
    Converts 80-index (val2014) to 91-index (paper).
    For details see https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/.
@ -41,16 +130,102 @@ def coco80_to_coco91_class():  #
        ```
    """
    return [
-        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
-        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-        64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        27,
+        28,
+        31,
+        32,
+        33,
+        34,
+        35,
+        36,
+        37,
+        38,
+        39,
+        40,
+        41,
+        42,
+        43,
+        44,
+        46,
+        47,
+        48,
+        49,
+        50,
+        51,
+        52,
+        53,
+        54,
+        55,
+        56,
+        57,
+        58,
+        59,
+        60,
+        61,
+        62,
+        63,
+        64,
+        65,
+        67,
+        70,
+        72,
+        73,
+        74,
+        75,
+        76,
+        77,
+        78,
+        79,
+        80,
+        81,
+        82,
+        84,
+        85,
+        86,
+        87,
+        88,
+        89,
+        90,
+    ]


-def convert_coco(labels_dir='../coco/annotations/', use_segments=False, use_keypoints=False, cls91to80=True):
-    """Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
+def convert_coco(
+    labels_dir="../coco/annotations/",
+    save_dir="coco_converted/",
+    use_segments=False,
+    use_keypoints=False,
+    cls91to80=True,
+):
+    """
+    Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.

    Args:
        labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
+        save_dir (str, optional): Path to directory to save results to.
        use_segments (bool, optional): Whether to include segmentation masks in the output.
        use_keypoints (bool, optional): Whether to include keypoint annotations in the output.
        cls91to80 (bool, optional): Whether to map 91 COCO class IDs to the corresponding 80 COCO class IDs.
@ -67,78 +242,79 @@ def convert_coco(labels_dir='../coco/annotations/', use_segments=False, use_keyp
    """

    # Create dataset directory
-    save_dir = Path('yolo_labels')
-    if save_dir.exists():
-        shutil.rmtree(save_dir)  # delete dir
-    for p in save_dir / 'labels', save_dir / 'images':
+    save_dir = increment_path(save_dir)  # increment if save directory already exists
+    for p in save_dir / "labels", save_dir / "images":
        p.mkdir(parents=True, exist_ok=True)  # make dir

    # Convert classes
    coco80 = coco91_to_coco80_class()

    # Import json
-    for json_file in sorted(Path(labels_dir).resolve().glob('*.json')):
-        fn = Path(save_dir) / 'labels' / json_file.stem.replace('instances_', '')  # folder name
+    for json_file in sorted(Path(labels_dir).resolve().glob("*.json")):
+        fn = Path(save_dir) / "labels" / json_file.stem.replace("instances_", "")  # folder name
        fn.mkdir(parents=True, exist_ok=True)
        with open(json_file) as f:
            data = json.load(f)

        # Create image dict
-        images = {f'{x["id"]:d}': x for x in data['images']}
+        images = {f'{x["id"]:d}': x for x in data["images"]}
        # Create image-annotations dict
        imgToAnns = defaultdict(list)
-        for ann in data['annotations']:
-            imgToAnns[ann['image_id']].append(ann)
+        for ann in data["annotations"]:
+            imgToAnns[ann["image_id"]].append(ann)

        # Write labels file
-        for img_id, anns in TQDM(imgToAnns.items(), desc=f'Annotations {json_file}'):
-            img = images[f'{img_id:d}']
-            h, w, f = img['height'], img['width'], img['file_name']
+        for img_id, anns in TQDM(imgToAnns.items(), desc=f"Annotations {json_file}"):
+            img = images[f"{img_id:d}"]
+            h, w, f = img["height"], img["width"], img["file_name"]

            bboxes = []
            segments = []
            keypoints = []
            for ann in anns:
-                if ann['iscrowd']:
+                if ann["iscrowd"]:
                    continue
                # The COCO box format is [top left x, top left y, width, height]
-                box = np.array(ann['bbox'], dtype=np.float64)
+                box = np.array(ann["bbox"], dtype=np.float64)
                box[:2] += box[2:] / 2  # xy top-left corner to center
                box[[0, 2]] /= w  # normalize x
                box[[1, 3]] /= h  # normalize y
                if box[2] <= 0 or box[3] <= 0:  # if w <= 0 or h <= 0
                    continue

-                cls = coco80[ann['category_id'] - 1] if cls91to80 else ann['category_id'] - 1  # class
+                cls = coco80[ann["category_id"] - 1] if cls91to80 else ann["category_id"] - 1  # class
                box = [cls] + box.tolist()
                if box not in bboxes:
                    bboxes.append(box)
-                if use_segments and ann.get('segmentation') is not None:
-                    if len(ann['segmentation']) == 0:
-                        segments.append([])
-                        continue
-                    elif len(ann['segmentation']) > 1:
-                        s = merge_multi_segment(ann['segmentation'])
-                        s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
-                    else:
-                        s = [j for i in ann['segmentation'] for j in i]  # all segments concatenated
-                        s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
-                    s = [cls] + s
-                    if s not in segments:
+                    if use_segments and ann.get("segmentation") is not None:
+                        if len(ann["segmentation"]) == 0:
+                            segments.append([])
+                            continue
+                        elif len(ann["segmentation"]) > 1:
+                            s = merge_multi_segment(ann["segmentation"])
+                            s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
+                        else:
+                            s = [j for i in ann["segmentation"] for j in i]  # all segments concatenated
+                            s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
+                        s = [cls] + s
                        segments.append(s)
-                if use_keypoints and ann.get('keypoints') is not None:
-                    keypoints.append(box + (np.array(ann['keypoints']).reshape(-1, 3) /
-                                            np.array([w, h, 1])).reshape(-1).tolist())
+                    if use_keypoints and ann.get("keypoints") is not None:
+                        keypoints.append(
+                            box + (np.array(ann["keypoints"]).reshape(-1, 3) / np.array([w, h, 1])).reshape(-1).tolist()
+                        )

            # Write
-            with open((fn / f).with_suffix('.txt'), 'a') as file:
+            with open((fn / f).with_suffix(".txt"), "a") as file:
                for i in range(len(bboxes)):
                    if use_keypoints:
-                        line = *(keypoints[i]),  # cls, box, keypoints
+                        line = (*(keypoints[i]),)  # cls, box, keypoints
                    else:
-                        line = *(segments[i]
-                                 if use_segments and len(segments[i]) > 0 else bboxes[i]),  # cls, box or segments
-                    file.write(('%g ' * len(line)).rstrip() % line + '\n')
+                        line = (
+                            *(segments[i] if use_segments and len(segments[i]) > 0 else bboxes[i]),
+                        )  # cls, box or segments
+                    file.write(("%g " * len(line)).rstrip() % line + "\n")
+
+    LOGGER.info(f"COCO data converted successfully.\nResults saved to {save_dir.resolve()}")


 def convert_dota_to_yolo_obb(dota_root_path: str):
@ -160,48 +336,52 @@ def convert_dota_to_yolo_obb(dota_root_path: str):

    Notes:
        The directory structure assumed for the DOTA dataset:
-            - DOTA
-                - images
-                    - train
-                    - val
-                - labels
-                    - train_original
-                    - val_original

-        After the function execution, the new labels will be saved in:
            - DOTA
-                - labels
-                    - train
-                    - val
+                ├─ images
+                │   ├─ train
+                │   └─ val
+                └─ labels
+                    ├─ train_original
+                    └─ val_original
+
+        After execution, the function will organize the labels into:
+
+            - DOTA
+                └─ labels
+                    ├─ train
+                    └─ val
    """
    dota_root_path = Path(dota_root_path)

    # Class names to indices mapping
    class_mapping = {
-        'plane': 0,
-        'ship': 1,
-        'storage-tank': 2,
-        'baseball-diamond': 3,
-        'tennis-court': 4,
-        'basketball-court': 5,
-        'ground-track-field': 6,
-        'harbor': 7,
-        'bridge': 8,
-        'large-vehicle': 9,
-        'small-vehicle': 10,
-        'helicopter': 11,
-        'roundabout': 12,
-        'soccer ball-field': 13,
-        'swimming-pool': 14,
-        'container-crane': 15,
-        'airport': 16,
-        'helipad': 17}
+        "plane": 0,
+        "ship": 1,
+        "storage-tank": 2,
+        "baseball-diamond": 3,
+        "tennis-court": 4,
+        "basketball-court": 5,
+        "ground-track-field": 6,
+        "harbor": 7,
+        "bridge": 8,
+        "large-vehicle": 9,
+        "small-vehicle": 10,
+        "helicopter": 11,
+        "roundabout": 12,
+        "soccer-ball-field": 13,
+        "swimming-pool": 14,
+        "container-crane": 15,
+        "airport": 16,
+        "helipad": 17,
+    }

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
-        orig_label_path = orig_label_dir / f'{image_name}.txt'
-        save_path = save_dir / f'{image_name}.txt'
+        """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
+        orig_label_path = orig_label_dir / f"{image_name}.txt"
+        save_path = save_dir / f"{image_name}.txt"

-        with orig_label_path.open('r') as f, save_path.open('w') as g:
+        with orig_label_path.open("r") as f, save_path.open("w") as g:
            lines = f.readlines()
            for line in lines:
                parts = line.strip().split()
@ -211,20 +391,21 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
                class_idx = class_mapping[class_name]
                coords = [float(p) for p in parts[:8]]
                normalized_coords = [
-                    coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)]
-                formatted_coords = ['{:.6g}'.format(coord) for coord in normalized_coords]
+                    coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)
+                ]
+                formatted_coords = ["{:.6g}".format(coord) for coord in normalized_coords]
                g.write(f"{class_idx} {' '.join(formatted_coords)}\n")

-    for phase in ['train', 'val']:
-        image_dir = dota_root_path / 'images' / phase
-        orig_label_dir = dota_root_path / 'labels' / f'{phase}_original'
-        save_dir = dota_root_path / 'labels' / phase
+    for phase in ["train", "val"]:
+        image_dir = dota_root_path / "images" / phase
+        orig_label_dir = dota_root_path / "labels" / f"{phase}_original"
+        save_dir = dota_root_path / "labels" / phase

        save_dir.mkdir(parents=True, exist_ok=True)

        image_paths = list(image_dir.iterdir())
-        for image_path in TQDM(image_paths, desc=f'Processing {phase} images'):
-            if image_path.suffix != '.png':
+        for image_path in TQDM(image_paths, desc=f"Processing {phase} images"):
+            if image_path.suffix != ".png":
                continue
            image_name_without_ext = image_path.stem
            img = cv2.imread(str(image_path))
@ -237,8 +418,8 @@ def min_index(arr1, arr2):
    Find a pair of indexes with the shortest distance between two arrays of 2D points.

    Args:
-        arr1 (np.array): A NumPy array of shape (N, 2) representing N 2D points.
-        arr2 (np.array): A NumPy array of shape (M, 2) representing M 2D points.
+        arr1 (np.ndarray): A NumPy array of shape (N, 2) representing N 2D points.
+        arr2 (np.ndarray): A NumPy array of shape (M, 2) representing M 2D points.

    Returns:
        (tuple): A tuple containing the indexes of the points with the shortest distance in arr1 and arr2 respectively.
@ -263,31 +444,30 @@ def merge_multi_segment(segments):
    segments = [np.array(i).reshape(-1, 2) for i in segments]
    idx_list = [[] for _ in range(len(segments))]

-    # record the indexes with min distance between each segment
+    # Record the indexes with min distance between each segment
    for i in range(1, len(segments)):
        idx1, idx2 = min_index(segments[i - 1], segments[i])
        idx_list[i - 1].append(idx1)
        idx_list[i].append(idx2)

-    # use two round to connect all the segments
+    # Use two rounds to connect all the segments
    for k in range(2):
-        # forward connection
+        # Forward connection
        if k == 0:
            for i, idx in enumerate(idx_list):
-                # middle segments have two indexes
-                # reverse the index of middle segments
+                # Middle segments have two indexes, reverse the index of middle segments
                if len(idx) == 2 and idx[0] > idx[1]:
                    idx = idx[::-1]
                    segments[i] = segments[i][::-1, :]

                segments[i] = np.roll(segments[i], -idx[0], axis=0)
                segments[i] = np.concatenate([segments[i], segments[i][:1]])
-                # deal with the first segment and the last one
+                # Deal with the first segment and the last one
                if i in [0, len(idx_list) - 1]:
                    s.append(segments[i])
                else:
                    idx = [0, idx[1] - idx[0]]
-                    s.append(segments[i][idx[0]:idx[1] + 1])
+                    s.append(segments[i][idx[0] : idx[1] + 1])

        else:
            for i in range(len(idx_list) - 1, -1, -1):
@ -296,3 +476,67 @@ def merge_multi_segment(segments):
                    nidx = abs(idx[1] - idx[0])
                    s.append(segments[i][nidx:])
    return s
+
+
+def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
+    """
+    Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB)
+    in YOLO format. Generates segmentation data using SAM auto-annotator as needed.
+
+    Args:
+        im_dir (str | Path): Path to image directory to convert.
+        save_dir (str | Path): Path to save the generated labels. If None, labels are saved into a
+            `labels-segment` directory at the same level as `im_dir`. Default: None.
+        sam_model (str): Segmentation model to use for intermediate segmentation data; optional.
+
+    Notes:
+        The input directory structure assumed for dataset:
+
+            - im_dir
+                ├─ 001.jpg
+                ├─ ..
+                └─ NNN.jpg
+            - labels
+                ├─ 001.txt
+                ├─ ..
+                └─ NNN.txt
+    """
+    from ultralytics.data import YOLODataset
+    from ultralytics.utils.ops import xywh2xyxy
+    from ultralytics.utils import LOGGER
+    from ultralytics import SAM
+    from tqdm import tqdm
+
+    # NOTE: add placeholder to pass class index check
+    dataset = YOLODataset(im_dir, data=dict(names=list(range(1000))))
+    if len(dataset.labels[0]["segments"]) > 0:  # if it's segment data
+        LOGGER.info("Segmentation labels detected, no need to generate new ones!")
+        return
+
+    LOGGER.info("Detection labels detected, generating segment labels by SAM model!")
+    sam_model = SAM(sam_model)
+    for l in tqdm(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"):
+        h, w = l["shape"]
+        boxes = l["bboxes"]
+        if len(boxes) == 0:  # skip empty labels
+            continue
+        boxes[:, [0, 2]] *= w
+        boxes[:, [1, 3]] *= h
+        im = cv2.imread(l["im_file"])
+        sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False)
+        l["segments"] = sam_results[0].masks.xyn
+
+    save_dir = Path(save_dir) if save_dir else Path(im_dir).parent / "labels-segment"
+    save_dir.mkdir(parents=True, exist_ok=True)
+    for l in dataset.labels:
+        texts = []
+        lb_name = Path(l["im_file"]).with_suffix(".txt").name
+        txt_file = save_dir / lb_name
+        cls = l["cls"]
+        for i, s in enumerate(l["segments"]):
+            line = (int(cls[i]), *s.reshape(-1))
+            texts.append(("%g " * len(line)).rstrip() % line)
+        if texts:
+            with open(txt_file, "a") as f:
+                f.writelines(text + "\n" for text in texts)
+    LOGGER.info(f"Generated segment labels saved in {save_dir}")
--- a/ultralytics/data/dataset.py
+++ b/ultralytics/data/dataset.py
@ -8,15 +8,16 @@ import cv2
 import numpy as np
 import torch
 import torchvision
+from PIL import Image

 from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr, is_dir_writeable
-
-from .augment import Compose, Format, Instances, LetterBox, classify_albumentations, classify_transforms, v8_transforms
+from ultralytics.utils.ops import resample_segments
+from .augment import Compose, Format, Instances, LetterBox, classify_augmentations, classify_transforms, v8_transforms
 from .base import BaseDataset
 from .utils import HELP_URL, LOGGER, get_hash, img2label_paths, verify_image, verify_image_label

 # Ultralytics dataset *.cache version, >= 1.0.0 for YOLOv8
-DATASET_CACHE_VERSION = '1.0.3'
+DATASET_CACHE_VERSION = "1.0.3"


 class YOLODataset(BaseDataset):
@ -25,40 +26,54 @@ class YOLODataset(BaseDataset):

    Args:
        data (dict, optional): A dataset YAML dictionary. Defaults to None.
-        use_segments (bool, optional): If True, segmentation masks are used as labels. Defaults to False.
-        use_keypoints (bool, optional): If True, keypoints are used as labels. Defaults to False.
+        task (str): Explicitly specifies the current task. Defaults to 'detect'.

    Returns:
        (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
    """

-    def __init__(self, *args, data=None, use_segments=False, use_keypoints=False, **kwargs):
-        self.use_segments = use_segments
-        self.use_keypoints = use_keypoints
+    def __init__(self, *args, data=None, task="detect", **kwargs):
+        """Initializes the YOLODataset, deriving the segment, keypoint and OBB flags from `task`."""
+        self.use_segments = task == "segment"
+        self.use_keypoints = task == "pose"
+        self.use_obb = task == "obb"
        self.data = data
-        assert not (self.use_segments and self.use_keypoints), 'Can not use both segments and keypoints.'
+        assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
        super().__init__(*args, **kwargs)

-    def cache_labels(self, path=Path('./labels.cache')):
-        """Cache dataset labels, check images and read shapes.
+    def cache_labels(self, path=Path("./labels.cache")):
+        """
+        Cache dataset labels, check images and read shapes.
+
        Args:
-            path (Path): path where to save the cache file (default: Path('./labels.cache')).
+            path (Path): Path where to save the cache file. Default is Path('./labels.cache').
+
        Returns:
            (dict): labels.
        """
-        x = {'labels': []}
+        x = {"labels": []}
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
-        desc = f'{self.prefix}Scanning {path.parent / path.stem}...'
+        desc = f"{self.prefix}Scanning {path.parent / path.stem}..."
        total = len(self.im_files)
-        nkpt, ndim = self.data.get('kpt_shape', (0, 0))
+        nkpt, ndim = self.data.get("kpt_shape", (0, 0))
        if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)):
-            raise ValueError("'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
-                             "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'")
+            raise ValueError(
+                "'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
+                "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'"
+            )
        with ThreadPool(NUM_THREADS) as pool:
-            results = pool.imap(func=verify_image_label,
-                                iterable=zip(self.im_files, self.label_files, repeat(self.prefix),
-                                             repeat(self.use_keypoints), repeat(len(self.data['names'])), repeat(nkpt),
-                                             repeat(ndim)))
+            results = pool.imap(
+                func=verify_image_label,
+                iterable=zip(
+                    self.im_files,
+                    self.label_files,
+                    repeat(self.prefix),
+                    repeat(self.use_keypoints),
+                    repeat(len(self.data["names"])),
+                    repeat(nkpt),
+                    repeat(ndim),
+                ),
+            )
            pbar = TQDM(results, desc=desc, total=total)
            for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                nm += nm_f
@ -66,7 +81,7 @@ class YOLODataset(BaseDataset):
                ne += ne_f
                nc += nc_f
                if im_file:
-                    x['labels'].append(
+                    x["labels"].append(
                        dict(
                            im_file=im_file,
                            shape=shape,
@ -75,60 +90,63 @@ class YOLODataset(BaseDataset):
                            segments=segments,
                            keypoints=keypoint,
                            normalized=True,
-                            bbox_format='xywh'))
+                            bbox_format="xywh",
+                        )
+                    )
                if msg:
                    msgs.append(msg)
-                pbar.desc = f'{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt'
+                pbar.desc = f"{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt"
            pbar.close()

        if msgs:
-            LOGGER.info('\n'.join(msgs))
+            LOGGER.info("\n".join(msgs))
        if nf == 0:
-            LOGGER.warning(f'{self.prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}')
-        x['hash'] = get_hash(self.label_files + self.im_files)
-        x['results'] = nf, nm, ne, nc, len(self.im_files)
-        x['msgs'] = msgs  # warnings
+            LOGGER.warning(f"{self.prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}")
+        x["hash"] = get_hash(self.label_files + self.im_files)
+        x["results"] = nf, nm, ne, nc, len(self.im_files)
+        x["msgs"] = msgs  # warnings
        save_dataset_cache_file(self.prefix, path, x)
        return x

    def get_labels(self):
        """Returns dictionary of labels for YOLO training."""
        self.label_files = img2label_paths(self.im_files)
-        cache_path = Path(self.label_files[0]).parent.with_suffix('.cache')
+        cache_path = Path(self.label_files[0]).parent.with_suffix(".cache")
        try:
            cache, exists = load_dataset_cache_file(cache_path), True  # attempt to load a *.cache file
-            assert cache['version'] == DATASET_CACHE_VERSION  # matches current version
-            assert cache['hash'] == get_hash(self.label_files + self.im_files)  # identical hash
+            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
+            assert cache["hash"] == get_hash(self.label_files + self.im_files)  # identical hash
        except (FileNotFoundError, AssertionError, AttributeError):
            cache, exists = self.cache_labels(cache_path), False  # run cache ops

        # Display cache
-        nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupt, total
+        nf, nm, ne, nc, n = cache.pop("results")  # found, missing, empty, corrupt, total
        if exists and LOCAL_RANK in (-1, 0):
-            d = f'Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt'
+            d = f"Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt"
            TQDM(None, desc=self.prefix + d, total=n, initial=n)  # display results
-            if cache['msgs']:
-                LOGGER.info('\n'.join(cache['msgs']))  # display warnings
+            if cache["msgs"]:
+                LOGGER.info("\n".join(cache["msgs"]))  # display warnings

        # Read cache
-        [cache.pop(k) for k in ('hash', 'version', 'msgs')]  # remove items
-        labels = cache['labels']
+        [cache.pop(k) for k in ("hash", "version", "msgs")]  # remove items
+        labels = cache["labels"]
        if not labels:
-            LOGGER.warning(f'WARNING ⚠️ No images found in {cache_path}, training may not work correctly. {HELP_URL}')
-        self.im_files = [lb['im_file'] for lb in labels]  # update im_files
+            LOGGER.warning(f"WARNING ⚠️ No images found in {cache_path}, training may not work correctly. {HELP_URL}")
+        self.im_files = [lb["im_file"] for lb in labels]  # update im_files

        # Check if the dataset is all boxes or all segments
-        lengths = ((len(lb['cls']), len(lb['bboxes']), len(lb['segments'])) for lb in labels)
+        lengths = ((len(lb["cls"]), len(lb["bboxes"]), len(lb["segments"])) for lb in labels)
        len_cls, len_boxes, len_segments = (sum(x) for x in zip(*lengths))
        if len_segments and len_boxes != len_segments:
            LOGGER.warning(
-                f'WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, '
-                f'len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. '
-                'To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset.')
+                f"WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, "
+                f"len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. "
+                "To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset."
+            )
            for lb in labels:
-                lb['segments'] = []
+                lb["segments"] = []
        if len_cls == 0:
-            LOGGER.warning(f'WARNING ⚠️ No labels found in {cache_path}, training may not work correctly. {HELP_URL}')
+            LOGGER.warning(f"WARNING ⚠️ No labels found in {cache_path}, training may not work correctly. {HELP_URL}")
        return labels

    def build_transforms(self, hyp=None):
@ -140,13 +158,18 @@ class YOLODataset(BaseDataset):
        else:
            transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)])
        transforms.append(
-            Format(bbox_format='xywh',
-                   normalize=True,
-                   return_mask=self.use_segments,
-                   return_keypoint=self.use_keypoints,
-                   batch_idx=True,
-                   mask_ratio=hyp.mask_ratio,
-                   mask_overlap=hyp.overlap_mask))
+            Format(
+                bbox_format="xywh",
+                normalize=True,
+                return_mask=self.use_segments,
+                return_keypoint=self.use_keypoints,
+                return_obb=self.use_obb,
+                batch_idx=True,
+                mask_ratio=hyp.mask_ratio,
+                mask_overlap=hyp.overlap_mask,
+                bgr=hyp.bgr if self.augment else 0.0,  # only affects training.
+            )
+        )
        return transforms

    def close_mosaic(self, hyp):
@ -157,15 +180,28 @@ class YOLODataset(BaseDataset):
        self.transforms = self.build_transforms(hyp)

    def update_labels_info(self, label):
-        """custom your label format here."""
-        # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
-        # we can make it also support classification and semantic segmentation by add or remove some dict keys there.
-        bboxes = label.pop('bboxes')
-        segments = label.pop('segments')
-        keypoints = label.pop('keypoints', None)
-        bbox_format = label.pop('bbox_format')
-        normalized = label.pop('normalized')
-        label['instances'] = Instances(bboxes, segments, keypoints, bbox_format=bbox_format, normalized=normalized)
+        """
+        Customize your label format here.
+
+        Note:
+            cls is not with bboxes now, classification and semantic segmentation need an independent cls label
+            Can also support classification and semantic segmentation by adding or removing dict keys there.
+        """
+        bboxes = label.pop("bboxes")
+        segments = label.pop("segments", [])
+        keypoints = label.pop("keypoints", None)
+        bbox_format = label.pop("bbox_format")
+        normalized = label.pop("normalized")
+
+        # NOTE: oriented boxes use far fewer resample points (100 instead of 1000)
+        segment_resamples = 100 if self.use_obb else 1000
+        if len(segments) > 0:
+            # list[np.array(1000, 2)] * num_samples
+            # (N, 1000, 2)
+            segments = np.stack(resample_segments(segments, n=segment_resamples), axis=0)
+        else:
+            segments = np.zeros((0, segment_resamples, 2), dtype=np.float32)
+        label["instances"] = Instances(bboxes, segments, keypoints, bbox_format=bbox_format, normalized=normalized)
        return label

    @staticmethod
@ -176,65 +212,75 @@ class YOLODataset(BaseDataset):
        values = list(zip(*[list(b.values()) for b in batch]))
        for i, k in enumerate(keys):
            value = values[i]
-            if k == 'img':
+            if k == "img":
                value = torch.stack(value, 0)
-            if k in ['masks', 'keypoints', 'bboxes', 'cls']:
+            if k in ["masks", "keypoints", "bboxes", "cls", "segments", "obb"]:
                value = torch.cat(value, 0)
            new_batch[k] = value
-        new_batch['batch_idx'] = list(new_batch['batch_idx'])
-        for i in range(len(new_batch['batch_idx'])):
-            new_batch['batch_idx'][i] += i  # add target image index for build_targets()
-        new_batch['batch_idx'] = torch.cat(new_batch['batch_idx'], 0)
+        new_batch["batch_idx"] = list(new_batch["batch_idx"])
+        for i in range(len(new_batch["batch_idx"])):
+            new_batch["batch_idx"][i] += i  # add target image index for build_targets()
+        new_batch["batch_idx"] = torch.cat(new_batch["batch_idx"], 0)
        return new_batch


 # Classification dataloaders -------------------------------------------------------------------------------------------
 class ClassificationDataset(torchvision.datasets.ImageFolder):
    """
-    YOLO Classification Dataset.
+    Extends torchvision ImageFolder to support YOLO classification tasks, offering functionalities like image
+    augmentation, caching, and verification. It's designed to efficiently handle large datasets for training deep
+    learning models, with optional image transformations and caching mechanisms to speed up training.

-    Args:
-        root (str): Dataset path.
+    This class allows for augmentations using both torchvision and Albumentations libraries, and supports caching images
+    in RAM or on disk to reduce IO overhead during training. Additionally, it implements a robust verification process
+    to ensure data integrity and consistency.

    Attributes:
-        cache_ram (bool): True if images should be cached in RAM, False otherwise.
-        cache_disk (bool): True if images should be cached on disk, False otherwise.
-        samples (list): List of samples containing file, index, npy, and im.
-        torch_transforms (callable): torchvision transforms applied to the dataset.
-        album_transforms (callable, optional): Albumentations transforms applied to the dataset if augment is True.
+        cache_ram (bool): Indicates if caching in RAM is enabled.
+        cache_disk (bool): Indicates if caching on disk is enabled.
+        samples (list): A list of tuples, each containing the path to an image, its class index, path to its .npy cache
+                        file (if caching on disk), and optionally the loaded image array (if caching in RAM).
+        torch_transforms (callable): PyTorch transforms to be applied to the images.
    """

-    def __init__(self, root, args, augment=False, cache=False, prefix=''):
+    def __init__(self, root, args, augment=False, prefix=""):
        """
        Initialize YOLO object with root, image size, augmentations, and cache settings.

        Args:
-            root (str): Dataset path.
-            args (Namespace): Argument parser containing dataset related settings.
-            augment (bool, optional): True if dataset should be augmented, False otherwise. Defaults to False.
-            cache (bool | str | optional): Cache setting, can be True, False, 'ram' or 'disk'. Defaults to False.
+            root (str): Path to the dataset directory where images are stored in a class-specific folder structure.
+            args (Namespace): Configuration containing dataset-related settings such as image size, augmentation
+                parameters, and cache settings. It includes attributes like `imgsz` (image size), `fraction` (fraction
+                of data to use), `scale`, `fliplr`, `flipud`, `cache` (disk or RAM caching for faster training),
+                `auto_augment`, `hsv_h`, `hsv_s`, `hsv_v`, and `crop_fraction`.
+            augment (bool, optional): Whether to apply augmentations to the dataset. Default is False.
+            prefix (str, optional): Prefix for logging and cache filenames, aiding in dataset identification and
+                debugging. Default is an empty string.
        """
        super().__init__(root=root)
        if augment and args.fraction < 1.0:  # reduce training fraction
-            self.samples = self.samples[:round(len(self.samples) * args.fraction)]
-        self.prefix = colorstr(f'{prefix}: ') if prefix else ''
-        self.cache_ram = cache is True or cache == 'ram'
-        self.cache_disk = cache == 'disk'
+            self.samples = self.samples[: round(len(self.samples) * args.fraction)]
+        self.prefix = colorstr(f"{prefix}: ") if prefix else ""
+        self.cache_ram = args.cache is True or args.cache == "ram"  # cache images into RAM
+        self.cache_disk = args.cache == "disk"  # cache images on hard drive as uncompressed *.npy files
        self.samples = self.verify_images()  # filter out bad images
-        self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples]  # file, index, npy, im
-        self.torch_transforms = classify_transforms(args.imgsz)
-        self.album_transforms = classify_albumentations(
-            augment=augment,
-            size=args.imgsz,
-            scale=(1.0 - args.scale, 1.0),  # (0.08, 1.0)
-            hflip=args.fliplr,
-            vflip=args.flipud,
-            hsv_h=args.hsv_h,  # HSV-Hue augmentation (fraction)
-            hsv_s=args.hsv_s,  # HSV-Saturation augmentation (fraction)
-            hsv_v=args.hsv_v,  # HSV-Value augmentation (fraction)
-            mean=(0.0, 0.0, 0.0),  # IMAGENET_MEAN
-            std=(1.0, 1.0, 1.0),  # IMAGENET_STD
-            auto_aug=False) if augment else None
+        self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
+        scale = (1.0 - args.scale, 1.0)  # (0.08, 1.0)
+        self.torch_transforms = (
+            classify_augmentations(
+                size=args.imgsz,
+                scale=scale,
+                hflip=args.fliplr,
+                vflip=args.flipud,
+                erasing=args.erasing,
+                auto_augment=args.auto_augment,
+                hsv_h=args.hsv_h,
+                hsv_s=args.hsv_s,
+                hsv_v=args.hsv_v,
+            )
+            if augment
+            else classify_transforms(size=args.imgsz, crop_fraction=args.crop_fraction)
+        )

    def __getitem__(self, i):
        """Returns subset of data and targets corresponding to given indices."""
@ -247,30 +293,30 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
            im = np.load(fn)
        else:  # read image
            im = cv2.imread(f)  # BGR
-        if self.album_transforms:
-            sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))['image']
-        else:
-            sample = self.torch_transforms(im)
-        return {'img': sample, 'cls': j}
+        # NOTE(review): cv2.imread returns None when a file cannot be read, which would make cvtColor below
+        # raise; presumably verify_images() has already filtered such files out — confirm.
+        # Convert NumPy array to PIL image
+        im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
+        sample = self.torch_transforms(im)
+        return {"img": sample, "cls": j}

    def __len__(self) -> int:
+        """Return the total number of samples in the dataset."""
+        # self.samples is the list assigned in __init__ from verify_images(), i.e. after bad images are filtered.
        return len(self.samples)

    def verify_images(self):
        """Verify all images in dataset."""
-        desc = f'{self.prefix}Scanning {self.root}...'
-        path = Path(self.root).with_suffix('.cache')  # *.cache file path
+        desc = f"{self.prefix}Scanning {self.root}..."
+        path = Path(self.root).with_suffix(".cache")  # *.cache file path

+        # The asserts below act as control flow: a missing cache file, a version mismatch or a hash mismatch
+        # raises inside the suppress() block, which is swallowed so execution falls through to the full scan.
+        # NOTE(review): asserts are stripped under `python -O`, which would skip the version/hash checks.
        with contextlib.suppress(FileNotFoundError, AssertionError, AttributeError):
            cache = load_dataset_cache_file(path)  # attempt to load a *.cache file
-            assert cache['version'] == DATASET_CACHE_VERSION  # matches current version
-            assert cache['hash'] == get_hash([x[0] for x in self.samples])  # identical hash
-            nf, nc, n, samples = cache.pop('results')  # found, missing, empty, corrupt, total
+            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
+            assert cache["hash"] == get_hash([x[0] for x in self.samples])  # identical hash
+            nf, nc, n, samples = cache.pop("results")  # found, corrupt, total, samples
            if LOCAL_RANK in (-1, 0):
-                d = f'{desc} {nf} images, {nc} corrupt'
+                d = f"{desc} {nf} images, {nc} corrupt"
                TQDM(None, desc=d, total=n, initial=n)
-                if cache['msgs']:
-                    LOGGER.info('\n'.join(cache['msgs']))  # display warnings
+                if cache["msgs"]:
+                    LOGGER.info("\n".join(cache["msgs"]))  # display warnings
            return samples

        # Run scan if *.cache retrieval failed
@ -285,13 +331,13 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
                    msgs.append(msg)
                nf += nf_f
                nc += nc_f
-                pbar.desc = f'{desc} {nf} images, {nc} corrupt'
+                pbar.desc = f"{desc} {nf} images, {nc} corrupt"
            pbar.close()
        if msgs:
-            LOGGER.info('\n'.join(msgs))
-        x['hash'] = get_hash([x[0] for x in self.samples])
-        x['results'] = nf, nc, len(samples), samples
-        x['msgs'] = msgs  # warnings
+            LOGGER.info("\n".join(msgs))
+        # The comprehension variable `x` below is local to the comprehension and does not rebind the dict `x`.
+        x["hash"] = get_hash([x[0] for x in self.samples])
+        x["results"] = nf, nc, len(samples), samples
+        x["msgs"] = msgs  # warnings
        save_dataset_cache_file(self.prefix, path, x)
        return samples

@ -299,6 +345,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
 def load_dataset_cache_file(path):
    """Load an Ultralytics *.cache dictionary from path."""
    import gc
+
+    # NOTE(review): if np.load raises (e.g. FileNotFoundError for a missing cache, which verify_images
+    # suppresses), gc.enable() below is never reached and garbage collection stays disabled for the
+    # rest of the process; consider wrapping the load in try/finally.
+    # NOTE(review): allow_pickle=True unpickles arbitrary objects, so only load cache files from trusted sources.
    gc.disable()  # reduce pickle load time https://github.com/ultralytics/ultralytics/pull/1585
    cache = np.load(str(path), allow_pickle=True).item()  # load dict
    gc.enable()
@ -307,19 +354,29 @@ def load_dataset_cache_file(path):

 def save_dataset_cache_file(prefix, path, x):
    """Save an Ultralytics dataset *.cache dictionary x to path."""
+    # x is mutated in place (a "version" key is added) before it is written.
+    # path is used through .parent/.exists()/.unlink()/.with_suffix(), so it is presumably a pathlib.Path — confirm.
-    x['version'] = DATASET_CACHE_VERSION  # add cache version
+    x["version"] = DATASET_CACHE_VERSION  # add cache version
    if is_dir_writeable(path.parent):
        if path.exists():
            path.unlink()  # remove *.cache file if exists
        np.save(str(path), x)  # save cache for next time
+        # np.save appends ".npy" to a filename that does not already end in it, hence the rename below.
+        # with_suffix() replaces the last suffix, so the lookup matches the saved file only when path ends in ".cache".
-        path.with_suffix('.cache.npy').rename(path)  # remove .npy suffix
-        LOGGER.info(f'{prefix}New cache created: {path}')
+        path.with_suffix(".cache.npy").rename(path)  # remove .npy suffix
+        LOGGER.info(f"{prefix}New cache created: {path}")
    else:
+        # Nothing is written in this branch; only a warning is logged.
-        LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable, cache not saved.')
+        LOGGER.warning(f"{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable, cache not saved.")


 # TODO: support semantic segmentation
 class SemanticDataset(BaseDataset):
+    """
+    Semantic Segmentation Dataset.
+
+    This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
+    from the BaseDataset class.
+
+    Note:
+        This class is currently a placeholder and needs to be populated with methods and attributes for supporting
+        semantic segmentation tasks.
+    """

    def __init__(self):
        """Initialize a SemanticDataset object."""
--- a/ultralytics/data/explorer/init.py
+++ b/ultralytics/data/explorer/init.py
@ -0,0 +1,5 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+
+from .utils import plot_query_result
+
+__all__ = ["plot_query_result"]
--- a/ultralytics/data/explorer/pycache/init.cpython-312.pyc
+++ b/ultralytics/data/explorer/pycache/init.cpython-312.pyc
--- a/ultralytics/data/explorer/pycache/init.cpython-39.pyc
+++ b/ultralytics/data/explorer/pycache/init.cpython-39.pyc
--- a/ultralytics/data/explorer/pycache/explorer.cpython-312.pyc
+++ b/ultralytics/data/explorer/pycache/explorer.cpython-312.pyc
--- a/ultralytics/data/explorer/pycache/explorer.cpython-39.pyc
+++ b/ultralytics/data/explorer/pycache/explorer.cpython-39.pyc
--- a/ultralytics/data/explorer/pycache/utils.cpython-312.pyc
+++ b/ultralytics/data/explorer/pycache/utils.cpython-312.pyc
--- a/ultralytics/data/explorer/pycache/utils.cpython-39.pyc
+++ b/ultralytics/data/explorer/pycache/utils.cpython-39.pyc
--- a/Show More
+++ b/Show More