Parse returned data; compatible with v5 and v10
27 ultralytics/__init__.py Normal file
@@ -0,0 +1,27 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

__version__ = "8.1.34"

from ultralytics.data.explorer.explorer import Explorer
from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10
from ultralytics.models.fastsam import FastSAM
from ultralytics.models.nas import NAS
from ultralytics.utils import ASSETS, SETTINGS as settings
from ultralytics.utils.checks import check_yolo as checks
from ultralytics.utils.downloads import download

__all__ = (
    "__version__",
    "ASSETS",
    "YOLO",
    "YOLOWorld",
    "NAS",
    "SAM",
    "FastSAM",
    "RTDETR",
    "checks",
    "download",
    "settings",
    "Explorer",
    "YOLOv10"
)
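The new top-level API exports the existing `YOLO` entry point and the added `YOLOv10` class side by side. A minimal usage sketch under assumed weight filenames (the `.pt` names and the image path below are illustrations, not part of this commit):

```python
from ultralytics import YOLO, YOLOv10

# v3/v5/v6/v8/v9 checkpoints keep loading through the existing YOLO class
model_v8 = YOLO("yolov8n.pt")

# v10 checkpoints go through the newly exported YOLOv10 class
model_v10 = YOLOv10("yolov10n.pt")  # hypothetical local weights file

results = model_v10.predict(source="bus.jpg")  # standard Ultralytics predict call
```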
BIN ultralytics/__pycache__/__init__.cpython-312.pyc Normal file
Binary file not shown.
BIN ultralytics/__pycache__/__init__.cpython-39.pyc Normal file
Binary file not shown.
613 ultralytics/cfg/__init__.py Normal file
@@ -0,0 +1,613 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import contextlib
import os
import shutil
import subprocess
import sys
from pathlib import Path
from types import SimpleNamespace
from typing import Dict, List, Union
import re

from ultralytics.utils import (
    ASSETS,
    DEFAULT_CFG,
    DEFAULT_CFG_DICT,
    DEFAULT_CFG_PATH,
    LOGGER,
    RANK,
    ROOT,
    RUNS_DIR,
    SETTINGS,
    SETTINGS_YAML,
    TESTS_RUNNING,
    IterableSimpleNamespace,
    __version__,
    checks,
    colorstr,
    deprecation_warn,
    yaml_load,
    yaml_print,
)

# Define valid tasks and modes
MODES = {"train", "val", "predict", "export", "track", "benchmark"}
TASKS = {"detect", "segment", "classify", "pose", "obb"}
TASK2DATA = {
    "detect": "coco8.yaml",
    "segment": "coco8-seg.yaml",
    "classify": "imagenet10",
    "pose": "coco8-pose.yaml",
    "obb": "dota8.yaml",
}
TASK2MODEL = {
    "detect": "yolov8n.pt",
    "segment": "yolov8n-seg.pt",
    "classify": "yolov8n-cls.pt",
    "pose": "yolov8n-pose.pt",
    "obb": "yolov8n-obb.pt",
}
TASK2METRIC = {
    "detect": "metrics/mAP50-95(B)",
    "segment": "metrics/mAP50-95(M)",
    "classify": "metrics/accuracy_top1",
    "pose": "metrics/mAP50-95(P)",
    "obb": "metrics/mAP50-95(B)",
}

CLI_HELP_MSG = f"""
    Arguments received: {str(['yolo'] + sys.argv[1:])}. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS

        Where   TASK (optional) is one of {TASKS}
                MODE (required) is one of {MODES}
                ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults.
                    See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg'

    1. Train a detection model for 10 epochs with an initial learning_rate of 0.01
        yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01

    2. Predict a YouTube video using a pretrained segmentation model at image size 320:
        yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320

    3. Val a pretrained detection model at batch-size 1 and image size 640:
        yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640

    4. Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required)
        yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128

    5. Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API
        yolo explorer

    6. Run special commands:
        yolo help
        yolo checks
        yolo version
        yolo settings
        yolo copy-cfg
        yolo cfg

    Docs: https://docs.ultralytics.com
    Community: https://community.ultralytics.com
    GitHub: https://github.com/ultralytics/ultralytics
    """

# Define keys for arg type checks
CFG_FLOAT_KEYS = {"warmup_epochs", "box", "cls", "dfl", "degrees", "shear", "time"}
CFG_FRACTION_KEYS = {
    "dropout",
    "iou",
    "lr0",
    "lrf",
    "momentum",
    "weight_decay",
    "warmup_momentum",
    "warmup_bias_lr",
    "label_smoothing",
    "hsv_h",
    "hsv_s",
    "hsv_v",
    "translate",
    "scale",
    "perspective",
    "flipud",
    "fliplr",
    "bgr",
    "mosaic",
    "mixup",
    "copy_paste",
    "conf",
    "fraction",
}  # fraction floats 0.0 - 1.0
CFG_INT_KEYS = {
    "epochs",
    "patience",
    "batch",
    "workers",
    "seed",
    "close_mosaic",
    "mask_ratio",
    "max_det",
    "vid_stride",
    "line_width",
    "workspace",
    "nbs",
    "save_period",
}
CFG_BOOL_KEYS = {
    "save",
    "exist_ok",
    "verbose",
    "deterministic",
    "single_cls",
    "rect",
    "cos_lr",
    "overlap_mask",
    "val",
    "save_json",
    "save_hybrid",
    "half",
    "dnn",
    "plots",
    "show",
    "save_txt",
    "save_conf",
    "save_crop",
    "save_frames",
    "show_labels",
    "show_conf",
    "visualize",
    "augment",
    "agnostic_nms",
    "retina_masks",
    "show_boxes",
    "keras",
    "optimize",
    "int8",
    "dynamic",
    "simplify",
    "nms",
    "profile",
    "multi_scale",
}


def cfg2dict(cfg):
    """
    Convert a configuration object to a dictionary, whether it is a file path, a string, or a SimpleNamespace object.

    Args:
        cfg (str | Path | dict | SimpleNamespace): Configuration object to be converted to a dictionary.

    Returns:
        cfg (dict): Configuration object in dictionary format.
    """
    if isinstance(cfg, (str, Path)):
        cfg = yaml_load(cfg)  # load dict
    elif isinstance(cfg, SimpleNamespace):
        cfg = vars(cfg)  # convert to dict
    return cfg


def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, overrides: Dict = None):
    """
    Load and merge configuration data from a file or dictionary.

    Args:
        cfg (str | Path | Dict | SimpleNamespace): Configuration data.
        overrides (str | Dict, optional): Overrides in the form of a file name or a dictionary. Default is None.

    Returns:
        (SimpleNamespace): Training arguments namespace.
    """
    cfg = cfg2dict(cfg)

    # Merge overrides
    if overrides:
        overrides = cfg2dict(overrides)
        if "save_dir" not in cfg:
            overrides.pop("save_dir", None)  # special override keys to ignore
        check_dict_alignment(cfg, overrides)
        cfg = {**cfg, **overrides}  # merge cfg and overrides dicts (prefer overrides)

    # Special handling for numeric project/name
    for k in "project", "name":
        if k in cfg and isinstance(cfg[k], (int, float)):
            cfg[k] = str(cfg[k])
    if cfg.get("name") == "model":  # assign model to 'name' arg
        cfg["name"] = cfg.get("model", "").split(".")[0]
        LOGGER.warning(f"WARNING ⚠️ 'name=model' automatically updated to 'name={cfg['name']}'.")

    # Type and Value checks
    check_cfg(cfg)

    # Return instance
    return IterableSimpleNamespace(**cfg)


def check_cfg(cfg, hard=True):
    """Check Ultralytics configuration argument types and values."""
    for k, v in cfg.items():
        if v is not None:  # None values may be from optional args
            if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)):
                if hard:
                    raise TypeError(
                        f"'{k}={v}' is of invalid type {type(v).__name__}. "
                        f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')"
                    )
                cfg[k] = float(v)
            elif k in CFG_FRACTION_KEYS:
                if not isinstance(v, (int, float)):
                    if hard:
                        raise TypeError(
                            f"'{k}={v}' is of invalid type {type(v).__name__}. "
                            f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')"
                        )
                    cfg[k] = v = float(v)
                if not (0.0 <= v <= 1.0):
                    raise ValueError(f"'{k}={v}' is an invalid value. " f"Valid '{k}' values are between 0.0 and 1.0.")
            elif k in CFG_INT_KEYS and not isinstance(v, int):
                if hard:
                    raise TypeError(
                        f"'{k}={v}' is of invalid type {type(v).__name__}. " f"'{k}' must be an int (i.e. '{k}=8')"
                    )
                cfg[k] = int(v)
            elif k in CFG_BOOL_KEYS and not isinstance(v, bool):
                if hard:
                    raise TypeError(
                        f"'{k}={v}' is of invalid type {type(v).__name__}. "
                        f"'{k}' must be a bool (i.e. '{k}=True' or '{k}=False')"
                    )
                cfg[k] = bool(v)


def get_save_dir(args, name=None):
    """Return save_dir as created from train/val/predict arguments."""

    if getattr(args, "save_dir", None):
        save_dir = args.save_dir
    else:
        from ultralytics.utils.files import increment_path

        project = args.project or (ROOT.parent / "tests/tmp/runs" if TESTS_RUNNING else RUNS_DIR) / args.task
        name = name or args.name or f"{args.mode}"
        save_dir = increment_path(Path(project) / name, exist_ok=args.exist_ok if RANK in (-1, 0) else True)

    return Path(save_dir)


def _handle_deprecation(custom):
    """Hardcoded function to handle deprecated config keys."""

    for key in custom.copy().keys():
        if key == "boxes":
            deprecation_warn(key, "show_boxes")
            custom["show_boxes"] = custom.pop("boxes")
        if key == "hide_labels":
            deprecation_warn(key, "show_labels")
            custom["show_labels"] = custom.pop("hide_labels") == "False"
        if key == "hide_conf":
            deprecation_warn(key, "show_conf")
            custom["show_conf"] = custom.pop("hide_conf") == "False"
        if key == "line_thickness":
            deprecation_warn(key, "line_width")
            custom["line_width"] = custom.pop("line_thickness")

    return custom


def check_dict_alignment(base: Dict, custom: Dict, e=None):
    """
    This function checks for any mismatched keys between a custom configuration list and a base configuration list. If
    any mismatched keys are found, the function prints out similar keys from the base list and exits the program.

    Args:
        custom (dict): a dictionary of custom configuration options
        base (dict): a dictionary of base configuration options
        e (Error, optional): An optional error that is passed by the calling function.
    """
    custom = _handle_deprecation(custom)
    base_keys, custom_keys = (set(x.keys()) for x in (base, custom))
    mismatched = [k for k in custom_keys if k not in base_keys]
    if mismatched:
        from difflib import get_close_matches

        string = ""
        for x in mismatched:
            matches = get_close_matches(x, base_keys)  # key list
            matches = [f"{k}={base[k]}" if base.get(k) is not None else k for k in matches]
            match_str = f"Similar arguments are i.e. {matches}." if matches else ""
            string += f"'{colorstr('red', 'bold', x)}' is not a valid YOLO argument. {match_str}\n"
        raise SyntaxError(string + CLI_HELP_MSG) from e


def merge_equals_args(args: List[str]) -> List[str]:
    """
    Merges arguments around isolated '=' args in a list of strings. The function considers cases where the first
    argument ends with '=' or the second starts with '=', as well as when the middle one is an equals sign.

    Args:
        args (List[str]): A list of strings where each element is an argument.

    Returns:
        (List[str]): A list of strings where the arguments around isolated '=' are merged.
    """
    new_args = []
    for i, arg in enumerate(args):
        if arg == "=" and 0 < i < len(args) - 1:  # merge ['arg', '=', 'val']
            new_args[-1] += f"={args[i + 1]}"
            del args[i + 1]
        elif arg.endswith("=") and i < len(args) - 1 and "=" not in args[i + 1]:  # merge ['arg=', 'val']
            new_args.append(f"{arg}{args[i + 1]}")
            del args[i + 1]
        elif arg.startswith("=") and i > 0:  # merge ['arg', '=val']
            new_args[-1] += arg
        else:
            new_args.append(arg)
    return new_args


def handle_yolo_hub(args: List[str]) -> None:
    """
    Handle Ultralytics HUB command-line interface (CLI) commands.

    This function processes Ultralytics HUB CLI commands such as login and logout.
    It should be called when executing a script with arguments related to HUB authentication.

    Args:
        args (List[str]): A list of command line arguments.

    Example:
        ```bash
        python my_script.py hub login your_api_key
        ```
    """
    from ultralytics import hub

    if args[0] == "login":
        key = args[1] if len(args) > 1 else ""
        # Log in to Ultralytics HUB using the provided API key
        hub.login(key)
    elif args[0] == "logout":
        # Log out from Ultralytics HUB
        hub.logout()


def handle_yolo_settings(args: List[str]) -> None:
    """
    Handle YOLO settings command-line interface (CLI) commands.

    This function processes YOLO settings CLI commands such as reset.
    It should be called when executing a script with arguments related to YOLO settings management.

    Args:
        args (List[str]): A list of command line arguments for YOLO settings management.

    Example:
        ```bash
        python my_script.py yolo settings reset
        ```
    """
    url = "https://docs.ultralytics.com/quickstart/#ultralytics-settings"  # help URL
    try:
        if any(args):
            if args[0] == "reset":
                SETTINGS_YAML.unlink()  # delete the settings file
                SETTINGS.reset()  # create new settings
                LOGGER.info("Settings reset successfully")  # inform the user that settings have been reset
            else:  # save a new setting
                new = dict(parse_key_value_pair(a) for a in args)
                check_dict_alignment(SETTINGS, new)
                SETTINGS.update(new)

        LOGGER.info(f"💡 Learn about settings at {url}")
        yaml_print(SETTINGS_YAML)  # print the current settings
    except Exception as e:
        LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.")


def handle_explorer():
    """Open the Ultralytics Explorer GUI."""
    checks.check_requirements("streamlit")
    LOGGER.info("💡 Loading Explorer dashboard...")
    subprocess.run(["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"])


def parse_key_value_pair(pair):
    """Parse one 'key=value' pair and return key and value."""
    k, v = pair.split("=", 1)  # split on first '=' sign
    k, v = k.strip(), v.strip()  # remove spaces
    assert v, f"missing '{k}' value"
    return k, smart_value(v)


def smart_value(v):
    """Convert a string to an underlying type such as int, float, bool, etc."""
    v_lower = v.lower()
    if v_lower == "none":
        return None
    elif v_lower == "true":
        return True
    elif v_lower == "false":
        return False
    else:
        with contextlib.suppress(Exception):
            return eval(v)
        return v


def entrypoint(debug=""):
    """
    This function is the ultralytics package entrypoint, it's responsible for parsing the command line arguments passed
    to the package.

    This function allows for:
    - passing mandatory YOLO args as a list of strings
    - specifying the task to be performed, either 'detect', 'segment' or 'classify'
    - specifying the mode, either 'train', 'val', 'test', or 'predict'
    - running special modes like 'checks'
    - passing overrides to the package's configuration

    It uses the package's default cfg and initializes it using the passed overrides.
    Then it calls the CLI function with the composed cfg.
    """
    args = (debug.split(" ") if debug else sys.argv)[1:]
    if not args:  # no arguments passed
        LOGGER.info(CLI_HELP_MSG)
        return

    special = {
        "help": lambda: LOGGER.info(CLI_HELP_MSG),
        "checks": checks.collect_system_info,
        "version": lambda: LOGGER.info(__version__),
        "settings": lambda: handle_yolo_settings(args[1:]),
        "cfg": lambda: yaml_print(DEFAULT_CFG_PATH),
        "hub": lambda: handle_yolo_hub(args[1:]),
        "login": lambda: handle_yolo_hub(args),
        "copy-cfg": copy_default_cfg,
        "explorer": lambda: handle_explorer(),
    }
    full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special}

    # Define common misuses of special commands, i.e. -h, -help, --help
    special.update({k[0]: v for k, v in special.items()})  # singular
    special.update({k[:-1]: v for k, v in special.items() if len(k) > 1 and k.endswith("s")})  # singular
    special = {**special, **{f"-{k}": v for k, v in special.items()}, **{f"--{k}": v for k, v in special.items()}}

    overrides = {}  # basic overrides, i.e. imgsz=320
    for a in merge_equals_args(args):  # merge spaces around '=' sign
        if a.startswith("--"):
            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require leading dashes '--', updating to '{a[2:]}'.")
            a = a[2:]
        if a.endswith(","):
            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require trailing comma ',', updating to '{a[:-1]}'.")
            a = a[:-1]
        if "=" in a:
            try:
                k, v = parse_key_value_pair(a)
                if k == "cfg" and v is not None:  # custom.yaml passed
                    LOGGER.info(f"Overriding {DEFAULT_CFG_PATH} with {v}")
                    overrides = {k: val for k, val in yaml_load(checks.check_yaml(v)).items() if k != "cfg"}
                else:
                    overrides[k] = v
            except (NameError, SyntaxError, ValueError, AssertionError) as e:
                check_dict_alignment(full_args_dict, {a: ""}, e)

        elif a in TASKS:
            overrides["task"] = a
        elif a in MODES:
            overrides["mode"] = a
        elif a.lower() in special:
            special[a.lower()]()
            return
        elif a in DEFAULT_CFG_DICT and isinstance(DEFAULT_CFG_DICT[a], bool):
            overrides[a] = True  # auto-True for default bool args, i.e. 'yolo show' sets show=True
        elif a in DEFAULT_CFG_DICT:
            raise SyntaxError(
                f"'{colorstr('red', 'bold', a)}' is a valid YOLO argument but is missing an '=' sign "
                f"to set its value, i.e. try '{a}={DEFAULT_CFG_DICT[a]}'\n{CLI_HELP_MSG}"
            )
        else:
            check_dict_alignment(full_args_dict, {a: ""})

    # Check keys
    check_dict_alignment(full_args_dict, overrides)

    # Mode
    mode = overrides.get("mode")
    if mode is None:
        mode = DEFAULT_CFG.mode or "predict"
        LOGGER.warning(f"WARNING ⚠️ 'mode' argument is missing. Valid modes are {MODES}. Using default 'mode={mode}'.")
    elif mode not in MODES:
        raise ValueError(f"Invalid 'mode={mode}'. Valid modes are {MODES}.\n{CLI_HELP_MSG}")

    # Task
    task = overrides.pop("task", None)
    if task:
        if task not in TASKS:
            raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}")
        if "model" not in overrides:
            overrides["model"] = TASK2MODEL[task]

    # Model
    model = overrides.pop("model", DEFAULT_CFG.model)
    if model is None:
        model = "yolov8n.pt"
        LOGGER.warning(f"WARNING ⚠️ 'model' argument is missing. Using default 'model={model}'.")
    overrides["model"] = model
    # stem = Path(model).stem.lower()
    stem = model.lower()
    if "rtdetr" in stem:  # guess architecture
        from ultralytics import RTDETR

        model = RTDETR(model)  # no task argument
    elif "fastsam" in stem:
        from ultralytics import FastSAM

        model = FastSAM(model)
    elif "sam" in stem:
        from ultralytics import SAM

        model = SAM(model)
    elif re.search("v3|v5|v6|v8|v9", stem):
        from ultralytics import YOLO

        model = YOLO(model, task=task)
    else:
        from ultralytics import YOLOv10

        # Special case for the HuggingFace Hub
        split_path = model.split('/')
        if len(split_path) == 2 and (not os.path.exists(model)):
            model = YOLOv10.from_pretrained(model)
        else:
            model = YOLOv10(model)
    if isinstance(overrides.get("pretrained"), str):
        model.load(overrides["pretrained"])

    # Task Update
    if task != model.task:
        if task:
            LOGGER.warning(
                f"WARNING ⚠️ conflicting 'task={task}' passed with 'task={model.task}' model. "
                f"Ignoring 'task={task}' and updating to 'task={model.task}' to match model."
            )
        task = model.task

    # Mode
    if mode in ("predict", "track") and "source" not in overrides:
        overrides["source"] = DEFAULT_CFG.source or ASSETS
        LOGGER.warning(f"WARNING ⚠️ 'source' argument is missing. Using default 'source={overrides['source']}'.")
    elif mode in ("train", "val"):
        if "data" not in overrides and "resume" not in overrides:
            overrides["data"] = DEFAULT_CFG.data or TASK2DATA.get(task or DEFAULT_CFG.task, DEFAULT_CFG.data)
            LOGGER.warning(f"WARNING ⚠️ 'data' argument is missing. Using default 'data={overrides['data']}'.")
    elif mode == "export":
        if "format" not in overrides:
            overrides["format"] = DEFAULT_CFG.format or "torchscript"
            LOGGER.warning(f"WARNING ⚠️ 'format' argument is missing. Using default 'format={overrides['format']}'.")

    # Run command in python
    getattr(model, mode)(**overrides)  # default args from model

    # Show help
    LOGGER.info(f"💡 Learn more at https://docs.ultralytics.com/modes/{mode}")


# Special modes --------------------------------------------------------------------------------------------------------
def copy_default_cfg():
    """Copy and create a new default configuration file with '_copy' appended to its name."""
    new_file = Path.cwd() / DEFAULT_CFG_PATH.name.replace(".yaml", "_copy.yaml")
    shutil.copy2(DEFAULT_CFG_PATH, new_file)
    LOGGER.info(
        f"{DEFAULT_CFG_PATH} copied to {new_file}\n"
        f"Example YOLO command with this new custom cfg:\n    yolo cfg='{new_file}' imgsz=320 batch=8"
    )


if __name__ == "__main__":
    # Example: entrypoint(debug='yolo predict model=yolov8n.pt')
    entrypoint(debug="")
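The model-routing logic in `entrypoint` above can be exercised directly through its `debug` parameter, as the `__main__` block hints. A small sketch of the two dispatch paths this commit cares about (the HuggingFace Hub id below is illustrative, not taken from this diff):

```python
from ultralytics.cfg import entrypoint

# 'v8' matches the version regex v3|v5|v6|v8|v9, so this routes through YOLO(model, task=task)
entrypoint(debug="yolo predict model=yolov8n.pt imgsz=320")

# No version match, and a two-part 'user/repo' path that does not exist locally
# falls through to YOLOv10.from_pretrained(model)
entrypoint(debug="yolo predict model=some-user/yolov10n")  # hypothetical Hub id
```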
BIN ultralytics/cfg/__pycache__/__init__.cpython-312.pyc Normal file
Binary file not shown.
BIN ultralytics/cfg/__pycache__/__init__.cpython-39.pyc Normal file
Binary file not shown.
74 ultralytics/cfg/datasets/Argoverse.yaml Normal file
@@ -0,0 +1,74 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Argoverse-HD dataset (ring-front-center camera) https://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
# Documentation: https://docs.ultralytics.com/datasets/detect/argoverse/
# Example usage: yolo train data=Argoverse.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── Argoverse ← downloads here (31.5 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Argoverse # dataset root dir
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: bus
  5: truck
  6: traffic_light
  7: stop_sign

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import json
  from tqdm import tqdm
  from ultralytics.utils.downloads import download
  from pathlib import Path

  def argoverse2yolo(set):
      labels = {}
      a = json.load(open(set, "rb"))
      for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
          img_id = annot['image_id']
          img_name = a['images'][img_id]['name']
          img_label_name = f'{img_name[:-3]}txt'

          cls = annot['category_id']  # instance class id
          x_center, y_center, width, height = annot['bbox']
          x_center = (x_center + width / 2) / 1920.0  # offset and scale
          y_center = (y_center + height / 2) / 1200.0  # offset and scale
          width /= 1920.0  # scale
          height /= 1200.0  # scale

          img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
          if not img_dir.exists():
              img_dir.mkdir(parents=True, exist_ok=True)

          k = str(img_dir / img_label_name)
          if k not in labels:
              labels[k] = []
          labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")

      for k in labels:
          with open(k, "w") as f:
              f.writelines(labels[k])


  # Download 'https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip' (deprecated S3 link)
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link']
  print("\n\nWARNING: Argoverse dataset MUST be downloaded manually, autodownload will NOT work.")
  print(f"WARNING: Manually download Argoverse dataset '{urls[0]}' to '{dir}' and re-run your command.\n\n")
  # download(urls, dir=dir)

  # Convert
  annotations_dir = 'Argoverse-HD/annotations/'
  (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images')  # rename 'tracking' to 'images'
  for d in "train.json", "val.json":
      argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels
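These dataset YAMLs carry their setup logic in the `download: |` literal block, which Ultralytics executes with the parsed YAML dict in scope under the name `yaml` — that is why the script above can read `yaml['path']`. A rough sketch of that convention (assumed mechanics, simplified from the library's dataset checks):

```python
import yaml as pyyaml  # PyYAML

with open("Argoverse.yaml") as f:
    data = pyyaml.safe_load(f)    # parse the dataset YAML
script = data["download"]         # the literal block above, as one string
exec(script, {"yaml": data})      # run it with the parsed dict bound to 'yaml'
# For Argoverse this prints the manual-download warnings; the conversion step
# then expects the archive to already be unpacked under yaml['path'].
```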
36 ultralytics/cfg/datasets/DOTAv1.5.yaml Normal file
@@ -0,0 +1,36 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA 1.5 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.5.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── dota1.5 ← downloads here (2GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/DOTAv1.5 # dataset root dir
train: images/train # train images (relative to 'path') 1411 images
val: images/val # val images (relative to 'path') 458 images
test: images/test # test images (optional) 937 images

# Classes for DOTA 1.5
names:
  0: plane
  1: ship
  2: storage tank
  3: baseball diamond
  4: tennis court
  5: basketball court
  6: ground track field
  7: harbor
  8: bridge
  9: large vehicle
  10: small vehicle
  11: helicopter
  12: roundabout
  13: soccer ball field
  14: swimming pool
  15: container crane

# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.5.zip
35 ultralytics/cfg/datasets/DOTAv1.yaml Normal file
@@ -0,0 +1,35 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA 1.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
# Documentation: https://docs.ultralytics.com/datasets/obb/dota-v2/
# Example usage: yolo train model=yolov8n-obb.pt data=DOTAv1.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── dota1 ← downloads here (2GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/DOTAv1 # dataset root dir
train: images/train # train images (relative to 'path') 1411 images
val: images/val # val images (relative to 'path') 458 images
test: images/test # test images (optional) 937 images

# Classes for DOTA 1.0
names:
  0: plane
  1: ship
  2: storage tank
  3: baseball diamond
  4: tennis court
  5: basketball court
  6: ground track field
  7: harbor
  8: bridge
  9: large vehicle
  10: small vehicle
  11: helicopter
  12: roundabout
  13: soccer ball field
  14: swimming pool

# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv1.zip
53 ultralytics/cfg/datasets/GlobalWheat2020.yaml Normal file
@@ -0,0 +1,53 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Global Wheat 2020 dataset https://www.global-wheat.com/ by University of Saskatchewan
# Documentation: https://docs.ultralytics.com/datasets/detect/globalwheat2020/
# Example usage: yolo train data=GlobalWheat2020.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── GlobalWheat2020 ← downloads here (7.0 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/GlobalWheat2020 # dataset root dir
train: # train images (relative to 'path') 3422 images
  - images/arvalis_1
  - images/arvalis_2
  - images/arvalis_3
  - images/ethz_1
  - images/rres_1
  - images/inrae_1
  - images/usask_1
val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
  - images/ethz_1
test: # test images (optional) 1276 images
  - images/utokyo_1
  - images/utokyo_2
  - images/nau_1
  - images/uq_1

# Classes
names:
  0: wheat_head

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  from ultralytics.utils.downloads import download
  from pathlib import Path

  # Download
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
  download(urls, dir=dir)

  # Make Directories
  for p in 'annotations', 'images', 'labels':
      (dir / p).mkdir(parents=True, exist_ok=True)

  # Move
  for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
           'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
      (dir / 'global-wheat-codalab-official' / p).rename(dir / 'images' / p)  # move to /images
      f = (dir / 'global-wheat-codalab-official' / p).with_suffix('.json')  # json file
      if f.exists():
          f.rename((dir / 'annotations' / p).with_suffix('.json'))  # move to /annotations
2024 ultralytics/cfg/datasets/ImageNet.yaml Normal file
File diff suppressed because it is too large
442 ultralytics/cfg/datasets/Objects365.yaml Normal file
@@ -0,0 +1,442 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Objects365 dataset https://www.objects365.org/ by Megvii
# Documentation: https://docs.ultralytics.com/datasets/detect/objects365/
# Example usage: yolo train data=Objects365.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── Objects365 ← downloads here (712 GB = 367G data + 345G zips)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Objects365 # dataset root dir
train: images/train # train images (relative to 'path') 1742289 images
val: images/val # val images (relative to 'path') 80000 images
test: # test images (optional)

# Classes
names:
  0: Person
  1: Sneakers
  2: Chair
  3: Other Shoes
  4: Hat
  5: Car
  6: Lamp
  7: Glasses
  8: Bottle
  9: Desk
  10: Cup
  11: Street Lights
  12: Cabinet/shelf
  13: Handbag/Satchel
  14: Bracelet
  15: Plate
  16: Picture/Frame
  17: Helmet
  18: Book
  19: Gloves
  20: Storage box
  21: Boat
  22: Leather Shoes
  23: Flower
  24: Bench
  25: Potted Plant
  26: Bowl/Basin
  27: Flag
  28: Pillow
  29: Boots
  30: Vase
  31: Microphone
  32: Necklace
  33: Ring
  34: SUV
  35: Wine Glass
  36: Belt
  37: Monitor/TV
  38: Backpack
  39: Umbrella
  40: Traffic Light
  41: Speaker
  42: Watch
  43: Tie
  44: Trash bin Can
  45: Slippers
  46: Bicycle
  47: Stool
  48: Barrel/bucket
  49: Van
  50: Couch
  51: Sandals
  52: Basket
  53: Drum
  54: Pen/Pencil
  55: Bus
  56: Wild Bird
  57: High Heels
  58: Motorcycle
  59: Guitar
  60: Carpet
  61: Cell Phone
  62: Bread
  63: Camera
  64: Canned
  65: Truck
  66: Traffic cone
  67: Cymbal
  68: Lifesaver
  69: Towel
  70: Stuffed Toy
  71: Candle
  72: Sailboat
  73: Laptop
  74: Awning
  75: Bed
  76: Faucet
  77: Tent
  78: Horse
  79: Mirror
  80: Power outlet
  81: Sink
  82: Apple
  83: Air Conditioner
  84: Knife
  85: Hockey Stick
  86: Paddle
  87: Pickup Truck
  88: Fork
  89: Traffic Sign
  90: Balloon
  91: Tripod
  92: Dog
  93: Spoon
  94: Clock
  95: Pot
  96: Cow
  97: Cake
  98: Dinning Table
  99: Sheep
  100: Hanger
  101: Blackboard/Whiteboard
  102: Napkin
  103: Other Fish
  104: Orange/Tangerine
  105: Toiletry
  106: Keyboard
  107: Tomato
  108: Lantern
  109: Machinery Vehicle
  110: Fan
  111: Green Vegetables
  112: Banana
  113: Baseball Glove
  114: Airplane
  115: Mouse
  116: Train
  117: Pumpkin
  118: Soccer
  119: Skiboard
  120: Luggage
  121: Nightstand
  122: Tea pot
  123: Telephone
  124: Trolley
  125: Head Phone
  126: Sports Car
  127: Stop Sign
  128: Dessert
  129: Scooter
  130: Stroller
  131: Crane
  132: Remote
  133: Refrigerator
  134: Oven
  135: Lemon
  136: Duck
  137: Baseball Bat
  138: Surveillance Camera
  139: Cat
  140: Jug
  141: Broccoli
  142: Piano
  143: Pizza
  144: Elephant
  145: Skateboard
  146: Surfboard
  147: Gun
  148: Skating and Skiing shoes
  149: Gas stove
  150: Donut
  151: Bow Tie
  152: Carrot
  153: Toilet
  154: Kite
  155: Strawberry
  156: Other Balls
  157: Shovel
  158: Pepper
  159: Computer Box
  160: Toilet Paper
  161: Cleaning Products
  162: Chopsticks
  163: Microwave
  164: Pigeon
  165: Baseball
  166: Cutting/chopping Board
  167: Coffee Table
  168: Side Table
  169: Scissors
  170: Marker
  171: Pie
  172: Ladder
  173: Snowboard
  174: Cookies
  175: Radiator
  176: Fire Hydrant
  177: Basketball
  178: Zebra
  179: Grape
  180: Giraffe
  181: Potato
  182: Sausage
  183: Tricycle
  184: Violin
  185: Egg
  186: Fire Extinguisher
  187: Candy
  188: Fire Truck
  189: Billiards
  190: Converter
  191: Bathtub
  192: Wheelchair
  193: Golf Club
  194: Briefcase
  195: Cucumber
  196: Cigar/Cigarette
  197: Paint Brush
  198: Pear
  199: Heavy Truck
  200: Hamburger
  201: Extractor
  202: Extension Cord
  203: Tong
  204: Tennis Racket
  205: Folder
  206: American Football
  207: earphone
  208: Mask
  209: Kettle
  210: Tennis
  211: Ship
  212: Swing
  213: Coffee Machine
  214: Slide
  215: Carriage
  216: Onion
  217: Green beans
  218: Projector
  219: Frisbee
  220: Washing Machine/Drying Machine
  221: Chicken
  222: Printer
  223: Watermelon
  224: Saxophone
  225: Tissue
  226: Toothbrush
  227: Ice cream
  228: Hot-air balloon
  229: Cello
  230: French Fries
  231: Scale
  232: Trophy
  233: Cabbage
  234: Hot dog
  235: Blender
  236: Peach
  237: Rice
  238: Wallet/Purse
  239: Volleyball
  240: Deer
  241: Goose
  242: Tape
  243: Tablet
  244: Cosmetics
  245: Trumpet
  246: Pineapple
  247: Golf Ball
  248: Ambulance
  249: Parking meter
  250: Mango
  251: Key
  252: Hurdle
  253: Fishing Rod
  254: Medal
  255: Flute
  256: Brush
  257: Penguin
  258: Megaphone
  259: Corn
  260: Lettuce
  261: Garlic
  262: Swan
  263: Helicopter
  264: Green Onion
  265: Sandwich
  266: Nuts
  267: Speed Limit Sign
  268: Induction Cooker
  269: Broom
  270: Trombone
  271: Plum
  272: Rickshaw
  273: Goldfish
  274: Kiwi fruit
  275: Router/modem
  276: Poker Card
  277: Toaster
  278: Shrimp
  279: Sushi
  280: Cheese
  281: Notepaper
  282: Cherry
  283: Pliers
  284: CD
  285: Pasta
  286: Hammer
  287: Cue
  288: Avocado
  289: Hamimelon
  290: Flask
  291: Mushroom
  292: Screwdriver
  293: Soap
  294: Recorder
  295: Bear
  296: Eggplant
  297: Board Eraser
  298: Coconut
  299: Tape Measure/Ruler
  300: Pig
  301: Showerhead
  302: Globe
  303: Chips
  304: Steak
  305: Crosswalk Sign
  306: Stapler
  307: Camel
  308: Formula 1
  309: Pomegranate
  310: Dishwasher
  311: Crab
  312: Hoverboard
  313: Meat ball
  314: Rice Cooker
  315: Tuba
  316: Calculator
  317: Papaya
  318: Antelope
  319: Parrot
  320: Seal
  321: Butterfly
  322: Dumbbell
  323: Donkey
  324: Lion
  325: Urinal
  326: Dolphin
  327: Electric Drill
  328: Hair Dryer
  329: Egg tart
  330: Jellyfish
  331: Treadmill
  332: Lighter
  333: Grapefruit
  334: Game board
  335: Mop
  336: Radish
  337: Baozi
  338: Target
  339: French
  340: Spring Rolls
  341: Monkey
  342: Rabbit
  343: Pencil Case
  344: Yak
  345: Red Cabbage
  346: Binoculars
  347: Asparagus
  348: Barbell
  349: Scallop
  350: Noddles
  351: Comb
  352: Dumpling
  353: Oyster
  354: Table Tennis paddle
  355: Cosmetics Brush/Eyeliner Pencil
  356: Chainsaw
  357: Eraser
  358: Lobster
  359: Durian
  360: Okra
  361: Lipstick
  362: Cosmetics Mirror
  363: Curling
  364: Table Tennis

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  from tqdm import tqdm

  from ultralytics.utils.checks import check_requirements
  from ultralytics.utils.downloads import download
  from ultralytics.utils.ops import xyxy2xywhn

  import numpy as np
  from pathlib import Path

  check_requirements(('pycocotools>=2.0',))
  from pycocotools.coco import COCO

  # Make Directories
  dir = Path(yaml['path'])  # dataset root dir
  for p in 'images', 'labels':
      (dir / p).mkdir(parents=True, exist_ok=True)
      for q in 'train', 'val':
          (dir / p / q).mkdir(parents=True, exist_ok=True)

  # Train, Val Splits
  for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
      print(f"Processing {split} in {patches} patches ...")
      images, labels = dir / 'images' / split, dir / 'labels' / split

      # Download
      url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
      if split == 'train':
          download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir)  # annotations json
          download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, threads=8)
      elif split == 'val':
          download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir)  # annotations json
          download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, threads=8)
          download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, threads=8)

      # Move
      for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
          f.rename(images / f.name)  # move to /images/{split}

      # Labels
      coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
      names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
      for cid, cat in enumerate(names):
          catIds = coco.getCatIds(catNms=[cat])
          imgIds = coco.getImgIds(catIds=catIds)
          for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
              width, height = im["width"], im["height"]
              path = Path(im["file_name"])  # image filename
              try:
                  with open(labels / path.with_suffix('.txt').name, 'a') as file:
                      annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
                      for a in coco.loadAnns(annIds):
                          x, y, w, h = a['bbox']  # bounding box in xywh (xy top-left corner)
                          xyxy = np.array([x, y, x + w, y + h])[None]  # pixels(1,4)
                          x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0]  # normalized and clipped
                          file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
              except Exception as e:
                  print(e)
57 ultralytics/cfg/datasets/SKU-110K.yaml Normal file
@@ -0,0 +1,57 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
# Documentation: https://docs.ultralytics.com/datasets/detect/sku-110k/
# Example usage: yolo train data=SKU-110K.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── SKU-110K ← downloads here (13.6 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/SKU-110K # dataset root dir
train: train.txt # train images (relative to 'path') 8219 images
val: val.txt # val images (relative to 'path') 588 images
test: test.txt # test images (optional) 2936 images

# Classes
names:
  0: object

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import shutil
  from pathlib import Path

  import numpy as np
  import pandas as pd
  from tqdm import tqdm

  from ultralytics.utils.downloads import download
  from ultralytics.utils.ops import xyxy2xywh

  # Download
  dir = Path(yaml['path'])  # dataset root dir
  parent = Path(dir.parent)  # download dir
  urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
  download(urls, dir=parent)

  # Rename directories
  if dir.exists():
      shutil.rmtree(dir)
  (parent / 'SKU110K_fixed').rename(dir)  # rename dir
  (dir / 'labels').mkdir(parents=True, exist_ok=True)  # create labels dir

  # Convert labels
  names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height'  # column names
  for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
      x = pd.read_csv(dir / 'annotations' / d, names=names).values  # annotations
      images, unique_images = x[:, 0], np.unique(x[:, 0])
      with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f:
          f.writelines(f'./images/{s}\n' for s in unique_images)
      for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
          cls = 0  # single-class dataset
          with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
              for r in x[images == im]:
                  w, h = r[6], r[7]  # image width, height
                  xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0]  # instance
                  f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n")  # write label
99 ultralytics/cfg/datasets/VOC.yaml Normal file
@@ -0,0 +1,99 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Documentation: https://docs.ultralytics.com/datasets/detect/voc/
# Example usage: yolo train data=VOC.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── VOC ← downloads here (2.8 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path') 16551 images
  - images/train2012
  - images/train2007
  - images/val2012
  - images/val2007
val: # val images (relative to 'path') 4952 images
  - images/test2007
test: # test images (optional)
  - images/test2007

# Classes
names:
  0: aeroplane
  1: bicycle
  2: bird
  3: boat
  4: bottle
  5: bus
  6: car
  7: cat
  8: chair
  9: cow
  10: diningtable
  11: dog
  12: horse
  13: motorbike
  14: person
  15: pottedplant
  16: sheep
  17: sofa
  18: train
  19: tvmonitor

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import xml.etree.ElementTree as ET

  from tqdm import tqdm
  from ultralytics.utils.downloads import download
  from pathlib import Path

  def convert_label(path, lb_path, year, image_id):
      def convert_box(size, box):
          dw, dh = 1. / size[0], 1. / size[1]
          x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
          return x * dw, y * dh, w * dw, h * dh

      in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
      out_file = open(lb_path, 'w')
      tree = ET.parse(in_file)
      root = tree.getroot()
      size = root.find('size')
      w = int(size.find('width').text)
      h = int(size.find('height').text)

      names = list(yaml['names'].values())  # names list
      for obj in root.iter('object'):
          cls = obj.find('name').text
          if cls in names and int(obj.find('difficult').text) != 1:
              xmlbox = obj.find('bndbox')
              bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
              cls_id = names.index(cls)  # class id
              out_file.write(" ".join(str(a) for a in (cls_id, *bb)) + '\n')


  # Download
  dir = Path(yaml['path'])  # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  urls = [f'{url}VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
          f'{url}VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
          f'{url}VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
  download(urls, dir=dir / 'images', curl=True, threads=3, exist_ok=True)  # download and unzip over existing paths (required)

  # Convert
  path = dir / 'images/VOCdevkit'
  for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
      imgs_path = dir / 'images' / f'{image_set}{year}'
      lbs_path = dir / 'labels' / f'{image_set}{year}'
      imgs_path.mkdir(exist_ok=True, parents=True)
      lbs_path.mkdir(exist_ok=True, parents=True)

      with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
          image_ids = f.read().strip().split()
      for id in tqdm(image_ids, desc=f'{image_set}{year}'):
          f = path / f'VOC{year}/JPEGImages/{id}.jpg'  # old img path
          lb_path = (lbs_path / f.name).with_suffix('.txt')  # new label path
          f.rename(imgs_path / f.name)  # move image
          convert_label(path, lb_path, year, id)  # convert labels to YOLO format
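Note the VOC-specific pair order in `convert_box` above: the box arrives as (xmin, xmax, ymin, ymax), not the more common (xmin, ymin, xmax, ymax). A worked example with assumed numbers:

```python
size = (640, 480)                    # image width, height
box = (100.0, 300.0, 200.0, 400.0)   # (xmin, xmax, ymin, ymax) from the XML
dw, dh = 1.0 / size[0], 1.0 / size[1]
x = ((box[0] + box[1]) / 2.0 - 1) * dw  # 199 / 640 ≈ 0.3109, normalized center x
y = ((box[2] + box[3]) / 2.0 - 1) * dh  # 299 / 480 ≈ 0.6229, normalized center y
w = (box[1] - box[0]) * dw              # 200 / 640 = 0.3125, normalized width
h = (box[3] - box[2]) * dh              # 200 / 480 ≈ 0.4167, normalized height
```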
72 ultralytics/cfg/datasets/VisDrone.yaml Normal file
@@ -0,0 +1,72 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
# Documentation: https://docs.ultralytics.com/datasets/detect/visdrone/
# Example usage: yolo train data=VisDrone.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── VisDrone ← downloads here (2.3 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VisDrone # dataset root dir
train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images

# Classes
names:
  0: pedestrian
  1: people
  2: bicycle
  3: car
  4: van
  5: truck
  6: tricycle
  7: awning-tricycle
  8: bus
  9: motor

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import os
  from pathlib import Path

  from ultralytics.utils.downloads import download

  def visdrone2yolo(dir):
      from PIL import Image
      from tqdm import tqdm

      def convert_box(size, box):
          # Convert VisDrone box to YOLO xywh box
          dw = 1. / size[0]
          dh = 1. / size[1]
          return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh

      (dir / 'labels').mkdir(parents=True, exist_ok=True)  # make labels directory
      pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
      for f in pbar:
          img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
          lines = []
          with open(f, 'r') as file:  # read annotation.txt
              for row in [x.split(',') for x in file.read().strip().splitlines()]:
                  if row[4] == '0':  # VisDrone 'ignored regions' class 0
                      continue
                  cls = int(row[5]) - 1
                  box = convert_box(img_size, tuple(map(int, row[:4])))
                  lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
              with open(str(f).replace(f'{os.sep}annotations{os.sep}', f'{os.sep}labels{os.sep}'), 'w') as fl:
                  fl.writelines(lines)  # write label.txt


  # Download
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
  download(urls, dir=dir, curl=True, threads=4)

  # Convert
  for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
      visdrone2yolo(dir / d)  # convert VisDrone annotations to YOLO labels
24 ultralytics/cfg/datasets/african-wildlife.yaml Normal file
@@ -0,0 +1,24 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# African-wildlife dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/african-wildlife/
# Example usage: yolo train data=african-wildlife.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── african-wildlife ← downloads here (100 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/african-wildlife # dataset root dir
train: train/images # train images (relative to 'path') 1052 images
val: valid/images # val images (relative to 'path') 225 images
test: test/images # test images (relative to 'path') 227 images

# Classes
names:
  0: buffalo
  1: elephant
  2: rhino
  3: zebra

# Download script/URL (optional)
download: https://ultralytics.com/assets/african-wildlife.zip
22 ultralytics/cfg/datasets/brain-tumor.yaml Normal file
@@ -0,0 +1,22 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Brain-tumor dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/brain-tumor/
# Example usage: yolo train data=brain-tumor.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── brain-tumor ← downloads here (4.05 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/brain-tumor # dataset root dir
train: train/images # train images (relative to 'path') 893 images
val: valid/images # val images (relative to 'path') 223 images
test: # test images (relative to 'path')

# Classes
names:
  0: negative
  1: positive

# Download script/URL (optional)
download: https://ultralytics.com/assets/brain-tumor.zip
43 ultralytics/cfg/datasets/carparts-seg.yaml Normal file
@@ -0,0 +1,43 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Carparts-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/carparts-seg/
# Example usage: yolo train data=carparts-seg.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── carparts-seg ← downloads here (132 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/carparts-seg # dataset root dir
train: train/images # train images (relative to 'path') 3516 images
val: valid/images # val images (relative to 'path') 276 images
test: test/images # test images (relative to 'path') 401 images

# Classes
names:
  0: back_bumper
  1: back_door
  2: back_glass
  3: back_left_door
  4: back_left_light
  5: back_light
  6: back_right_door
  7: back_right_light
  8: front_bumper
  9: front_door
  10: front_glass
  11: front_left_door
  12: front_left_light
  13: front_light
  14: front_right_door
  15: front_right_light
  16: hood
  17: left_mirror
  18: object
  19: right_mirror
  20: tailgate
  21: trunk
  22: wheel

# Download script/URL (optional)
download: https://ultralytics.com/assets/carparts-seg.zip
38
ultralytics/cfg/datasets/coco-pose.yaml
Normal file
@@ -0,0 +1,38 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO 2017 dataset https://cocodataset.org by Microsoft
# Documentation: https://docs.ultralytics.com/datasets/pose/coco/
# Example usage: yolo train data=coco-pose.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco-pose ← downloads here (20.1 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco-pose # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794

# Keypoints
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]

# Classes
names:
  0: person

# Download script/URL (optional)
download: |
  from ultralytics.utils.downloads import download
  from pathlib import Path

  # Download labels
  dir = Path(yaml['path'])  # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  urls = [url + 'coco2017labels-pose.zip']  # labels
  download(urls, dir=dir.parent)
  # Download data
  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
  download(urls, dir=dir / 'images', threads=3)
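The `flip_idx` entry exists because horizontal flips swap anatomical left/right: during a `fliplr` augmentation, keypoint `i` of the flipped label is taken from keypoint `flip_idx[i]` of the original (eyes 1↔2, ears 3↔4, and so on for COCO's 17 points). A minimal sketch of that remapping, not the trainer's actual augmentation code:

```python
import numpy as np

flip_idx = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]

def fliplr_keypoints(kpts: np.ndarray, img_w: int) -> np.ndarray:
    """Mirror a (17, 3) array of x, y, visible keypoints and swap left/right pairs."""
    out = kpts[flip_idx].copy()  # reorder so 'left eye' is still the left eye after the flip
    out[:, 0] = img_w - out[:, 0]  # mirror x coordinates
    return out
```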
114
ultralytics/cfg/datasets/coco.yaml
Normal file
@@ -0,0 +1,114 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO 2017 dataset https://cocodataset.org by Microsoft
# Documentation: https://docs.ultralytics.com/datasets/detect/coco/
# Example usage: yolo train data=coco.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco ← downloads here (20.1 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush

# Download script/URL (optional)
download: |
  from ultralytics.utils.downloads import download
  from pathlib import Path

  # Download labels
  segments = True  # segment or box labels
  dir = Path(yaml['path'])  # dataset root dir
  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
  download(urls, dir=dir.parent)
  # Download data
  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
  download(urls, dir=dir / 'images', threads=3)
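When the `download` key holds a script rather than a URL (as above), Ultralytics executes it with the parsed dataset dict bound to the name `yaml`, which is why these snippets can read `yaml['path']` without defining it. A hedged sketch of that contract, not the library's exact loader code (and note that actually running it pulls ~20 GB):

```python
from pathlib import Path

import yaml as pyyaml  # PyYAML

data = pyyaml.safe_load(Path("ultralytics/cfg/datasets/coco.yaml").read_text())
exec(data["download"], {"yaml": data})  # the script sees the dataset dict as 'yaml'
```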
100
ultralytics/cfg/datasets/coco128-seg.yaml
Normal file
@@ -0,0 +1,100 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/coco/
# Example usage: yolo train data=coco128-seg.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco128-seg ← downloads here (7 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128-seg # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush

# Download script/URL (optional)
download: https://ultralytics.com/assets/coco128-seg.zip
100
ultralytics/cfg/datasets/coco128.yaml
Normal file
@@ -0,0 +1,100 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/coco/
# Example usage: yolo train data=coco128.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco128 ← downloads here (7 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128 # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush

# Download script/URL (optional)
download: https://ultralytics.com/assets/coco128.zip
25
ultralytics/cfg/datasets/coco8-pose.yaml
Normal file
@@ -0,0 +1,25 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/pose/coco8-pose/
# Example usage: yolo train data=coco8-pose.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco8-pose ← downloads here (1 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8-pose # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)

# Keypoints
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]

# Classes
names:
  0: person

# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8-pose.zip
100
ultralytics/cfg/datasets/coco8-seg.yaml
Normal file
@@ -0,0 +1,100 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/coco8-seg/
# Example usage: yolo train data=coco8-seg.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco8-seg ← downloads here (1 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8-seg # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush

# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8-seg.zip
100
ultralytics/cfg/datasets/coco8.yaml
Normal file
@@ -0,0 +1,100 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# COCO8 dataset (first 8 images from COCO train2017) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/detect/coco8/
# Example usage: yolo train data=coco8.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco8 ← downloads here (1 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco8 # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: # test images (optional)

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: train
  7: truck
  8: boat
  9: traffic light
  10: fire hydrant
  11: stop sign
  12: parking meter
  13: bench
  14: bird
  15: cat
  16: dog
  17: horse
  18: sheep
  19: cow
  20: elephant
  21: bear
  22: zebra
  23: giraffe
  24: backpack
  25: umbrella
  26: handbag
  27: tie
  28: suitcase
  29: frisbee
  30: skis
  31: snowboard
  32: sports ball
  33: kite
  34: baseball bat
  35: baseball glove
  36: skateboard
  37: surfboard
  38: tennis racket
  39: bottle
  40: wine glass
  41: cup
  42: fork
  43: knife
  44: spoon
  45: bowl
  46: banana
  47: apple
  48: sandwich
  49: orange
  50: broccoli
  51: carrot
  52: hot dog
  53: pizza
  54: donut
  55: cake
  56: chair
  57: couch
  58: potted plant
  59: bed
  60: dining table
  61: toilet
  62: tv
  63: laptop
  64: mouse
  65: remote
  66: keyboard
  67: cell phone
  68: microwave
  69: oven
  70: toaster
  71: sink
  72: refrigerator
  73: book
  74: clock
  75: vase
  76: scissors
  77: teddy bear
  78: hair drier
  79: toothbrush

# Download script/URL (optional)
download: https://ultralytics.com/assets/coco8.zip
21
ultralytics/cfg/datasets/crack-seg.yaml
Normal file
@@ -0,0 +1,21 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Crack-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/crack-seg/
# Example usage: yolo train data=crack-seg.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── crack-seg ← downloads here (91.2 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/crack-seg # dataset root dir
train: train/images # train images (relative to 'path') 3717 images
val: valid/images # val images (relative to 'path') 112 images
test: test/images # test images (relative to 'path') 200 images

# Classes
names:
  0: crack

# Download script/URL (optional)
download: https://ultralytics.com/assets/crack-seg.zip
34
ultralytics/cfg/datasets/dota8.yaml
Normal file
@@ -0,0 +1,34 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DOTA8 dataset (first 8 images from the split DOTAv1 dataset) by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/obb/dota8/
# Example usage: yolo train model=yolov8n-obb.pt data=dota8.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── dota8 ← downloads here (1 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/dota8 # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images

# Classes for DOTA 1.0
names:
  0: plane
  1: ship
  2: storage tank
  3: baseball diamond
  4: tennis court
  5: basketball court
  6: ground track field
  7: harbor
  8: bridge
  9: large vehicle
  10: small vehicle
  11: helicopter
  12: roundabout
  13: soccer ball field
  14: swimming pool

# Download script/URL (optional)
download: https://github.com/ultralytics/yolov5/releases/download/v1.0/dota8.zip
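DOTA8's classes are annotated with oriented boxes, so it pairs with the OBB task head rather than the default detector. A sketch matching the `Example usage` line in the file (checkpoint availability and the test image name are assumptions):

```python
from ultralytics import YOLO

model = YOLO("yolov8n-obb.pt")  # oriented-bounding-box variant
model.train(data="dota8.yaml", epochs=100, imgsz=640)
results = model("harbor.jpg")  # hypothetical aerial test image
print(results[0].obb)  # rotated boxes are exposed via .obb instead of .boxes
```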
660
ultralytics/cfg/datasets/open-images-v7.yaml
Normal file
@@ -0,0 +1,660 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Open Images v7 dataset https://storage.googleapis.com/openimages/web/index.html by Google
# Documentation: https://docs.ultralytics.com/datasets/detect/open-images-v7/
# Example usage: yolo train data=open-images-v7.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── open-images-v7 ← downloads here (561 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/open-images-v7 # dataset root dir
train: images/train # train images (relative to 'path') 1743042 images
val: images/val # val images (relative to 'path') 41620 images
test: # test images (optional)

# Classes
names:
  0: Accordion
  1: Adhesive tape
  2: Aircraft
  3: Airplane
  4: Alarm clock
  5: Alpaca
  6: Ambulance
  7: Animal
  8: Ant
  9: Antelope
  10: Apple
  11: Armadillo
  12: Artichoke
  13: Auto part
  14: Axe
  15: Backpack
  16: Bagel
  17: Baked goods
  18: Balance beam
  19: Ball
  20: Balloon
  21: Banana
  22: Band-aid
  23: Banjo
  24: Barge
  25: Barrel
  26: Baseball bat
  27: Baseball glove
  28: Bat (Animal)
  29: Bathroom accessory
  30: Bathroom cabinet
  31: Bathtub
  32: Beaker
  33: Bear
  34: Bed
  35: Bee
  36: Beehive
  37: Beer
  38: Beetle
  39: Bell pepper
  40: Belt
  41: Bench
  42: Bicycle
  43: Bicycle helmet
  44: Bicycle wheel
  45: Bidet
  46: Billboard
  47: Billiard table
  48: Binoculars
  49: Bird
  50: Blender
  51: Blue jay
  52: Boat
  53: Bomb
  54: Book
  55: Bookcase
  56: Boot
  57: Bottle
  58: Bottle opener
  59: Bow and arrow
  60: Bowl
  61: Bowling equipment
  62: Box
  63: Boy
  64: Brassiere
  65: Bread
  66: Briefcase
  67: Broccoli
  68: Bronze sculpture
  69: Brown bear
  70: Building
  71: Bull
  72: Burrito
  73: Bus
  74: Bust
  75: Butterfly
  76: Cabbage
  77: Cabinetry
  78: Cake
  79: Cake stand
  80: Calculator
  81: Camel
  82: Camera
  83: Can opener
  84: Canary
  85: Candle
  86: Candy
  87: Cannon
  88: Canoe
  89: Cantaloupe
  90: Car
  91: Carnivore
  92: Carrot
  93: Cart
  94: Cassette deck
  95: Castle
  96: Cat
  97: Cat furniture
  98: Caterpillar
  99: Cattle
  100: Ceiling fan
  101: Cello
  102: Centipede
  103: Chainsaw
  104: Chair
  105: Cheese
  106: Cheetah
  107: Chest of drawers
  108: Chicken
  109: Chime
  110: Chisel
  111: Chopsticks
  112: Christmas tree
  113: Clock
  114: Closet
  115: Clothing
  116: Coat
  117: Cocktail
  118: Cocktail shaker
  119: Coconut
  120: Coffee
  121: Coffee cup
  122: Coffee table
  123: Coffeemaker
  124: Coin
  125: Common fig
  126: Common sunflower
  127: Computer keyboard
  128: Computer monitor
  129: Computer mouse
  130: Container
  131: Convenience store
  132: Cookie
  133: Cooking spray
  134: Corded phone
  135: Cosmetics
  136: Couch
  137: Countertop
  138: Cowboy hat
  139: Crab
  140: Cream
  141: Cricket ball
  142: Crocodile
  143: Croissant
  144: Crown
  145: Crutch
  146: Cucumber
  147: Cupboard
  148: Curtain
  149: Cutting board
  150: Dagger
  151: Dairy Product
  152: Deer
  153: Desk
  154: Dessert
  155: Diaper
  156: Dice
  157: Digital clock
  158: Dinosaur
  159: Dishwasher
  160: Dog
  161: Dog bed
  162: Doll
  163: Dolphin
  164: Door
  165: Door handle
  166: Doughnut
  167: Dragonfly
  168: Drawer
  169: Dress
  170: Drill (Tool)
  171: Drink
  172: Drinking straw
  173: Drum
  174: Duck
  175: Dumbbell
  176: Eagle
  177: Earrings
  178: Egg (Food)
  179: Elephant
  180: Envelope
  181: Eraser
  182: Face powder
  183: Facial tissue holder
  184: Falcon
  185: Fashion accessory
  186: Fast food
  187: Fax
  188: Fedora
  189: Filing cabinet
  190: Fire hydrant
  191: Fireplace
  192: Fish
  193: Flag
  194: Flashlight
  195: Flower
  196: Flowerpot
  197: Flute
  198: Flying disc
  199: Food
  200: Food processor
  201: Football
  202: Football helmet
  203: Footwear
  204: Fork
  205: Fountain
  206: Fox
  207: French fries
  208: French horn
  209: Frog
  210: Fruit
  211: Frying pan
  212: Furniture
  213: Garden Asparagus
  214: Gas stove
  215: Giraffe
  216: Girl
  217: Glasses
  218: Glove
  219: Goat
  220: Goggles
  221: Goldfish
  222: Golf ball
  223: Golf cart
  224: Gondola
  225: Goose
  226: Grape
  227: Grapefruit
  228: Grinder
  229: Guacamole
  230: Guitar
  231: Hair dryer
  232: Hair spray
  233: Hamburger
  234: Hammer
  235: Hamster
  236: Hand dryer
  237: Handbag
  238: Handgun
  239: Harbor seal
  240: Harmonica
  241: Harp
  242: Harpsichord
  243: Hat
  244: Headphones
  245: Heater
  246: Hedgehog
  247: Helicopter
  248: Helmet
  249: High heels
  250: Hiking equipment
  251: Hippopotamus
  252: Home appliance
  253: Honeycomb
  254: Horizontal bar
  255: Horse
  256: Hot dog
  257: House
  258: Houseplant
  259: Human arm
  260: Human beard
  261: Human body
  262: Human ear
  263: Human eye
  264: Human face
  265: Human foot
  266: Human hair
  267: Human hand
  268: Human head
  269: Human leg
  270: Human mouth
  271: Human nose
  272: Humidifier
  273: Ice cream
  274: Indoor rower
  275: Infant bed
  276: Insect
  277: Invertebrate
  278: Ipod
  279: Isopod
  280: Jacket
  281: Jacuzzi
  282: Jaguar (Animal)
  283: Jeans
  284: Jellyfish
  285: Jet ski
  286: Jug
  287: Juice
  288: Kangaroo
  289: Kettle
  290: Kitchen & dining room table
  291: Kitchen appliance
  292: Kitchen knife
  293: Kitchen utensil
  294: Kitchenware
  295: Kite
  296: Knife
  297: Koala
  298: Ladder
  299: Ladle
  300: Ladybug
  301: Lamp
  302: Land vehicle
  303: Lantern
  304: Laptop
  305: Lavender (Plant)
  306: Lemon
  307: Leopard
  308: Light bulb
  309: Light switch
  310: Lighthouse
  311: Lily
  312: Limousine
  313: Lion
  314: Lipstick
  315: Lizard
  316: Lobster
  317: Loveseat
  318: Luggage and bags
  319: Lynx
  320: Magpie
  321: Mammal
  322: Man
  323: Mango
  324: Maple
  325: Maracas
  326: Marine invertebrates
  327: Marine mammal
  328: Measuring cup
  329: Mechanical fan
  330: Medical equipment
  331: Microphone
  332: Microwave oven
  333: Milk
  334: Miniskirt
  335: Mirror
  336: Missile
  337: Mixer
  338: Mixing bowl
  339: Mobile phone
  340: Monkey
  341: Moths and butterflies
  342: Motorcycle
  343: Mouse
  344: Muffin
  345: Mug
  346: Mule
  347: Mushroom
  348: Musical instrument
  349: Musical keyboard
  350: Nail (Construction)
  351: Necklace
  352: Nightstand
  353: Oboe
  354: Office building
  355: Office supplies
  356: Orange
  357: Organ (Musical Instrument)
  358: Ostrich
  359: Otter
  360: Oven
  361: Owl
  362: Oyster
  363: Paddle
  364: Palm tree
  365: Pancake
  366: Panda
  367: Paper cutter
  368: Paper towel
  369: Parachute
  370: Parking meter
  371: Parrot
  372: Pasta
  373: Pastry
  374: Peach
  375: Pear
  376: Pen
  377: Pencil case
  378: Pencil sharpener
  379: Penguin
  380: Perfume
  381: Person
  382: Personal care
  383: Personal flotation device
  384: Piano
  385: Picnic basket
  386: Picture frame
  387: Pig
  388: Pillow
  389: Pineapple
  390: Pitcher (Container)
  391: Pizza
  392: Pizza cutter
  393: Plant
  394: Plastic bag
  395: Plate
  396: Platter
  397: Plumbing fixture
  398: Polar bear
  399: Pomegranate
  400: Popcorn
  401: Porch
  402: Porcupine
  403: Poster
  404: Potato
  405: Power plugs and sockets
  406: Pressure cooker
  407: Pretzel
  408: Printer
  409: Pumpkin
  410: Punching bag
  411: Rabbit
  412: Raccoon
  413: Racket
  414: Radish
  415: Ratchet (Device)
  416: Raven
  417: Rays and skates
  418: Red panda
  419: Refrigerator
  420: Remote control
  421: Reptile
  422: Rhinoceros
  423: Rifle
  424: Ring binder
  425: Rocket
  426: Roller skates
  427: Rose
  428: Rugby ball
  429: Ruler
  430: Salad
  431: Salt and pepper shakers
  432: Sandal
  433: Sandwich
  434: Saucer
  435: Saxophone
  436: Scale
  437: Scarf
  438: Scissors
  439: Scoreboard
  440: Scorpion
  441: Screwdriver
  442: Sculpture
  443: Sea lion
  444: Sea turtle
  445: Seafood
  446: Seahorse
  447: Seat belt
  448: Segway
  449: Serving tray
  450: Sewing machine
  451: Shark
  452: Sheep
  453: Shelf
  454: Shellfish
  455: Shirt
  456: Shorts
  457: Shotgun
  458: Shower
  459: Shrimp
  460: Sink
  461: Skateboard
  462: Ski
  463: Skirt
  464: Skull
  465: Skunk
  466: Skyscraper
  467: Slow cooker
  468: Snack
  469: Snail
  470: Snake
  471: Snowboard
  472: Snowman
  473: Snowmobile
  474: Snowplow
  475: Soap dispenser
  476: Sock
  477: Sofa bed
  478: Sombrero
  479: Sparrow
  480: Spatula
  481: Spice rack
  482: Spider
  483: Spoon
  484: Sports equipment
  485: Sports uniform
  486: Squash (Plant)
  487: Squid
  488: Squirrel
  489: Stairs
  490: Stapler
  491: Starfish
  492: Stationary bicycle
  493: Stethoscope
  494: Stool
  495: Stop sign
  496: Strawberry
  497: Street light
  498: Stretcher
  499: Studio couch
  500: Submarine
  501: Submarine sandwich
  502: Suit
  503: Suitcase
  504: Sun hat
  505: Sunglasses
  506: Surfboard
  507: Sushi
  508: Swan
  509: Swim cap
  510: Swimming pool
  511: Swimwear
  512: Sword
  513: Syringe
  514: Table
  515: Table tennis racket
  516: Tablet computer
  517: Tableware
  518: Taco
  519: Tank
  520: Tap
  521: Tart
  522: Taxi
  523: Tea
  524: Teapot
  525: Teddy bear
  526: Telephone
  527: Television
  528: Tennis ball
  529: Tennis racket
  530: Tent
  531: Tiara
  532: Tick
  533: Tie
  534: Tiger
  535: Tin can
  536: Tire
  537: Toaster
  538: Toilet
  539: Toilet paper
  540: Tomato
  541: Tool
  542: Toothbrush
  543: Torch
  544: Tortoise
  545: Towel
  546: Tower
  547: Toy
  548: Traffic light
  549: Traffic sign
  550: Train
  551: Training bench
  552: Treadmill
  553: Tree
  554: Tree house
  555: Tripod
  556: Trombone
  557: Trousers
  558: Truck
  559: Trumpet
  560: Turkey
  561: Turtle
  562: Umbrella
  563: Unicycle
  564: Van
  565: Vase
  566: Vegetable
  567: Vehicle
  568: Vehicle registration plate
  569: Violin
  570: Volleyball (Ball)
  571: Waffle
  572: Waffle iron
  573: Wall clock
  574: Wardrobe
  575: Washing machine
  576: Waste container
  577: Watch
  578: Watercraft
  579: Watermelon
  580: Weapon
  581: Whale
  582: Wheel
  583: Wheelchair
  584: Whisk
  585: Whiteboard
  586: Willow
  587: Window
  588: Window blind
  589: Wine
  590: Wine glass
  591: Wine rack
  592: Winter melon
  593: Wok
  594: Woman
  595: Wood-burning stove
  596: Woodpecker
  597: Worm
  598: Wrench
  599: Zebra
  600: Zucchini

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  from ultralytics.utils import LOGGER, SETTINGS, Path, is_ubuntu, get_ubuntu_version
  from ultralytics.utils.checks import check_requirements, check_version

  check_requirements('fiftyone')
  if is_ubuntu() and check_version(get_ubuntu_version(), '>=22.04'):
      # Ubuntu>=22.04 patch https://github.com/voxel51/fiftyone/issues/2961#issuecomment-1666519347
      check_requirements('fiftyone-db-ubuntu2204')

  import fiftyone as fo
  import fiftyone.zoo as foz
  import warnings

  name = 'open-images-v7'
  fraction = 1.0  # fraction of full dataset to use
  LOGGER.warning('WARNING ⚠️ Open Images V7 dataset requires at least 561 GB of free space. Starting download...')
  for split in 'train', 'validation':  # 1743042 train, 41620 val images
      train = split == 'train'

      # Load Open Images dataset
      dataset = foz.load_zoo_dataset(name,
                                     split=split,
                                     label_types=['detections'],
                                     dataset_dir=Path(SETTINGS['datasets_dir']) / 'fiftyone' / name,
                                     max_samples=round((1743042 if train else 41620) * fraction))

      # Define classes
      if train:
          classes = dataset.default_classes  # all classes
          # classes = dataset.distinct('ground_truth.detections.label')  # only observed classes

      # Export to YOLO format
      with warnings.catch_warnings():
          warnings.filterwarnings("ignore", category=UserWarning, module="fiftyone.utils.yolo")
          dataset.export(export_dir=str(Path(SETTINGS['datasets_dir']) / name),
                         dataset_type=fo.types.YOLOv5Dataset,
                         label_field='ground_truth',
                         split='val' if split == 'validation' else split,
                         classes=classes,
                         overwrite=train)
21
ultralytics/cfg/datasets/package-seg.yaml
Normal file
@@ -0,0 +1,21 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Package-seg dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/segment/package-seg/
# Example usage: yolo train data=package-seg.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── package-seg ← downloads here (102 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/package-seg # dataset root dir
train: images/train # train images (relative to 'path') 1920 images
val: images/val # val images (relative to 'path') 89 images
test: test/images # test images (relative to 'path') 188 images

# Classes
names:
  0: package

# Download script/URL (optional)
download: https://ultralytics.com/assets/package-seg.zip
24
ultralytics/cfg/datasets/tiger-pose.yaml
Normal file
@@ -0,0 +1,24 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Tiger Pose dataset by Ultralytics
# Documentation: https://docs.ultralytics.com/datasets/pose/tiger-pose/
# Example usage: yolo train data=tiger-pose.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── tiger-pose ← downloads here (75.3 MB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/tiger-pose # dataset root dir
train: train # train images (relative to 'path') 210 images
val: val # val images (relative to 'path') 53 images

# Keypoints
kpt_shape: [12, 2] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
flip_idx: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

# Classes
names:
  0: tiger

# Download script/URL (optional)
download: https://ultralytics.com/assets/tiger-pose.zip
152
ultralytics/cfg/datasets/xView.yaml
Normal file
@@ -0,0 +1,152 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
# -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! --------
# Documentation: https://docs.ultralytics.com/datasets/detect/xview/
# Example usage: yolo train data=xView.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── xView ← downloads here (20.7 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/xView # dataset root dir
train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images

# Classes
names:
  0: Fixed-wing Aircraft
  1: Small Aircraft
  2: Cargo Plane
  3: Helicopter
  4: Passenger Vehicle
  5: Small Car
  6: Bus
  7: Pickup Truck
  8: Utility Truck
  9: Truck
  10: Cargo Truck
  11: Truck w/Box
  12: Truck Tractor
  13: Trailer
  14: Truck w/Flatbed
  15: Truck w/Liquid
  16: Crane Truck
  17: Railway Vehicle
  18: Passenger Car
  19: Cargo Car
  20: Flat Car
  21: Tank car
  22: Locomotive
  23: Maritime Vessel
  24: Motorboat
  25: Sailboat
  26: Tugboat
  27: Barge
  28: Fishing Vessel
  29: Ferry
  30: Yacht
  31: Container Ship
  32: Oil Tanker
  33: Engineering Vehicle
  34: Tower crane
  35: Container Crane
  36: Reach Stacker
  37: Straddle Carrier
  38: Mobile Crane
  39: Dump Truck
  40: Haul Truck
  41: Scraper/Tractor
  42: Front loader/Bulldozer
  43: Excavator
  44: Cement Mixer
  45: Ground Grader
  46: Hut/Tent
  47: Shed
  48: Building
  49: Aircraft Hangar
  50: Damaged Building
  51: Facility
  52: Construction Site
  53: Vehicle Lot
  54: Helipad
  55: Storage Tank
  56: Shipping container lot
  57: Shipping Container
  58: Pylon
  59: Tower

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import json
  import os
  from pathlib import Path

  import numpy as np
  from PIL import Image
  from tqdm import tqdm

  from ultralytics.data.utils import autosplit
  from ultralytics.utils.ops import xyxy2xywhn


  def convert_labels(fname=Path('xView/xView_train.geojson')):
      # Convert xView geoJSON labels to YOLO format
      path = fname.parent
      with open(fname) as f:
          print(f'Loading {fname}...')
          data = json.load(f)

      # Make dirs
      labels = Path(path / 'labels' / 'train')
      os.system(f'rm -rf {labels}')
      labels.mkdir(parents=True, exist_ok=True)

      # xView classes 11-94 to 0-59
      xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
                           12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
                           29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
                           47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]

      shapes = {}
      for feature in tqdm(data['features'], desc=f'Converting {fname}'):
          p = feature['properties']
          if p['bounds_imcoords']:
              id = p['image_id']
              file = path / 'train_images' / id
              if file.exists():  # 1395.tif missing
                  try:
                      box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
                      assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
                      cls = p['type_id']
                      cls = xview_class2index[int(cls)]  # xView class to 0-59
                      assert 59 >= cls >= 0, f'incorrect class index {cls}'

                      # Write YOLO label
                      if id not in shapes:
                          shapes[id] = Image.open(file).size
                      box = xyxy2xywhn(box[None].astype(np.float64), w=shapes[id][0], h=shapes[id][1], clip=True)
                      with open((labels / id).with_suffix('.txt'), 'a') as f:
                          f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n")  # write label.txt
                  except Exception as e:
                      print(f'WARNING: skipping one label for {file}: {e}')


  # Download manually from https://challenge.xviewdataset.org
  dir = Path(yaml['path'])  # dataset root dir
  # urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip',  # train labels
  #         'https://d307kc0mrhucc3.cloudfront.net/train_images.zip',  # 15G, 847 train images
  #         'https://d307kc0mrhucc3.cloudfront.net/val_images.zip']  # 5G, 282 val images (no labels)
  # download(urls, dir=dir)

  # Convert labels
  convert_labels(dir / 'xView_train.geojson')

  # Move images
  images = Path(dir / 'images')
  images.mkdir(parents=True, exist_ok=True)
  Path(dir / 'train_images').rename(dir / 'images' / 'train')
  Path(dir / 'val_images').rename(dir / 'images' / 'val')

  # Split
  autosplit(dir / 'images' / 'train')
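xView ships labels only for its training imagery, so instead of fixed split files the YAML points at `autosplit_*.txt` lists that the final `autosplit(...)` call generates. A sketch of producing them by hand, with the default 90/10/0 train/val/test weights spelled out:

```python
from ultralytics.data.utils import autosplit

# Writes autosplit_train.txt and autosplit_val.txt one level above the given folder,
# each listing image paths for the YAML's train/val keys to consume.
autosplit("../datasets/xView/images/train", weights=(0.9, 0.1, 0.0), annotated_only=False)
```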
127
ultralytics/cfg/default.yaml
Normal file
@@ -0,0 +1,127 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default training settings and hyperparameters for medium-augmentation COCO training

task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark

# Train settings -------------------------------------------------------------------------------------------------------
model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
data: # (str, optional) path to data file, i.e. coco128.yaml
epochs: 100 # (int) number of epochs to train for
time: # (float, optional) number of hours to train for, overrides epochs if supplied
patience: 100 # (int) epochs to wait for no observable improvement for early stopping of training
batch: 16 # (int) number of images per batch (-1 for AutoBatch)
imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
save: True # (bool) save train checkpoints and predict results
save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
val_period: 1 # (int) Validation every x epochs
cache: False # (bool) True/ram, disk or False. Use cache for data loading
device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
project: # (str, optional) project name
name: # (str, optional) experiment name, results saved to 'project/name' directory
exist_ok: False # (bool) whether to overwrite existing experiment
pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
verbose: True # (bool) whether to print verbose output
seed: 0 # (int) random seed for reproducibility
deterministic: True # (bool) whether to enable deterministic mode
single_cls: False # (bool) train multi-class data as single-class
rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
cos_lr: False # (bool) use cosine learning rate scheduler
close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
resume: False # (bool) resume training from last checkpoint
amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
multi_scale: False # (bool) Whether to use multiscale during training
# Segmentation
overlap_mask: True # (bool) masks should overlap during training (segment train only)
mask_ratio: 4 # (int) mask downsample ratio (segment train only)
# Classification
dropout: 0.0 # (float) use dropout regularization (classify train only)

# Val/Test settings ----------------------------------------------------------------------------------------------------
val: True # (bool) validate/test during training
split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
save_json: False # (bool) save results to JSON file
save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
max_det: 300 # (int) maximum number of detections per image
half: False # (bool) use half precision (FP16)
dnn: False # (bool) use OpenCV DNN for ONNX inference
plots: True # (bool) save plots and images during train/val

# Predict settings -----------------------------------------------------------------------------------------------------
source: # (str, optional) source directory for images or videos
vid_stride: 1 # (int) video frame-rate stride
stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
visualize: False # (bool) visualize model features
augment: False # (bool) apply image augmentation to prediction sources
agnostic_nms: False # (bool) class-agnostic NMS
classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
retina_masks: False # (bool) use high-resolution segmentation masks
embed: # (list[int], optional) return feature vectors/embeddings from given layers

# Visualize settings ---------------------------------------------------------------------------------------------------
show: False # (bool) show predicted images and videos if environment allows
save_frames: False # (bool) save predicted individual video frames
save_txt: False # (bool) save results as .txt file
save_conf: False # (bool) save results with confidence scores
save_crop: False # (bool) save cropped images with results
show_labels: True # (bool) show prediction labels, i.e. 'person'
show_conf: True # (bool) show prediction confidence, i.e. '0.99'
show_boxes: True # (bool) show prediction boxes
line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.

# Export settings ------------------------------------------------------------------------------------------------------
format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
keras: False # (bool) use Keras
optimize: False # (bool) TorchScript: optimize for mobile
int8: False # (bool) CoreML/TF INT8 quantization
dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
simplify: False # (bool) ONNX: simplify model using `onnxslim`
opset: # (int, optional) ONNX: opset version
workspace: 4 # (int) TensorRT: workspace size (GB)
nms: False # (bool) CoreML: add NMS

# Hyperparameters ------------------------------------------------------------------------------------------------------
lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
lrf: 0.01 # (float) final learning rate (lr0 * lrf)
momentum: 0.937 # (float) SGD momentum/Adam beta1
weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
warmup_momentum: 0.8 # (float) warmup initial momentum
warmup_bias_lr: 0.1 # (float) warmup initial bias lr
box: 7.5 # (float) box loss gain
cls: 0.5 # (float) cls loss gain (scale with pixels)
dfl: 1.5 # (float) dfl loss gain
pose: 12.0 # (float) pose loss gain
kobj: 1.0 # (float) keypoint obj loss gain
label_smoothing: 0.0 # (float) label smoothing (fraction)
nbs: 64 # (int) nominal batch size
hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
degrees: 0.0 # (float) image rotation (+/- deg)
translate: 0.1 # (float) image translation (+/- fraction)
scale: 0.5 # (float) image scale (+/- gain)
shear: 0.0 # (float) image shear (+/- deg)
perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # (float) image flip up-down (probability)
fliplr: 0.5 # (float) image flip left-right (probability)
bgr: 0.0 # (float) image channel BGR (probability)
mosaic: 1.0 # (float) image mosaic (probability)
mixup: 0.0 # (float) image mixup (probability)
copy_paste: 0.0 # (float) segment copy-paste (probability)
auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
erasing: 0.4 # (float) probability of random erasing during classification training (0-1)
crop_fraction: 1.0 # (float) image crop fraction for classification evaluation/inference (0-1)

# Custom config.yaml ---------------------------------------------------------------------------------------------------
cfg: # (str, optional) for overriding defaults.yaml

# Tracker settings ------------------------------------------------------------------------------------------------------
tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
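Every key above is a per-run default: anything not supplied on the command line or as a Python keyword falls back to this file, and explicit arguments win. A sketch of the Python-side override style (dataset and values are illustrative):

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
# lr0, optimizer and cos_lr override default.yaml for this run only
model.train(data="coco8.yaml", epochs=3, lr0=0.001, optimizer="AdamW", cos_lr=True)
```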
40
ultralytics/cfg/models/README.md
Normal file
@@ -0,0 +1,40 @@
## Models

Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration files (`*.yaml`s) that can be used to create custom YOLO models. The models in this directory have been expertly crafted and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image segmentation tasks.

These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms, from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this directory provides a great starting point for your custom model development needs.

To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full details at the Ultralytics [Docs](https://docs.ultralytics.com/models), and if you need help or have any questions, feel free to reach out to the Ultralytics team for support. So, don't wait, start creating your custom YOLO model now!

### Usage

Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command:

```bash
yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100
```

They may also be used directly in a Python environment, and accept the same [arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above:

```python
from ultralytics import YOLO

model = YOLO("model.yaml")  # build a YOLOv8n model from scratch
# YOLO("model.pt")  # use pre-trained model if available
model.info()  # display model information
model.train(data="coco128.yaml", epochs=100)  # train the model
```

## Pre-trained Model Architectures

Ultralytics supports many model architectures. Visit https://docs.ultralytics.com/models to view detailed information and usage. Any of these models can be used by loading their configs or pretrained checkpoints if available.

## Contribute New Models

Have you trained a new YOLO variant or achieved state-of-the-art performance with specific tuning? We'd love to showcase your work in our Models section! Contributions from the community in the form of new models, architectures, or optimizations are highly valued and can significantly enrich our repository.

By contributing to this section, you're helping us offer a wider array of model choices and configurations to the community. It's a fantastic way to share your knowledge and expertise while making the Ultralytics YOLO ecosystem even more versatile.

To get started, please consult our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for step-by-step instructions on how to submit a Pull Request (PR) 🛠️. Your contributions are eagerly awaited!

Let's join hands to extend the range and capabilities of the Ultralytics YOLO models 🙏!
50
ultralytics/cfg/models/rt-detr/rtdetr-l.yaml
Normal file
@@ -0,0 +1,50 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 1024]

backbone:
  # [from, repeats, module, args]
  - [-1, 1, HGStem, [32, 48]] # 0-P2/4
  - [-1, 6, HGBlock, [48, 128, 3]] # stage 1

  - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
  - [-1, 6, HGBlock, [96, 512, 3]] # stage 2

  - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P4/16
  - [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut
  - [-1, 6, HGBlock, [192, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3

  - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P5/32
  - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4

head:
  - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2
  - [-1, 1, AIFI, [1024, 8]]
  - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1
  - [[-2, -1], 1, Concat, [1]]
  - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0
  - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0
  - [[-2, -1], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1

  - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0
  - [[-1, 17], 1, Concat, [1]] # cat Y4
  - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0

  - [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1
  - [[-1, 12], 1, Concat, [1]] # cat Y5
  - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1

  - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)

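As a quick sanity check of the `rtdetr-l.yaml` config above, the sketch below builds the model from the YAML and inspects it; a minimal sketch, assuming the `RTDETR` class exported by this package accepts a `*.yaml` architecture path the same way `YOLO` does.

```python
from ultralytics import RTDETR

model = RTDETR("rtdetr-l.yaml")  # build RT-DETR-l from the config (untrained weights)
model.info()  # confirm the parsed HGNet-style backbone and AIFI/RepC3 head structure
```
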
42
ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml
Normal file
@@ -0,0 +1,42 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# RT-DETR-ResNet101 object detection model with P3-P5 outputs.

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 1024]

backbone:
  # [from, repeats, module, args]
  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
  - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3
  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4

head:
  - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
  - [-1, 1, AIFI, [1024, 8]]
  - [-1, 1, Conv, [256, 1, 1]] # 7

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
  - [[-2, -1], 1, Concat, [1]]
  - [-1, 3, RepC3, [256]] # 11
  - [-1, 1, Conv, [256, 1, 1]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
  - [[-2, -1], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1

  - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
  - [[-1, 12], 1, Concat, [1]] # cat Y4
  - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0

  - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
  - [[-1, 7], 1, Concat, [1]] # cat Y5
  - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1

  - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)

42
ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml
Normal file
@@ -0,0 +1,42 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# RT-DETR-ResNet50 object detection model with P3-P5 outputs.

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 1024]

backbone:
  # [from, repeats, module, args]
  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
  - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3
  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4

head:
  - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
  - [-1, 1, AIFI, [1024, 8]]
  - [-1, 1, Conv, [256, 1, 1]] # 7

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
  - [[-2, -1], 1, Concat, [1]]
  - [-1, 3, RepC3, [256]] # 11
  - [-1, 1, Conv, [256, 1, 1]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
  - [[-2, -1], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1

  - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
  - [[-1, 12], 1, Concat, [1]] # cat Y4
  - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0

  - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
  - [[-1, 7], 1, Concat, [1]] # cat Y5
  - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1

  - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)

54
ultralytics/cfg/models/rt-detr/rtdetr-x.yaml
Normal file
@@ -0,0 +1,54 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  x: [1.00, 1.00, 2048]

backbone:
  # [from, repeats, module, args]
  - [-1, 1, HGStem, [32, 64]] # 0-P2/4
  - [-1, 6, HGBlock, [64, 128, 3]] # stage 1

  - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
  - [-1, 6, HGBlock, [128, 512, 3]]
  - [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2

  - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P4/16
  - [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
  - [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3

  - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P5/32
  - [-1, 6, HGBlock, [512, 2048, 5, True, False]]
  - [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4

head:
  - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2
  - [-1, 1, AIFI, [2048, 8]]
  - [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1
  - [[-2, -1], 1, Concat, [1]]
  - [-1, 3, RepC3, [384]] # 20, fpn_blocks.0
  - [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0
  - [[-2, -1], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1

  - [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0
  - [[-1, 21], 1, Concat, [1]] # cat Y4
  - [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0

  - [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1
  - [[-1, 16], 1, Concat, [1]] # cat Y5
  - [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1

  - [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)

40
ultralytics/cfg/models/v10/yolov10b.yaml
Normal file
@@ -0,0 +1,40 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  b: [0.67, 1.00, 512]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fCIB, [512, True]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)

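The v10 configs above swap several C2f blocks for C2fCIB, add SCDown downsampling and a PSA attention block after SPPF, and end in a v10Detect head, which YOLOv10 trains with dual label assignments so that inference can skip NMS. A minimal training sketch, assuming the `YOLOv10` class bundled with this repository accepts YAML paths like the other model classes:

```python
from ultralytics import YOLOv10

model = YOLOv10("yolov10b.yaml")  # build YOLOv10-b from the config above
model.train(data="coco128.yaml", epochs=10, imgsz=640)  # short smoke-test run
```
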
40
ultralytics/cfg/models/v10/yolov10l.yaml
Normal file
@@ -0,0 +1,40 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fCIB, [512, True]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)

43
ultralytics/cfg/models/v10/yolov10m.yaml
Normal file
@@ -0,0 +1,43 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv10 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)

40
ultralytics/cfg/models/v10/yolov10n.yaml
Normal file
@@ -0,0 +1,40 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)

39
ultralytics/cfg/models/v10/yolov10s.yaml
Normal file
@@ -0,0 +1,39 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  s: [0.33, 0.50, 1024]

backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)

40
ultralytics/cfg/models/v10/yolov10x.yaml
Normal file
@@ -0,0 +1,40 @@
# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  x: [1.00, 1.25, 512]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2fCIB, [512, True]]
  - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2fCIB, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 1, PSA, [1024]] # 10

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fCIB, [512, True]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)

  - [-1, 1, SCDown, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)

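Across all the v10 scales the layer lists are nearly identical; what changes is mostly the [depth, width, max_channels] triple. The sketch below approximates (as an assumption, not the library's exact code) how Ultralytics-style parsers apply those constants when materializing a layer: repeat counts are scaled by depth, channel counts by width and capped at max_channels, then rounded to a multiple of 8.

```python
import math

def scale_layer(repeats: int, channels: int, depth: float, width: float, max_channels: int):
    """Apply compound scaling constants to one layer spec (illustrative approximation)."""
    r = max(round(repeats * depth), 1) if repeats > 1 else repeats  # scale repeat count
    c = min(channels, max_channels) * width  # scale and cap channel width
    c = math.ceil(c / 8) * 8  # keep channels divisible by 8
    return r, int(c)

# 'b' scale from yolov10b.yaml: depth 0.67, width 1.00, max_channels 512
print(scale_layer(3, 1024, 0.67, 1.00, 512))  # the P5 C2fCIB block -> (2, 512)
```
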
46
ultralytics/cfg/models/v3/yolov3-spp.yaml
Normal file
@@ -0,0 +1,46 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# darknet53 backbone
backbone:
  # [from, number, module, args]
  - [-1, 1, Conv, [32, 3, 1]] # 0
  - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
  - [-1, 1, Bottleneck, [64]]
  - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
  - [-1, 2, Bottleneck, [128]]
  - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
  - [-1, 8, Bottleneck, [256]]
  - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
  - [-1, 8, Bottleneck, [512]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
  - [-1, 4, Bottleneck, [1024]] # 10

# YOLOv3-SPP head
head:
  - [-1, 1, Bottleneck, [1024, False]]
  - [-1, 1, SPP, [512, [5, 9, 13]]]
  - [-1, 1, Conv, [1024, 3, 1]]
  - [-1, 1, Conv, [512, 1, 1]]
  - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)

  - [-2, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, Bottleneck, [512, False]]
  - [-1, 1, Bottleneck, [512, False]]
  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)

  - [-2, 1, Conv, [128, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, Bottleneck, [256, False]]
  - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)

  - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)

37
ultralytics/cfg/models/v3/yolov3-tiny.yaml
Normal file
@@ -0,0 +1,37 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# YOLOv3-tiny backbone
backbone:
  # [from, number, module, args]
  - [-1, 1, Conv, [16, 3, 1]] # 0
  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 1-P1/2
  - [-1, 1, Conv, [32, 3, 1]]
  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 3-P2/4
  - [-1, 1, Conv, [64, 3, 1]]
  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 5-P3/8
  - [-1, 1, Conv, [128, 3, 1]]
  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 7-P4/16
  - [-1, 1, Conv, [256, 3, 1]]
  - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 9-P5/32
  - [-1, 1, Conv, [512, 3, 1]]
  - [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]] # 11
  - [-1, 1, nn.MaxPool2d, [2, 1, 0]] # 12

# YOLOv3-tiny head
head:
  - [-1, 1, Conv, [1024, 3, 1]]
  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, Conv, [512, 3, 1]] # 15 (P5/32-large)

  - [-2, 1, Conv, [128, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, Conv, [256, 3, 1]] # 19 (P4/16-medium)

  - [[19, 15], 1, Detect, [nc]] # Detect(P4, P5)

46
ultralytics/cfg/models/v3/yolov3.yaml
Normal file
@@ -0,0 +1,46 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv3 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# darknet53 backbone
backbone:
  # [from, number, module, args]
  - [-1, 1, Conv, [32, 3, 1]] # 0
  - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
  - [-1, 1, Bottleneck, [64]]
  - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
  - [-1, 2, Bottleneck, [128]]
  - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
  - [-1, 8, Bottleneck, [256]]
  - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
  - [-1, 8, Bottleneck, [512]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
  - [-1, 4, Bottleneck, [1024]] # 10

# YOLOv3 head
head:
  - [-1, 1, Bottleneck, [1024, False]]
  - [-1, 1, Conv, [512, 1, 1]]
  - [-1, 1, Conv, [1024, 3, 1]]
  - [-1, 1, Conv, [512, 1, 1]]
  - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)

  - [-2, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, Bottleneck, [512, False]]
  - [-1, 1, Bottleneck, [512, False]]
  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)

  - [-2, 1, Conv, [128, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, Bottleneck, [256, False]]
  - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)

  - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)

59
ultralytics/cfg/models/v5/yolov5-p6.yaml
Normal file
@@ -0,0 +1,59 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.ultralytics.com/models/yolov5

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov5n-p6.yaml' will call yolov5-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 1024]
  l: [1.00, 1.00, 1024]
  x: [1.33, 1.25, 1024]

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C3, [128]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C3, [256]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 9, C3, [512]]
  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C3, [768]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C3, [1024]]
  - [-1, 1, SPPF, [1024, 5]] # 11

# YOLOv5 v6.0 head
head:
  - [-1, 1, Conv, [768, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C3, [768, False]] # 15

  - [-1, 1, Conv, [512, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C3, [512, False]] # 19

  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C3, [256, False]] # 23 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 20], 1, Concat, [1]] # cat head P4
  - [-1, 3, C3, [512, False]] # 26 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 16], 1, Concat, [1]] # cat head P5
  - [-1, 3, C3, [768, False]] # 29 (P5/32-large)

  - [-1, 1, Conv, [768, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P6
  - [-1, 3, C3, [1024, False]] # 32 (P6/64-xlarge)

  - [[23, 26, 29, 32], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)

48
ultralytics/cfg/models/v5/yolov5.yaml
Normal file
@@ -0,0 +1,48 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv5 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov5

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call yolov5.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 1024]
  l: [1.00, 1.00, 1024]
  x: [1.33, 1.25, 1024]

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C3, [128]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C3, [256]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 9, C3, [512]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C3, [1024]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv5 v6.0 head
head:
  - [-1, 1, Conv, [512, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C3, [512, False]] # 13

  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C3, [256, False]] # 17 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P4
  - [-1, 3, C3, [512, False]] # 20 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 3, C3, [1024, False]] # 23 (P5/32-large)

  - [[17, 20, 23], 1, Detect, [nc]] # Detect(P3, P4, P5)

53
ultralytics/cfg/models/v6/yolov6.yaml
Normal file
@@ -0,0 +1,53 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6

# Parameters
nc: 80 # number of classes
activation: nn.ReLU() # (optional) model default activation function
scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv6-3.0s backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 6, Conv, [128, 3, 1]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 12, Conv, [256, 3, 1]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 18, Conv, [512, 3, 1]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 6, Conv, [1024, 3, 1]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv6-3.0s head
head:
  - [-1, 1, Conv, [256, 1, 1]]
  - [-1, 1, nn.ConvTranspose2d, [256, 2, 2, 0]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, Conv, [256, 3, 1]]
  - [-1, 9, Conv, [256, 3, 1]] # 14

  - [-1, 1, Conv, [128, 1, 1]]
  - [-1, 1, nn.ConvTranspose2d, [128, 2, 2, 0]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, Conv, [128, 3, 1]]
  - [-1, 9, Conv, [128, 3, 1]] # 19

  - [-1, 1, Conv, [128, 3, 2]]
  - [[-1, 15], 1, Concat, [1]] # cat head P4
  - [-1, 1, Conv, [256, 3, 1]]
  - [-1, 9, Conv, [256, 3, 1]] # 23

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 1, Conv, [512, 3, 1]]
  - [-1, 9, Conv, [512, 3, 1]] # 27

  - [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5)

25
ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml
Normal file
@@ -0,0 +1,25 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify

# Parameters
nc: 1000 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 1024]
  l: [1.00, 1.00, 1024]
  x: [1.00, 1.25, 1024]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
  - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3-P4/16
  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32

# YOLOv8.0n head
head:
  - [-1, 1, Classify, [nc]] # Classify

25
ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml
Normal file
@@ -0,0 +1,25 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify

# Parameters
nc: 1000 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 1024]
  l: [1.00, 1.00, 1024]
  x: [1.00, 1.25, 1024]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
  - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
  - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
  - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3-P4/16
  - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32

# YOLOv8.0n head
head:
  - [-1, 1, Classify, [nc]] # Classify

29
ultralytics/cfg/models/v8/yolov8-cls.yaml
Normal file
@@ -0,0 +1,29 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify

# Parameters
nc: 1000 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 1024]
  l: [1.00, 1.00, 1024]
  x: [1.00, 1.25, 1024]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]

# YOLOv8.0n head
head:
  - [-1, 1, Classify, [nc]] # Classify

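Unlike the detection configs, the classification files end in a single `Classify` head with no multi-scale neck, and `nc` defaults to 1000 for ImageNet-style training. A minimal sketch of training a scaled-down variant, assuming the `mnist160` toy-dataset shorthand resolves in your install:

```python
from ultralytics import YOLO

model = YOLO("yolov8n-cls.yaml")  # the 'n' scale is selected from the filename
model.train(data="mnist160", epochs=5, imgsz=64)  # tiny classification smoke test
```
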
54
ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml
Normal file
@@ -0,0 +1,54 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p2 summary: 491 layers, 2033944 parameters, 2033928 gradients, 13.8 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p2 summary: 491 layers, 5562080 parameters, 5562064 gradients, 25.1 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m-ghost-p2 summary: 731 layers, 9031728 parameters, 9031712 gradients, 42.8 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l-ghost-p2 summary: 971 layers, 12214448 parameters, 12214432 gradients, 69.1 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x-ghost-p2 summary: 971 layers, 18664776 parameters, 18664760 gradients, 103.3 GFLOPs

# YOLOv8.0-ghost backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C3Ghost, [128, True]]
  - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C3Ghost, [256, True]]
  - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C3Ghost, [512, True]]
  - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C3Ghost, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0-ghost-p2 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C3Ghost, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 2], 1, Concat, [1]] # cat backbone P2
  - [-1, 3, C3Ghost, [128]] # 18 (P2/4-xsmall)

  - [-1, 1, GhostConv, [128, 3, 2]]
  - [[-1, 15], 1, Concat, [1]] # cat head P3
  - [-1, 3, C3Ghost, [256]] # 21 (P3/8-small)

  - [-1, 1, GhostConv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C3Ghost, [512]] # 24 (P4/16-medium)

  - [-1, 1, GhostConv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C3Ghost, [1024]] # 27 (P5/32-large)

  - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)

56
ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml
Normal file
@@ -0,0 +1,56 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p6 summary: 529 layers, 2901100 parameters, 2901084 gradients, 5.8 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p6 summary: 529 layers, 9520008 parameters, 9519992 gradients, 16.4 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m-ghost-p6 summary: 789 layers, 18002904 parameters, 18002888 gradients, 34.4 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l-ghost-p6 summary: 1049 layers, 21227584 parameters, 21227568 gradients, 55.3 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x-ghost-p6 summary: 1049 layers, 33057852 parameters, 33057836 gradients, 85.7 GFLOPs

# YOLOv8.0-ghost backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C3Ghost, [128, True]]
  - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C3Ghost, [256, True]]
  - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C3Ghost, [512, True]]
  - [-1, 1, GhostConv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C3Ghost, [768, True]]
  - [-1, 1, GhostConv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C3Ghost, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 11

# YOLOv8.0-ghost-p6 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C3Ghost, [768]] # 14

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C3Ghost, [512]] # 17

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C3Ghost, [256]] # 20 (P3/8-small)

  - [-1, 1, GhostConv, [256, 3, 2]]
  - [[-1, 17], 1, Concat, [1]] # cat head P4
  - [-1, 3, C3Ghost, [512]] # 23 (P4/16-medium)

  - [-1, 1, GhostConv, [512, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P5
  - [-1, 3, C3Ghost, [768]] # 26 (P5/32-large)

  - [-1, 1, GhostConv, [768, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P6
  - [-1, 3, C3Ghost, [1024]] # 29 (P6/64-xlarge)

  - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)

47
ultralytics/cfg/models/v8/yolov8-ghost.yaml
Normal file
@@ -0,0 +1,47 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
# Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n-ghost summary: 403 layers, 1865316 parameters, 1865300 gradients, 5.8 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s-ghost summary: 403 layers, 5960072 parameters, 5960056 gradients, 16.4 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m-ghost summary: 603 layers, 10336312 parameters, 10336296 gradients, 32.7 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l-ghost summary: 803 layers, 14277872 parameters, 14277856 gradients, 53.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x-ghost summary: 803 layers, 22229308 parameters, 22229292 gradients, 83.3 GFLOPs

# YOLOv8.0n-ghost backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C3Ghost, [128, True]]
  - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C3Ghost, [256, True]]
  - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C3Ghost, [512, True]]
  - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C3Ghost, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C3Ghost, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)

  - [-1, 1, GhostConv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C3Ghost, [512]] # 18 (P4/16-medium)

  - [-1, 1, GhostConv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C3Ghost, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)

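The Ghost variants above replace most Conv/C2f pairs with GhostConv/C3Ghost. The idea from GhostNet is to compute only half the output channels with a dense convolution and derive the remaining "ghost" channels with a cheap depthwise operation. The PyTorch sketch below is an illustrative reconstruction of that idea, not the library's exact module:

```python
import torch
import torch.nn as nn

class GhostConvSketch(nn.Module):
    """Illustrative Ghost convolution: dense primary conv + cheap depthwise 'ghost' features."""

    def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1):
        super().__init__()
        c_ = c2 // 2  # half the outputs come from the primary convolution
        self.primary = nn.Sequential(nn.Conv2d(c1, c_, k, s, k // 2, bias=False), nn.BatchNorm2d(c_), nn.SiLU())
        # a cheap 5x5 depthwise conv generates the remaining 'ghost' channels
        self.cheap = nn.Sequential(nn.Conv2d(c_, c_, 5, 1, 2, groups=c_, bias=False), nn.BatchNorm2d(c_), nn.SiLU())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        y = self.primary(x)
        return torch.cat((y, self.cheap(y)), 1)

x = torch.randn(1, 64, 32, 32)
print(GhostConvSketch(64, 128)(x).shape)  # torch.Size([1, 128, 32, 32])
```
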
46
ultralytics/cfg/models/v8/yolov8-obb.yaml
Normal file
@@ -0,0 +1,46 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/obb

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, OBB, [nc, 1]] # OBB(P3, P4, P5)

54
ultralytics/cfg/models/v8/yolov8-p2.yaml
Normal file
@@ -0,0 +1,54 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0 backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0-p2 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 2], 1, Concat, [1]] # cat backbone P2
  - [-1, 3, C2f, [128]] # 18 (P2/4-xsmall)

  - [-1, 1, Conv, [128, 3, 2]]
  - [[-1, 15], 1, Concat, [1]] # cat head P3
  - [-1, 3, C2f, [256]] # 21 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 24 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 27 (P5/32-large)

  - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)

56
ultralytics/cfg/models/v8/yolov8-p6.yaml
Normal file
@@ -0,0 +1,56 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0x6 backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [768, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 11

# YOLOv8.0x6 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C2, [768, False]] # 14

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2, [512, False]] # 17

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2, [256, False]] # 20 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 17], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2, [768, False]] # 26 (P5/32-large)

  - [-1, 1, Conv, [768, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P6
  - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)

  - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)

57
ultralytics/cfg/models/v8/yolov8-pose-p6.yaml
Normal file
@@ -0,0 +1,57 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-pose-p6 keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose

# Parameters
nc: 1 # number of classes
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0x6 backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [768, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 11

# YOLOv8.0x6 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C2, [768, False]] # 14

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2, [512, False]] # 17

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2, [256, False]] # 20 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 17], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2, [768, False]] # 26 (P5/32-large)

  - [-1, 1, Conv, [768, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P6
  - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)

  - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6)

47
ultralytics/cfg/models/v8/yolov8-pose.yaml
Normal file
@@ -0,0 +1,47 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose

# Parameters
nc: 1 # number of classes
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5)

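In the pose configs, `kpt_shape: [17, 3]` declares 17 COCO keypoints with an (x, y, visibility) triple per point, and the `Pose` head receives both `nc` and `kpt_shape`. A minimal sketch, assuming the `coco8-pose.yaml` sample dataset ships with your install:

```python
from ultralytics import YOLO

model = YOLO("yolov8n-pose.yaml")  # Pose head built with nc=1 and kpt_shape=[17, 3]
model.train(data="coco8-pose.yaml", epochs=3, imgsz=640)  # short smoke-test run
results = model("https://ultralytics.com/images/bus.jpg")
print(results[0].keypoints.shape)  # expected (num_persons, 17, 3)
```
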
46
ultralytics/cfg/models/v8/yolov8-rtdetr.yaml
Normal file
@@ -0,0 +1,46 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)

56
ultralytics/cfg/models/v8/yolov8-seg-p6.yaml
Normal file
@@ -0,0 +1,56 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-seg-p6 instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-seg-p6.yaml' will call yolov8-seg-p6.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0x6 backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [768, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 11

# YOLOv8.0x6 head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 8], 1, Concat, [1]] # cat backbone P5
  - [-1, 3, C2, [768, False]] # 14

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2, [512, False]] # 17

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2, [256, False]] # 20 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 17], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 14], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2, [768, False]] # 26 (P5/32-large)

  - [-1, 1, Conv, [768, 3, 2]]
  - [[-1, 11], 1, Concat, [1]] # cat head P6
  - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)

  - [[20, 23, 26, 29], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5, P6)

46
ultralytics/cfg/models/v8/yolov8-seg.yaml
Normal file
@ -0,0 +1,46 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5)
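The `scales` table above is selected by filename. A minimal sketch of building a scaled model directly from one of these YAML configs, assuming the `ultralytics` package from this commit is installed:

```python
from ultralytics import YOLO

# "yolov8n-seg.yaml" resolves to yolov8-seg.yaml with the 'n' scale row applied.
model = YOLO("yolov8n-seg.yaml")  # build a nano-scale segmentation model from scratch
model.info()  # print the layer/parameter summary noted in the scale comments
```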
48
ultralytics/cfg/models/v8/yolov8-world.yaml
Normal file
@ -0,0 +1,48 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-World object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fAttn, [512, 256, 8]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2fAttn, [256, 128, 4]] # 15 (P3/8-small)

  - [[15, 12, 9], 1, ImagePoolingAttn, [256]] # 16 (P3/8-small)

  - [15, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fAttn, [512, 256, 8]] # 19 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fAttn, [1024, 512, 16]] # 22 (P5/32-large)

  - [[15, 19, 22], 1, WorldDetect, [nc, 512, False]] # Detect(P3, P4, P5)
46
ultralytics/cfg/models/v8/yolov8-worldv2.yaml
Normal file
@ -0,0 +1,46 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8-World-v2 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2fAttn, [512, 256, 8]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2fAttn, [256, 128, 4]] # 15 (P3/8-small)

  - [15, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2fAttn, [512, 256, 8]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2fAttn, [1024, 512, 16]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, WorldDetect, [nc, 512, True]] # Detect(P3, P4, P5)
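Unlike the plain detection configs, the World heads accept a text vocabulary at runtime. A minimal open-vocabulary sketch; the checkpoint name is an assumption, and any YOLOWorld weights would work the same way:

```python
from ultralytics import YOLOWorld

model = YOLOWorld("yolov8s-worldv2.pt")  # assumed pretrained counterpart of the worldv2 config
model.set_classes(["person", "bus"])  # restrict the open vocabulary to two classes
results = model.predict("https://ultralytics.com/images/bus.jpg")
```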
46
ultralytics/cfg/models/v8/yolov8.yaml
Normal file
@ -0,0 +1,46 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
  s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
  m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
  l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
  x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs

# YOLOv8.0n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 3, C2f, [128, True]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 6, C2f, [256, True]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 6, C2f, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 3, C2f, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9

# YOLOv8.0n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 3, C2f, [512]] # 12

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 3, C2f, [256]] # 15 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 3, C2f, [512]] # 18 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 3, C2f, [1024]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
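Each `scales` row rescales the base spec above: `depth` multiplies a layer's repeat count, `width` multiplies its channel count, and `max_channels` caps channels before scaling. A standalone sketch of that arithmetic; the real parser uses a `make_divisible` helper, and the round-to-a-multiple-of-8 step below is an equivalent inline assumption:

```python
import math


def scale_layer(repeats, channels, depth, width, max_channels):
    """Apply one [depth, width, max_channels] scale row to a single layer spec (sketch)."""
    r = max(round(repeats * depth), 1) if repeats > 1 else repeats  # scale repeat count
    c = min(channels, max_channels) * width  # cap, then scale channels
    c = math.ceil(c / 8) * 8  # keep channel counts divisible by 8
    return r, int(c)


# 'n' scale [0.33, 0.25, 1024] applied to "- [-1, 6, C2f, [512, True]]":
print(scale_layer(6, 512, 0.33, 0.25, 1024))  # -> (2, 128)
```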
36
ultralytics/cfg/models/v9/yolov9c.yaml
Normal file
@ -0,0 +1,36 @@
# YOLOv9

# parameters
nc: 80 # number of classes

# gelan backbone
backbone:
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]] # 2
  - [-1, 1, ADown, [256]] # 3-P3/8
  - [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]] # 4
  - [-1, 1, ADown, [512]] # 5-P4/16
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 6
  - [-1, 1, ADown, [512]] # 7-P5/32
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 8
  - [-1, 1, SPPELAN, [512, 256]] # 9

head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 12

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]] # 15 (P3/8-small)

  - [-1, 1, ADown, [256]]
  - [[-1, 12], 1, Concat, [1]] # cat head P4
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 18 (P4/16-medium)

  - [-1, 1, ADown, [512]]
  - [[-1, 9], 1, Concat, [1]] # cat head P5
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 21 (P5/32-large)

  - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
60
ultralytics/cfg/models/v9/yolov9e.yaml
Normal file
@ -0,0 +1,60 @@
# YOLOv9

# parameters
nc: 80 # number of classes

# gelan backbone
backbone:
  - [-1, 1, Silence, []]
  - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 2-P2/4
  - [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]] # 3
  - [-1, 1, ADown, [256]] # 4-P3/8
  - [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]] # 5
  - [-1, 1, ADown, [512]] # 6-P4/16
  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 7
  - [-1, 1, ADown, [1024]] # 8-P5/32
  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 9

  - [1, 1, CBLinear, [[64]]] # 10
  - [3, 1, CBLinear, [[64, 128]]] # 11
  - [5, 1, CBLinear, [[64, 128, 256]]] # 12
  - [7, 1, CBLinear, [[64, 128, 256, 512]]] # 13
  - [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]] # 14

  - [0, 1, Conv, [64, 3, 2]] # 15-P1/2
  - [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]] # 16
  - [-1, 1, Conv, [128, 3, 2]] # 17-P2/4
  - [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]] # 18
  - [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]] # 19
  - [-1, 1, ADown, [256]] # 20-P3/8
  - [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]] # 21
  - [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]] # 22
  - [-1, 1, ADown, [512]] # 23-P4/16
  - [[13, 14, -1], 1, CBFuse, [[3, 3]]] # 24
  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 25
  - [-1, 1, ADown, [1024]] # 26-P5/32
  - [[14, -1], 1, CBFuse, [[4]]] # 27
  - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 28
  - [-1, 1, SPPELAN, [512, 256]] # 29

# gelan head
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 25], 1, Concat, [1]] # cat backbone P4
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]] # 32

  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
  - [[-1, 22], 1, Concat, [1]] # cat backbone P3
  - [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]] # 35 (P3/8-small)

  - [-1, 1, ADown, [256]]
  - [[-1, 32], 1, Concat, [1]] # cat head P4
  - [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]] # 38 (P4/16-medium)

  - [-1, 1, ADown, [512]]
  - [[-1, 29], 1, Concat, [1]] # cat head P5
  - [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]] # 41 (P5/32-large)

  # detect
  - [[35, 38, 41], 1, Detect, [nc]] # Detect(P3, P4, P5)
18
ultralytics/cfg/trackers/botsort.yaml
Normal file
@ -0,0 +1,18 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT

tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
new_track_thresh: 0.6 # threshold to initialize a new track when a detection matches no existing tracks
track_buffer: 30 # number of frames to keep lost tracks before removal
match_thresh: 0.8 # threshold for matching tracks
# min_box_area: 10 # threshold for min box areas (for tracker evaluation, not used for now)
# mot20: False # for tracker evaluation (not used for now)

# BoT-SORT settings
gmc_method: sparseOptFlow # method of global motion compensation
# ReID model related thresholds (not supported yet)
proximity_thresh: 0.5
appearance_thresh: 0.25
with_reid: False
11
ultralytics/cfg/trackers/bytetrack.yaml
Normal file
@ -0,0 +1,11 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack

tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
new_track_thresh: 0.6 # threshold to initialize a new track when a detection matches no existing tracks
track_buffer: 30 # number of frames to keep lost tracks before removal
match_thresh: 0.8 # threshold for matching tracks
# min_box_area: 10 # threshold for min box areas (for tracker evaluation, not used for now)
# mot20: False # for tracker evaluation (not used for now)
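Either file can be passed straight to the tracking entry point. A minimal sketch; the video path is a placeholder:

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
# Pick a tracker config by name; copy and edit the YAML to tune the thresholds above.
results = model.track(source="path/to/video.mp4", tracker="bytetrack.yaml")  # or "botsort.yaml"
```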
15
ultralytics/data/__init__.py
Normal file
@ -0,0 +1,15 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from .base import BaseDataset
from .build import build_dataloader, build_yolo_dataset, load_inference_source
from .dataset import ClassificationDataset, SemanticDataset, YOLODataset

__all__ = (
    "BaseDataset",
    "ClassificationDataset",
    "SemanticDataset",
    "YOLODataset",
    "build_yolo_dataset",
    "build_dataloader",
    "load_inference_source",
)
BIN
ultralytics/data/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/__init__.cpython-39.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/augment.cpython-312.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/augment.cpython-39.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/base.cpython-312.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/base.cpython-39.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/build.cpython-312.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/build.cpython-39.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/converter.cpython-312.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/converter.cpython-39.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/dataset.cpython-312.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/dataset.cpython-39.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/loaders.cpython-312.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/loaders.cpython-39.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/utils.cpython-312.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/__pycache__/utils.cpython-39.pyc
Normal file
Binary file not shown.
50
ultralytics/data/annotator.py
Normal file
@ -0,0 +1,50 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from pathlib import Path

from ultralytics import SAM, YOLO


def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None):
    """
    Automatically annotates images using a YOLO object detection model and a SAM segmentation model.

    Args:
        data (str): Path to a folder containing images to be annotated.
        det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
        sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
        device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).
        output_dir (str | None, optional): Directory to save the annotated results.
            Defaults to a 'labels' folder in the same directory as 'data'.

    Example:
        ```python
        from ultralytics.data.annotator import auto_annotate

        auto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')
        ```
    """
    det_model = YOLO(det_model)
    sam_model = SAM(sam_model)

    data = Path(data)
    if not output_dir:
        output_dir = data.parent / f"{data.stem}_auto_annotate_labels"
    Path(output_dir).mkdir(exist_ok=True, parents=True)

    det_results = det_model(data, stream=True, device=device)

    for result in det_results:
        class_ids = result.boxes.cls.int().tolist()  # noqa
        if len(class_ids):
            boxes = result.boxes.xyxy  # Boxes object for bbox outputs
            sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device)
            segments = sam_results[0].masks.xyn  # noqa

            with open(f"{Path(output_dir) / Path(result.path).stem}.txt", "w") as f:
                for i in range(len(segments)):
                    s = segments[i]
                    if len(s) == 0:
                        continue
                    segment = map(str, segments[i].reshape(-1).tolist())
                    f.write(f"{class_ids[i]} " + " ".join(segment) + "\n")
1254
ultralytics/data/augment.py
Normal file
File diff suppressed because it is too large
311
ultralytics/data/base.py
Normal file
@ -0,0 +1,311 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import glob
import math
import os
import random
from copy import deepcopy
from multiprocessing.pool import ThreadPool
from pathlib import Path
from typing import Optional

import cv2
import numpy as np
import psutil
from torch.utils.data import Dataset

from ultralytics.utils import DEFAULT_CFG, LOCAL_RANK, LOGGER, NUM_THREADS, TQDM
from .utils import HELP_URL, IMG_FORMATS


class BaseDataset(Dataset):
    """
    Base dataset class for loading and processing image data.

    Args:
        img_path (str): Path to the folder containing images.
        imgsz (int, optional): Image size. Defaults to 640.
        cache (bool, optional): Cache images to RAM or disk during training. Defaults to False.
        augment (bool, optional): If True, data augmentation is applied. Defaults to True.
        hyp (dict, optional): Hyperparameters to apply data augmentation. Defaults to None.
        prefix (str, optional): Prefix to print in log messages. Defaults to ''.
        rect (bool, optional): If True, rectangular training is used. Defaults to False.
        batch_size (int, optional): Size of batches. Defaults to None.
        stride (int, optional): Stride. Defaults to 32.
        pad (float, optional): Padding. Defaults to 0.0.
        single_cls (bool, optional): If True, single class training is used. Defaults to False.
        classes (list): List of included classes. Default is None.
        fraction (float): Fraction of dataset to utilize. Default is 1.0 (use all data).

    Attributes:
        im_files (list): List of image file paths.
        labels (list): List of label data dictionaries.
        ni (int): Number of images in the dataset.
        ims (list): List of loaded images.
        npy_files (list): List of numpy file paths.
        transforms (callable): Image transformation function.
    """

    def __init__(
        self,
        img_path,
        imgsz=640,
        cache=False,
        augment=True,
        hyp=DEFAULT_CFG,
        prefix="",
        rect=False,
        batch_size=16,
        stride=32,
        pad=0.5,
        single_cls=False,
        classes=None,
        fraction=1.0,
    ):
        """Initialize BaseDataset with given configuration and options."""
        super().__init__()
        self.img_path = img_path
        self.imgsz = imgsz
        self.augment = augment
        self.single_cls = single_cls
        self.prefix = prefix
        self.fraction = fraction
        self.im_files = self.get_img_files(self.img_path)
        self.labels = self.get_labels()
        self.update_labels(include_class=classes)  # single_cls and include_class
        self.ni = len(self.labels)  # number of images
        self.rect = rect
        self.batch_size = batch_size
        self.stride = stride
        self.pad = pad
        if self.rect:
            assert self.batch_size is not None
            self.set_rectangle()

        # Buffer thread for mosaic images
        self.buffer = []  # buffer size = batch size
        self.max_buffer_length = min((self.ni, self.batch_size * 8, 1000)) if self.augment else 0

        # Cache images
        if cache == "ram" and not self.check_cache_ram():
            cache = False
        self.ims, self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni, [None] * self.ni
        self.npy_files = [Path(f).with_suffix(".npy") for f in self.im_files]
        if cache:
            self.cache_images(cache)

        # Transforms
        self.transforms = self.build_transforms(hyp=hyp)

    def get_img_files(self, img_path):
        """Read image files."""
        try:
            f = []  # image files
            for p in img_path if isinstance(img_path, list) else [img_path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / "**" / "*.*"), recursive=True)
                    # F = list(p.rglob('*.*'))  # pathlib
                elif p.is_file():  # file
                    with open(p) as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        f += [x.replace("./", parent) if x.startswith("./") else x for x in t]  # local to global path
                        # F += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                else:
                    raise FileNotFoundError(f"{self.prefix}{p} does not exist")
            im_files = sorted(x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS)
            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS])  # pathlib
            assert im_files, f"{self.prefix}No images found in {img_path}"
        except Exception as e:
            raise FileNotFoundError(f"{self.prefix}Error loading data from {img_path}\n{HELP_URL}") from e
        if self.fraction < 1:
            # im_files = im_files[: round(len(im_files) * self.fraction)]
            num_elements_to_select = round(len(im_files) * self.fraction)
            im_files = random.sample(im_files, num_elements_to_select)
        return im_files

    def update_labels(self, include_class: Optional[list]):
        """Update labels to include only these classes (optional)."""
        include_class_array = np.array(include_class).reshape(1, -1)
        for i in range(len(self.labels)):
            if include_class is not None:
                cls = self.labels[i]["cls"]
                bboxes = self.labels[i]["bboxes"]
                segments = self.labels[i]["segments"]
                keypoints = self.labels[i]["keypoints"]
                j = (cls == include_class_array).any(1)
                self.labels[i]["cls"] = cls[j]
                self.labels[i]["bboxes"] = bboxes[j]
                if segments:
                    self.labels[i]["segments"] = [segments[si] for si, idx in enumerate(j) if idx]
                if keypoints is not None:
                    self.labels[i]["keypoints"] = keypoints[j]
            if self.single_cls:
                self.labels[i]["cls"][:, 0] = 0

    def load_image(self, i, rect_mode=True):
        """Loads 1 image from dataset index 'i', returns (im, resized hw)."""
        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
        if im is None:  # not cached in RAM
            if fn.exists():  # load npy
                try:
                    im = np.load(fn)
                except Exception as e:
                    LOGGER.warning(f"{self.prefix}WARNING ⚠️ Removing corrupt *.npy image file {fn} due to: {e}")
                    Path(fn).unlink(missing_ok=True)
                    im = cv2.imread(f)  # BGR
            else:  # read image
                im = cv2.imread(f)  # BGR
            if im is None:
                raise FileNotFoundError(f"Image Not Found {f}")

            h0, w0 = im.shape[:2]  # orig hw
            if rect_mode:  # resize long side to imgsz while maintaining aspect ratio
                r = self.imgsz / max(h0, w0)  # ratio
                if r != 1:  # if sizes are not equal
                    w, h = (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz))
                    im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
            elif not (h0 == w0 == self.imgsz):  # resize by stretching image to square imgsz
                im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=cv2.INTER_LINEAR)

            # Add to buffer if training with augmentations
            if self.augment:
                self.ims[i], self.im_hw0[i], self.im_hw[i] = im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
                self.buffer.append(i)
                if len(self.buffer) >= self.max_buffer_length:
                    j = self.buffer.pop(0)
                    self.ims[j], self.im_hw0[j], self.im_hw[j] = None, None, None

            return im, (h0, w0), im.shape[:2]

        return self.ims[i], self.im_hw0[i], self.im_hw[i]

    def cache_images(self, cache):
        """Cache images to memory or disk."""
        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabyte
        fcn = self.cache_images_to_disk if cache == "disk" else self.load_image
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(fcn, range(self.ni))
            pbar = TQDM(enumerate(results), total=self.ni, disable=LOCAL_RANK > 0)
            for i, x in pbar:
                if cache == "disk":
                    b += self.npy_files[i].stat().st_size
                else:  # 'ram'
                    self.ims[i], self.im_hw0[i], self.im_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
                    b += self.ims[i].nbytes
                pbar.desc = f"{self.prefix}Caching images ({b / gb:.1f}GB {cache})"
            pbar.close()

    def cache_images_to_disk(self, i):
        """Saves an image as an *.npy file for faster loading."""
        f = self.npy_files[i]
        if not f.exists():
            np.save(f.as_posix(), cv2.imread(self.im_files[i]), allow_pickle=False)

    def check_cache_ram(self, safety_margin=0.5):
        """Check image caching requirements vs available memory."""
        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabyte
        n = min(self.ni, 30)  # extrapolate from 30 random images
        for _ in range(n):
            im = cv2.imread(random.choice(self.im_files))  # sample image
            ratio = self.imgsz / max(im.shape[0], im.shape[1])  # max(h, w)  # ratio
            b += im.nbytes * ratio**2
        mem_required = b * self.ni / n * (1 + safety_margin)  # GB required to cache dataset into RAM
        mem = psutil.virtual_memory()
        cache = mem_required < mem.available  # to cache or not to cache, that is the question
        if not cache:
            LOGGER.info(
                f'{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images '
                f'with {int(safety_margin * 100)}% safety margin but only '
                f'{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, '
                f"{'caching images ✅' if cache else 'not caching images ⚠️'}"
            )
        return cache

    def set_rectangle(self):
        """Sets the shape of bounding boxes for YOLO detections as rectangles."""
        bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches

        s = np.array([x.pop("shape") for x in self.labels])  # hw
        ar = s[:, 0] / s[:, 1]  # aspect ratio
        irect = ar.argsort()
        self.im_files = [self.im_files[i] for i in irect]
        self.labels = [self.labels[i] for i in irect]
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]

        self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride
        self.batch = bi  # batch index of image

    def __getitem__(self, index):
        """Returns transformed label information for given index."""
        return self.transforms(self.get_image_and_label(index))

    def get_image_and_label(self, index):
        """Get and return label information from the dataset."""
        label = deepcopy(self.labels[index])  # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
        label.pop("shape", None)  # shape is for rect, remove it
        label["img"], label["ori_shape"], label["resized_shape"] = self.load_image(index)
        label["ratio_pad"] = (
            label["resized_shape"][0] / label["ori_shape"][0],
            label["resized_shape"][1] / label["ori_shape"][1],
        )  # for evaluation
        if self.rect:
            label["rect_shape"] = self.batch_shapes[self.batch[index]]
        return self.update_labels_info(label)

    def __len__(self):
        """Returns the length of the labels list for the dataset."""
        return len(self.labels)

    def update_labels_info(self, label):
        """Customize your label format here."""
        return label

    def build_transforms(self, hyp=None):
        """
        Users can customize augmentations here.

        Example:
            ```python
            if self.augment:
                # Training transforms
                return Compose([])
            else:
                # Val transforms
                return Compose([])
            ```
        """
        raise NotImplementedError

    def get_labels(self):
        """
        Users can customize their own format here.

        Note:
            Ensure output is a dictionary with the following keys:
            ```python
            dict(
                im_file=im_file,
                shape=shape,  # format: (height, width)
                cls=cls,
                bboxes=bboxes,  # xywh
                segments=segments,  # xy
                keypoints=keypoints,  # xy
                normalized=True,  # or False
                bbox_format="xyxy",  # or xywh, ltwh
            )
            ```
        """
        raise NotImplementedError
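Both abstract hooks above must be supplied by a subclass. A minimal hypothetical subclass that treats every image as an unlabeled background; the fixed shape and identity transform are illustrative assumptions, not part of this commit:

```python
import numpy as np

from ultralytics.data.base import BaseDataset


class BackgroundDataset(BaseDataset):
    """Hypothetical subclass: every image is an unlabeled background sample."""

    def get_labels(self):
        # One dict per image, following the key layout documented in get_labels() above.
        return [
            dict(
                im_file=f,
                shape=(640, 640),  # (height, width); a real loader would read this per image
                cls=np.zeros((0, 1), dtype=np.float32),
                bboxes=np.zeros((0, 4), dtype=np.float32),
                segments=[],
                keypoints=None,
                normalized=True,
                bbox_format="xywh",
            )
            for f in self.im_files
        ]

    def build_transforms(self, hyp=None):
        return lambda label: label  # identity transform, for illustration only
```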
186
ultralytics/data/build.py
Normal file
@ -0,0 +1,186 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import os
import random
from pathlib import Path

import numpy as np
import torch
from PIL import Image
from torch.utils.data import dataloader, distributed

from ultralytics.data.loaders import (
    LOADERS,
    LoadImagesAndVideos,
    LoadPilAndNumpy,
    LoadScreenshots,
    LoadStreams,
    LoadTensor,
    SourceTypes,
    autocast_list,
)
from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
from ultralytics.utils import RANK, colorstr
from ultralytics.utils.checks import check_file
from .dataset import YOLODataset
from .utils import PIN_MEMORY


class InfiniteDataLoader(dataloader.DataLoader):
    """
    Dataloader that reuses workers.

    Uses same syntax as vanilla DataLoader.
    """

    def __init__(self, *args, **kwargs):
        """Dataloader that infinitely recycles workers, inherits from DataLoader."""
        super().__init__(*args, **kwargs)
        object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
        """Returns the length of the batch sampler's sampler."""
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        """Creates a sampler that repeats indefinitely."""
        for _ in range(len(self)):
            yield next(self.iterator)

    def reset(self):
        """
        Reset iterator.

        This is useful when we want to modify settings of dataset while training.
        """
        self.iterator = self._get_iterator()


class _RepeatSampler:
    """
    Sampler that repeats forever.

    Args:
        sampler (Dataset.sampler): The sampler to repeat.
    """

    def __init__(self, sampler):
        """Initializes an object that repeats a given sampler indefinitely."""
        self.sampler = sampler

    def __iter__(self):
        """Iterates over the 'sampler' and yields its contents."""
        while True:
            yield from iter(self.sampler)


def seed_worker(worker_id):  # noqa
    """Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader."""
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, stride=32):
    """Build YOLO Dataset."""
    return YOLODataset(
        img_path=img_path,
        imgsz=cfg.imgsz,
        batch_size=batch,
        augment=mode == "train",  # augmentation
        hyp=cfg,  # TODO: probably add a get_hyps_from_cfg function
        rect=cfg.rect or rect,  # rectangular batches
        cache=cfg.cache or None,
        single_cls=cfg.single_cls or False,
        stride=int(stride),
        pad=0.0 if mode == "train" else 0.5,
        prefix=colorstr(f"{mode}: "),
        task=cfg.task,
        classes=cfg.classes,
        data=data,
        fraction=cfg.fraction if mode == "train" else 1.0,
    )


def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
    """Return an InfiniteDataLoader or DataLoader for training or validation set."""
    batch = min(batch, len(dataset))
    nd = torch.cuda.device_count()  # number of CUDA devices
    nw = min([os.cpu_count() // max(nd, 1), workers])  # number of workers
    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
    generator = torch.Generator()
    generator.manual_seed(6148914691236517205 + RANK)
    return InfiniteDataLoader(
        dataset=dataset,
        batch_size=batch,
        shuffle=shuffle and sampler is None,
        num_workers=nw,
        sampler=sampler,
        pin_memory=PIN_MEMORY,
        collate_fn=getattr(dataset, "collate_fn", None),
        worker_init_fn=seed_worker,
        generator=generator,
    )
def check_source(source):
    """Check source type and return corresponding flag values."""
    webcam, screenshot, from_img, in_memory, tensor = False, False, False, False, False
    if isinstance(source, (str, int, Path)):  # int for local usb camera
        source = str(source)
        is_file = Path(source).suffix[1:] in (IMG_FORMATS | VID_FORMATS)
        is_url = source.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://"))
        webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
        screenshot = source.lower() == "screen"
        if is_url and is_file:
            source = check_file(source)  # download
    elif isinstance(source, LOADERS):
        in_memory = True
    elif isinstance(source, (list, tuple)):
        source = autocast_list(source)  # convert all list elements to PIL or np arrays
        from_img = True
    elif isinstance(source, (Image.Image, np.ndarray)):
        from_img = True
    elif isinstance(source, torch.Tensor):
        tensor = True
    else:
        raise TypeError("Unsupported image type. For supported types see https://docs.ultralytics.com/modes/predict")

    return source, webcam, screenshot, from_img, in_memory, tensor


def load_inference_source(source=None, batch=1, vid_stride=1, buffer=False):
    """
    Loads an inference source for object detection and applies necessary transformations.

    Args:
        source (str, Path, Tensor, PIL.Image, np.ndarray): The input source for inference.
        batch (int, optional): Batch size for dataloaders. Default is 1.
        vid_stride (int, optional): The frame interval for video sources. Default is 1.
        buffer (bool, optional): Determines whether stream frames will be buffered. Default is False.

    Returns:
        dataset (Dataset): A dataset object for the specified input source.
    """
    source, stream, screenshot, from_img, in_memory, tensor = check_source(source)
    source_type = source.source_type if in_memory else SourceTypes(stream, screenshot, from_img, tensor)

    # Dataloader
    if tensor:
        dataset = LoadTensor(source)
    elif in_memory:
        dataset = source
    elif stream:
        dataset = LoadStreams(source, vid_stride=vid_stride, buffer=buffer)
    elif screenshot:
        dataset = LoadScreenshots(source)
    elif from_img:
        dataset = LoadPilAndNumpy(source)
    else:
        dataset = LoadImagesAndVideos(source, batch=batch, vid_stride=vid_stride)

    # Attach source types to the dataset
    setattr(dataset, "source_type", source_type)

    return dataset
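A sketch tying the builders above together; the paths, the minimal `data` dict, and the per-batch tuple layout for file sources are assumptions, and `get_cfg` is the config helper from `ultralytics.cfg`:

```python
from ultralytics.cfg import get_cfg
from ultralytics.data.build import build_dataloader, build_yolo_dataset, load_inference_source

# Training side: dataset + infinite loader.
cfg = get_cfg()  # default settings namespace (imgsz, rect, cache, task, ...)
data = {"names": {0: "person"}}  # stand-in for a parsed dataset YAML
dataset = build_yolo_dataset(cfg, "datasets/coco8/images/train", batch=16, data=data)
loader = build_dataloader(dataset, batch=16, workers=8, shuffle=True, rank=-1)

# Inference side: one entry point for files, URLs, streams, tensors, and PIL/numpy inputs.
source = load_inference_source("https://ultralytics.com/images/bus.jpg")
print(source.source_type)  # SourceTypes flags attached by load_inference_source above
```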
542
ultralytics/data/converter.py
Normal file
@ -0,0 +1,542 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import json
from collections import defaultdict
from pathlib import Path

import cv2
import numpy as np

from ultralytics.utils import LOGGER, TQDM
from ultralytics.utils.files import increment_path


def coco91_to_coco80_class():
    """
    Converts 91-index COCO class IDs to 80-index COCO class IDs.

    Returns:
        (list): A list of 91 entries where the index is the 91-index class ID minus 1 and the value is the
            corresponding 80-index class ID (or None for IDs unused in the 80-class set).
    """
    return [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, None, 24, 25,
        None, None, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, None, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, None, 60, None, None, 61, None, 62, 63, 64, 65, 66, 67,
        68, 69, 70, 71, 72, None, 73, 74, 75, 76, 77, 78, 79, None,
    ]
def coco80_to_coco91_class():
    """
    Converts 80-index (val2014) to 91-index (paper).
    For details see https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/.

    Example:
        ```python
        import numpy as np

        a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
        b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
        x1 = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
        x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)]  # coco to darknet
        ```
    """
    return [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90,
    ]
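The two tables above are inverses of each other; a quick consistency check (sketch):

```python
from ultralytics.data.converter import coco80_to_coco91_class, coco91_to_coco80_class

c80_from_c91 = coco91_to_coco80_class()  # 91-index -> 80-index (None for IDs unused in val2014)
c91_from_c80 = coco80_to_coco91_class()  # 80-index -> 91-index (1-based paper IDs)

# Round trip: every 80-index class maps to a 91-index ID and back to itself.
for c80, c91 in enumerate(c91_from_c80):
    assert c80_from_c91[c91 - 1] == c80
```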
def convert_coco(
    labels_dir="../coco/annotations/",
    save_dir="coco_converted/",
    use_segments=False,
    use_keypoints=False,
    cls91to80=True,
):
    """
    Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.

    Args:
        labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
        save_dir (str, optional): Path to directory to save results to.
        use_segments (bool, optional): Whether to include segmentation masks in the output.
        use_keypoints (bool, optional): Whether to include keypoint annotations in the output.
        cls91to80 (bool, optional): Whether to map 91 COCO class IDs to the corresponding 80 COCO class IDs.

    Example:
        ```python
        from ultralytics.data.converter import convert_coco

        convert_coco('../datasets/coco/annotations/', use_segments=True, use_keypoints=False, cls91to80=True)
        ```

    Output:
        Generates output files in the specified output directory.
    """
    # Create dataset directory
    save_dir = increment_path(save_dir)  # increment if save directory already exists
    for p in save_dir / "labels", save_dir / "images":
        p.mkdir(parents=True, exist_ok=True)  # make dir

    # Convert classes
    coco80 = coco91_to_coco80_class()

    # Import json
    for json_file in sorted(Path(labels_dir).resolve().glob("*.json")):
        fn = Path(save_dir) / "labels" / json_file.stem.replace("instances_", "")  # folder name
        fn.mkdir(parents=True, exist_ok=True)
        with open(json_file) as f:
            data = json.load(f)

        # Create image dict
        images = {f'{x["id"]:d}': x for x in data["images"]}
        # Create image-annotations dict
        imgToAnns = defaultdict(list)
        for ann in data["annotations"]:
            imgToAnns[ann["image_id"]].append(ann)

        # Write labels file
        for img_id, anns in TQDM(imgToAnns.items(), desc=f"Annotations {json_file}"):
            img = images[f"{img_id:d}"]
            h, w, f = img["height"], img["width"], img["file_name"]

            bboxes = []
            segments = []
            keypoints = []
            for ann in anns:
                if ann["iscrowd"]:
                    continue
                # The COCO box format is [top left x, top left y, width, height]
                box = np.array(ann["bbox"], dtype=np.float64)
                box[:2] += box[2:] / 2  # xy top-left corner to center
                box[[0, 2]] /= w  # normalize x
                box[[1, 3]] /= h  # normalize y
                if box[2] <= 0 or box[3] <= 0:  # if w <= 0 and h <= 0
                    continue

                cls = coco80[ann["category_id"] - 1] if cls91to80 else ann["category_id"] - 1  # class
                box = [cls] + box.tolist()
                if box not in bboxes:
                    bboxes.append(box)
                    if use_segments and ann.get("segmentation") is not None:
                        if len(ann["segmentation"]) == 0:
                            segments.append([])
                            continue
                        elif len(ann["segmentation"]) > 1:
                            s = merge_multi_segment(ann["segmentation"])
                            s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
                        else:
                            s = [j for i in ann["segmentation"] for j in i]  # all segments concatenated
                            s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
                        s = [cls] + s
                        segments.append(s)
                    if use_keypoints and ann.get("keypoints") is not None:
                        keypoints.append(
                            box + (np.array(ann["keypoints"]).reshape(-1, 3) / np.array([w, h, 1])).reshape(-1).tolist()
                        )

            # Write
            with open((fn / f).with_suffix(".txt"), "a") as file:
                for i in range(len(bboxes)):
                    if use_keypoints:
                        line = (*(keypoints[i]),)  # cls, box, keypoints
                    else:
                        line = (
                            *(segments[i] if use_segments and len(segments[i]) > 0 else bboxes[i]),
                        )  # cls, box or segments
                    file.write(("%g " * len(line)).rstrip() % line + "\n")

    LOGGER.info(f"COCO data converted successfully.\nResults saved to {save_dir.resolve()}")
def convert_dota_to_yolo_obb(dota_root_path: str):
    """
    Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.

    The function processes images in the 'train' and 'val' folders of the DOTA dataset. For each image, it reads the
    associated label from the original labels directory and writes new labels in YOLO OBB format to a new directory.

    Args:
        dota_root_path (str): The root directory path of the DOTA dataset.

    Example:
        ```python
        from ultralytics.data.converter import convert_dota_to_yolo_obb

        convert_dota_to_yolo_obb('path/to/DOTA')
        ```

    Notes:
        The directory structure assumed for the DOTA dataset:

            - DOTA
                ├─ images
                │   ├─ train
                │   └─ val
                └─ labels
                    ├─ train_original
                    └─ val_original

        After execution, the function will organize the labels into:

            - DOTA
                └─ labels
                    ├─ train
                    └─ val
    """
    dota_root_path = Path(dota_root_path)

    # Class names to indices mapping
    class_mapping = {
        "plane": 0,
        "ship": 1,
        "storage-tank": 2,
        "baseball-diamond": 3,
        "tennis-court": 4,
        "basketball-court": 5,
        "ground-track-field": 6,
        "harbor": 7,
        "bridge": 8,
        "large-vehicle": 9,
        "small-vehicle": 10,
        "helicopter": 11,
        "roundabout": 12,
        "soccer-ball-field": 13,
        "swimming-pool": 14,
        "container-crane": 15,
        "airport": 16,
        "helipad": 17,
    }

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
        """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
        orig_label_path = orig_label_dir / f"{image_name}.txt"
        save_path = save_dir / f"{image_name}.txt"

        with orig_label_path.open("r") as f, save_path.open("w") as g:
            lines = f.readlines()
            for line in lines:
                parts = line.strip().split()
                if len(parts) < 9:
                    continue
                class_name = parts[8]
                class_idx = class_mapping[class_name]
                coords = [float(p) for p in parts[:8]]
                normalized_coords = [
                    coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)
                ]
                formatted_coords = ["{:.6g}".format(coord) for coord in normalized_coords]
                g.write(f"{class_idx} {' '.join(formatted_coords)}\n")

    for phase in ["train", "val"]:
        image_dir = dota_root_path / "images" / phase
        orig_label_dir = dota_root_path / "labels" / f"{phase}_original"
        save_dir = dota_root_path / "labels" / phase

        save_dir.mkdir(parents=True, exist_ok=True)

        image_paths = list(image_dir.iterdir())
        for image_path in TQDM(image_paths, desc=f"Processing {phase} images"):
            if image_path.suffix != ".png":
                continue
            image_name_without_ext = image_path.stem
            img = cv2.imread(str(image_path))
            h, w = img.shape[:2]
            convert_label(image_name_without_ext, w, h, orig_label_dir, save_dir)
def min_index(arr1, arr2):
    """
    Find a pair of indexes with the shortest distance between two arrays of 2D points.

    Args:
        arr1 (np.ndarray): A NumPy array of shape (N, 2) representing N 2D points.
        arr2 (np.ndarray): A NumPy array of shape (M, 2) representing M 2D points.

    Returns:
        (tuple): A tuple containing the indexes of the points with the shortest distance in arr1 and arr2 respectively.
    """
    dis = ((arr1[:, None, :] - arr2[None, :, :]) ** 2).sum(-1)
    return np.unravel_index(np.argmin(dis, axis=None), dis.shape)
def merge_multi_segment(segments):
    """
    Merge multiple segments into one list by connecting the coordinates with the minimum distance between each segment.
    This function connects these coordinates with a thin line to merge all segments into one.

    Args:
        segments (List[List]): Original segmentations in COCO's JSON file.
            Each element is a list of coordinates, like [segmentation1, segmentation2,...].

    Returns:
        s (List[np.ndarray]): A list of connected segments represented as NumPy arrays.
    """
    s = []
    segments = [np.array(i).reshape(-1, 2) for i in segments]
    idx_list = [[] for _ in range(len(segments))]

    # Record the indexes with min distance between each segment
    for i in range(1, len(segments)):
        idx1, idx2 = min_index(segments[i - 1], segments[i])
        idx_list[i - 1].append(idx1)
        idx_list[i].append(idx2)

    # Use two rounds to connect all the segments
    for k in range(2):
        # Forward connection
        if k == 0:
            for i, idx in enumerate(idx_list):
                # Middle segments have two indexes, reverse the index of middle segments
                if len(idx) == 2 and idx[0] > idx[1]:
                    idx = idx[::-1]
                    segments[i] = segments[i][::-1, :]

                segments[i] = np.roll(segments[i], -idx[0], axis=0)
                segments[i] = np.concatenate([segments[i], segments[i][:1]])
                # Deal with the first segment and the last one
                if i in [0, len(idx_list) - 1]:
                    s.append(segments[i])
                else:
                    idx = [0, idx[1] - idx[0]]
                    s.append(segments[i][idx[0] : idx[1] + 1])

        else:
            for i in range(len(idx_list) - 1, -1, -1):
                if i not in [0, len(idx_list) - 1]:
                    idx = idx_list[i]
                    nidx = abs(idx[1] - idx[0])
                    s.append(segments[i][nidx:])
    return s
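A toy invocation of the merge above, with two small arbitrary polygons (sketch):

```python
import numpy as np

from ultralytics.data.converter import merge_multi_segment

# Two tiny triangles as flat [x1, y1, x2, y2, ...] lists, as stored in COCO JSON.
parts = merge_multi_segment([[0, 0, 1, 0, 1, 1], [2, 2, 3, 2, 3, 3]])
merged = np.concatenate(parts, axis=0)  # one connected (N, 2) polyline
print(merged.shape)
```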
def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
    """
    Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB)
    in YOLO format. Generates segmentation data using SAM auto-annotator as needed.

    Args:
        im_dir (str | Path): Path to image directory to convert.
        save_dir (str | Path): Path to save the generated labels, labels will be saved
            into `labels-segment` in the same directory level of `im_dir` if save_dir is None. Default: None.
        sam_model (str): Segmentation model to use for intermediate segmentation data; optional.

    Notes:
        The input directory structure assumed for dataset:

            - im_dir
                ├─ 001.jpg
                ├─ ..
                └─ NNN.jpg
            - labels
                ├─ 001.txt
                ├─ ..
                └─ NNN.txt
    """
    from ultralytics.data import YOLODataset
    from ultralytics.utils.ops import xywh2xyxy
    from ultralytics.utils import LOGGER
    from ultralytics import SAM
    from tqdm import tqdm

    # NOTE: add placeholder to pass class index check
    dataset = YOLODataset(im_dir, data=dict(names=list(range(1000))))
    if len(dataset.labels[0]["segments"]) > 0:  # if it's segment data
        LOGGER.info("Segmentation labels detected, no need to generate new ones!")
        return

    LOGGER.info("Detection labels detected, generating segment labels by SAM model!")
    sam_model = SAM(sam_model)
    for label in tqdm(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"):
        h, w = label["shape"]
        boxes = label["bboxes"]
        if len(boxes) == 0:  # skip empty labels
            continue
        boxes[:, [0, 2]] *= w
        boxes[:, [1, 3]] *= h
        im = cv2.imread(label["im_file"])
        sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False)
        label["segments"] = sam_results[0].masks.xyn

    save_dir = Path(save_dir) if save_dir else Path(im_dir).parent / "labels-segment"
    save_dir.mkdir(parents=True, exist_ok=True)
    for label in dataset.labels:
        texts = []
        lb_name = Path(label["im_file"]).with_suffix(".txt").name
        txt_file = save_dir / lb_name
        cls = label["cls"]
        for i, s in enumerate(label["segments"]):
            line = (int(cls[i]), *s.reshape(-1))
            texts.append(("%g " * len(line)).rstrip() % line)
        if texts:
            with open(txt_file, "a") as f:
                f.writelines(text + "\n" for text in texts)
    LOGGER.info(f"Generated segment labels saved in {save_dir}")
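A hypothetical one-off conversion run; the image directory is a placeholder, and labels land in a sibling `labels-segment` folder because `save_dir` is omitted:

```python
from ultralytics.data.converter import yolo_bbox2segment

yolo_bbox2segment("datasets/my_detect/images/train", sam_model="sam_b.pt")
```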
383
ultralytics/data/dataset.py
Normal file
@ -0,0 +1,383 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import contextlib
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path

import cv2
import numpy as np
import torch
import torchvision
from PIL import Image

from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr, is_dir_writeable
from ultralytics.utils.ops import resample_segments
from .augment import Compose, Format, Instances, LetterBox, classify_augmentations, classify_transforms, v8_transforms
from .base import BaseDataset
from .utils import HELP_URL, LOGGER, get_hash, img2label_paths, verify_image, verify_image_label

# Ultralytics dataset *.cache version, >= 1.0.0 for YOLOv8
DATASET_CACHE_VERSION = "1.0.3"


class YOLODataset(BaseDataset):
    """
    Dataset class for loading object detection and/or segmentation labels in YOLO format.

    Args:
        data (dict, optional): A dataset YAML dictionary. Defaults to None.
        task (str): An explicit arg specifying the current task. Defaults to 'detect'.

    Returns:
        (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
    """

    def __init__(self, *args, data=None, task="detect", **kwargs):
        """Initializes the YOLODataset with optional configurations for segments and keypoints."""
        self.use_segments = task == "segment"
        self.use_keypoints = task == "pose"
        self.use_obb = task == "obb"
        self.data = data
        assert not (self.use_segments and self.use_keypoints), "Cannot use both segments and keypoints."
        super().__init__(*args, **kwargs)

    def cache_labels(self, path=Path("./labels.cache")):
        """
        Cache dataset labels, check images and read shapes.

        Args:
            path (Path): Path where to save the cache file. Default is Path('./labels.cache').

        Returns:
            (dict): labels.
        """
        x = {"labels": []}
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
        desc = f"{self.prefix}Scanning {path.parent / path.stem}..."
        total = len(self.im_files)
        nkpt, ndim = self.data.get("kpt_shape", (0, 0))
        if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)):
            raise ValueError(
                "'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
                "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'"
            )
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(
                func=verify_image_label,
                iterable=zip(
                    self.im_files,
                    self.label_files,
                    repeat(self.prefix),
                    repeat(self.use_keypoints),
                    repeat(len(self.data["names"])),
                    repeat(nkpt),
                    repeat(ndim),
                ),
            )
            pbar = TQDM(results, desc=desc, total=total)
            for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                nm += nm_f
                nf += nf_f
                ne += ne_f
                nc += nc_f
                if im_file:
                    x["labels"].append(
                        dict(
                            im_file=im_file,
                            shape=shape,
                            cls=lb[:, 0:1],  # n, 1
                            bboxes=lb[:, 1:],  # n, 4
                            segments=segments,
                            keypoints=keypoint,
                            normalized=True,
                            bbox_format="xywh",
                        )
                    )
                if msg:
                    msgs.append(msg)
                pbar.desc = f"{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt"
            pbar.close()

        if msgs:
            LOGGER.info("\n".join(msgs))
        if nf == 0:
            LOGGER.warning(f"{self.prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}")
        x["hash"] = get_hash(self.label_files + self.im_files)
        x["results"] = nf, nm, ne, nc, len(self.im_files)
        x["msgs"] = msgs  # warnings
        save_dataset_cache_file(self.prefix, path, x)
        return x

    def get_labels(self):
        """Returns dictionary of labels for YOLO training."""
        self.label_files = img2label_paths(self.im_files)
        cache_path = Path(self.label_files[0]).parent.with_suffix(".cache")
        try:
            cache, exists = load_dataset_cache_file(cache_path), True  # attempt to load a *.cache file
            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
            assert cache["hash"] == get_hash(self.label_files + self.im_files)  # identical hash
        except (FileNotFoundError, AssertionError, AttributeError):
            cache, exists = self.cache_labels(cache_path), False  # run cache ops

        # Display cache
        nf, nm, ne, nc, n = cache.pop("results")  # found, missing, empty, corrupt, total
        if exists and LOCAL_RANK in (-1, 0):
            d = f"Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt"
            TQDM(None, desc=self.prefix + d, total=n, initial=n)  # display results
            if cache["msgs"]:
                LOGGER.info("\n".join(cache["msgs"]))  # display warnings

        # Read cache
        [cache.pop(k) for k in ("hash", "version", "msgs")]  # remove items
        labels = cache["labels"]
        if not labels:
            LOGGER.warning(f"WARNING ⚠️ No images found in {cache_path}, training may not work correctly. {HELP_URL}")
        self.im_files = [lb["im_file"] for lb in labels]  # update im_files

        # Check if the dataset is all boxes or all segments
        lengths = ((len(lb["cls"]), len(lb["bboxes"]), len(lb["segments"])) for lb in labels)
        len_cls, len_boxes, len_segments = (sum(x) for x in zip(*lengths))
        if len_segments and len_boxes != len_segments:
            LOGGER.warning(
                f"WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, "
                f"len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. "
                "To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset."
            )
            for lb in labels:
                lb["segments"] = []
        if len_cls == 0:
            LOGGER.warning(f"WARNING ⚠️ No labels found in {cache_path}, training may not work correctly. {HELP_URL}")
        return labels

    def build_transforms(self, hyp=None):
        """Builds and appends transforms to the list."""
        if self.augment:
            hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
            hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
            transforms = v8_transforms(self, self.imgsz, hyp)
        else:
            transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)])
        transforms.append(
            Format(
                bbox_format="xywh",
                normalize=True,
                return_mask=self.use_segments,
                return_keypoint=self.use_keypoints,
                return_obb=self.use_obb,
                batch_idx=True,
                mask_ratio=hyp.mask_ratio,
                mask_overlap=hyp.overlap_mask,
                bgr=hyp.bgr if self.augment else 0.0,  # only affects training
            )
        )
        return transforms

    def close_mosaic(self, hyp):
        """Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations."""
        hyp.mosaic = 0.0  # set mosaic ratio=0.0
        hyp.copy_paste = 0.0  # keep the same behavior as previous v8 close-mosaic
        hyp.mixup = 0.0  # keep the same behavior as previous v8 close-mosaic
        self.transforms = self.build_transforms(hyp)

    def update_labels_info(self, label):
        """
        Customize your label format here.

        Note:
            cls is not stored with bboxes now; classification and semantic segmentation need an independent cls label.
            This method can also support classification and semantic segmentation by adding or removing dict keys.
        """
        bboxes = label.pop("bboxes")
        segments = label.pop("segments", [])
        keypoints = label.pop("keypoints", None)
        bbox_format = label.pop("bbox_format")
        normalized = label.pop("normalized")

        # NOTE: do NOT resample oriented boxes
        segment_resamples = 100 if self.use_obb else 1000
        if len(segments) > 0:
            # list[np.array(1000, 2)] * num_samples
            # (N, 1000, 2)
            segments = np.stack(resample_segments(segments, n=segment_resamples), axis=0)
        else:
            segments = np.zeros((0, segment_resamples, 2), dtype=np.float32)
        label["instances"] = Instances(bboxes, segments, keypoints, bbox_format=bbox_format, normalized=normalized)
        return label

    @staticmethod
    def collate_fn(batch):
        """Collates data samples into batches."""
        new_batch = {}
        keys = batch[0].keys()
        values = list(zip(*[list(b.values()) for b in batch]))
        for i, k in enumerate(keys):
            value = values[i]
            if k == "img":
                value = torch.stack(value, 0)
            if k in ["masks", "keypoints", "bboxes", "cls", "segments", "obb"]:
                value = torch.cat(value, 0)
            new_batch[k] = value
        new_batch["batch_idx"] = list(new_batch["batch_idx"])
        for i in range(len(new_batch["batch_idx"])):
            new_batch["batch_idx"][i] += i  # add target image index for build_targets()
        new_batch["batch_idx"] = torch.cat(new_batch["batch_idx"], 0)
        return new_batch


# Classification dataloaders -------------------------------------------------------------------------------------------
class ClassificationDataset(torchvision.datasets.ImageFolder):
    """
    Extends torchvision ImageFolder to support YOLO classification tasks, offering functionalities like image
    augmentation, caching, and verification. It's designed to efficiently handle large datasets for training deep
    learning models, with optional image transformations and caching mechanisms to speed up training.

    This class allows for augmentations using both torchvision and Albumentations libraries, and supports caching
    images in RAM or on disk to reduce IO overhead during training. Additionally, it implements a robust verification
    process to ensure data integrity and consistency.

    Attributes:
        cache_ram (bool): Indicates if caching in RAM is enabled.
        cache_disk (bool): Indicates if caching on disk is enabled.
        samples (list): A list of tuples, each containing the path to an image, its class index, path to its .npy cache
            file (if caching on disk), and optionally the loaded image array (if caching in RAM).
        torch_transforms (callable): PyTorch transforms to be applied to the images.
    """

    def __init__(self, root, args, augment=False, prefix=""):
        """
        Initialize YOLO object with root, image size, augmentations, and cache settings.

        Args:
            root (str): Path to the dataset directory where images are stored in a class-specific folder structure.
            args (Namespace): Configuration containing dataset-related settings such as image size, augmentation
                parameters, and cache settings. It includes attributes like `imgsz` (image size), `fraction` (fraction
                of data to use), `scale`, `fliplr`, `flipud`, `cache` (disk or RAM caching for faster training),
                `auto_augment`, `hsv_h`, `hsv_s`, `hsv_v`, and `crop_fraction`.
            augment (bool, optional): Whether to apply augmentations to the dataset. Default is False.
            prefix (str, optional): Prefix for logging and cache filenames, aiding in dataset identification and
                debugging. Default is an empty string.
        """
        super().__init__(root=root)
        if augment and args.fraction < 1.0:  # reduce training fraction
            self.samples = self.samples[: round(len(self.samples) * args.fraction)]
        self.prefix = colorstr(f"{prefix}: ") if prefix else ""
        self.cache_ram = args.cache is True or args.cache == "ram"  # cache images into RAM
        self.cache_disk = args.cache == "disk"  # cache images on hard drive as uncompressed *.npy files
        self.samples = self.verify_images()  # filter out bad images
        self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
        scale = (1.0 - args.scale, 1.0)  # (0.08, 1.0)
        self.torch_transforms = (
            classify_augmentations(
                size=args.imgsz,
                scale=scale,
                hflip=args.fliplr,
                vflip=args.flipud,
                erasing=args.erasing,
                auto_augment=args.auto_augment,
                hsv_h=args.hsv_h,
                hsv_s=args.hsv_s,
                hsv_v=args.hsv_v,
            )
            if augment
            else classify_transforms(size=args.imgsz, crop_fraction=args.crop_fraction)
        )

    def __getitem__(self, i):
        """Returns the sample and target corresponding to the given index."""
        f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
        if self.cache_ram and im is None:
            im = self.samples[i][3] = cv2.imread(f)
        elif self.cache_disk:
            if not fn.exists():  # create *.npy cache file if missing
                np.save(fn.as_posix(), cv2.imread(f), allow_pickle=False)
            im = np.load(fn)
        else:  # read image
            im = cv2.imread(f)  # BGR
        # Convert NumPy array to PIL image
        im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
        sample = self.torch_transforms(im)
        return {"img": sample, "cls": j}

    def __len__(self) -> int:
        """Return the total number of samples in the dataset."""
        return len(self.samples)

    def verify_images(self):
        """Verify all images in dataset."""
        desc = f"{self.prefix}Scanning {self.root}..."
        path = Path(self.root).with_suffix(".cache")  # *.cache file path

        with contextlib.suppress(FileNotFoundError, AssertionError, AttributeError):
            cache = load_dataset_cache_file(path)  # attempt to load a *.cache file
            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
            assert cache["hash"] == get_hash([x[0] for x in self.samples])  # identical hash
            nf, nc, n, samples = cache.pop("results")  # found, corrupt, total, samples
            if LOCAL_RANK in (-1, 0):
                d = f"{desc} {nf} images, {nc} corrupt"
                TQDM(None, desc=d, total=n, initial=n)
                if cache["msgs"]:
                    LOGGER.info("\n".join(cache["msgs"]))  # display warnings
            return samples

        # Run scan if *.cache retrieval failed
        nf, nc, msgs, samples, x = 0, 0, [], [], {}
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix)))
            pbar = TQDM(results, desc=desc, total=len(self.samples))
            for sample, nf_f, nc_f, msg in pbar:
                if nf_f:
                    samples.append(sample)
                if msg:
                    msgs.append(msg)
                nf += nf_f
                nc += nc_f
                pbar.desc = f"{desc} {nf} images, {nc} corrupt"
            pbar.close()
        if msgs:
            LOGGER.info("\n".join(msgs))
        x["hash"] = get_hash([x[0] for x in self.samples])
        x["results"] = nf, nc, len(samples), samples
        x["msgs"] = msgs  # warnings
        save_dataset_cache_file(self.prefix, path, x)
        return samples


def load_dataset_cache_file(path):
    """Load an Ultralytics *.cache dictionary from path."""
    import gc

    gc.disable()  # reduce pickle load time https://github.com/ultralytics/ultralytics/pull/1585
    cache = np.load(str(path), allow_pickle=True).item()  # load dict
    gc.enable()
    return cache


def save_dataset_cache_file(prefix, path, x):
    """Save an Ultralytics dataset *.cache dictionary x to path."""
    x["version"] = DATASET_CACHE_VERSION  # add cache version
    if is_dir_writeable(path.parent):
        if path.exists():
            path.unlink()  # remove *.cache file if exists
        np.save(str(path), x)  # save cache for next time
        path.with_suffix(".cache.npy").rename(path)  # remove .npy suffix
        LOGGER.info(f"{prefix}New cache created: {path}")
    else:
        LOGGER.warning(f"{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable, cache not saved.")


# TODO: support semantic segmentation
class SemanticDataset(BaseDataset):
    """
    Semantic Segmentation Dataset.

    This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
    from the BaseDataset class.

    Note:
        This class is currently a placeholder and needs to be populated with methods and attributes for supporting
        semantic segmentation tasks.
    """

    def __init__(self):
        """Initialize a SemanticDataset object."""
        super().__init__()
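Taken together: YOLODataset.get_labels() populates labels (reloading labels.cache when the hash matches), update_labels_info wraps each sample's targets in an Instances object, and collate_fn merges per-sample dicts into one batch dict. A minimal sketch with hypothetical local paths, mirroring the placeholder-`names` construction used by `yolo_bbox2segment` earlier in this diff:

    from torch.utils.data import DataLoader

    from ultralytics.data import YOLODataset

    # First construction scans images/labels and writes labels.cache next to the
    # label files; later runs reload the cache while the dataset hash matches.
    ds = YOLODataset("datasets/mydata/images", data=dict(names=list(range(80))), task="detect")

    # collate_fn stacks "img" and concatenates "cls"/"bboxes"; "batch_idx" gains
    # the sample index, e.g. [0, 0, 1, 1, 1] for two samples with 2 and 3 boxes.
    dl = DataLoader(ds, batch_size=2, collate_fn=YOLODataset.collate_fn)
    batch = next(iter(dl))
    print(batch["img"].shape, batch["batch_idx"].tolist())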
5
ultralytics/data/explorer/__init__.py
Normal file
@ -0,0 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from .utils import plot_query_result

__all__ = ["plot_query_result"]
BIN
ultralytics/data/explorer/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/explorer/__pycache__/__init__.cpython-39.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/explorer/__pycache__/explorer.cpython-312.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/explorer/__pycache__/explorer.cpython-39.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/explorer/__pycache__/utils.cpython-312.pyc
Normal file
Binary file not shown.
BIN
ultralytics/data/explorer/__pycache__/utils.cpython-39.pyc
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff.