add yolo v10 and modify pipeline

2025-03-28 13:19:54 +08:00
parent 183299c06b
commit 798c596acc
471 changed files with 19109 additions and 7342 deletions
--- a/ultralytics/data/converter.py
+++ b/ultralytics/data/converter.py
@ -1,31 +1,120 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

 import json
-import shutil
 from collections import defaultdict
 from pathlib import Path

 import cv2
 import numpy as np

-from ultralytics.utils import TQDM
+from ultralytics.utils import LOGGER, TQDM
+from ultralytics.utils.files import increment_path


 def coco91_to_coco80_class():
-    """Converts 91-index COCO class IDs to 80-index COCO class IDs.
+    """
+    Converts 91-index COCO class IDs to 80-index COCO class IDs.

    Returns:
        (list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
            corresponding 91-index class ID.
    """
    return [
-        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, None, 24, 25, None,
-        None, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, None, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
-        51, 52, 53, 54, 55, 56, 57, 58, 59, None, 60, None, None, 61, None, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
-        None, 73, 74, 75, 76, 77, 78, 79, None]
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        None,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        None,
+        24,
+        25,
+        None,
+        None,
+        26,
+        27,
+        28,
+        29,
+        30,
+        31,
+        32,
+        33,
+        34,
+        35,
+        36,
+        37,
+        38,
+        39,
+        None,
+        40,
+        41,
+        42,
+        43,
+        44,
+        45,
+        46,
+        47,
+        48,
+        49,
+        50,
+        51,
+        52,
+        53,
+        54,
+        55,
+        56,
+        57,
+        58,
+        59,
+        None,
+        60,
+        None,
+        None,
+        61,
+        None,
+        62,
+        63,
+        64,
+        65,
+        66,
+        67,
+        68,
+        69,
+        70,
+        71,
+        72,
+        None,
+        73,
+        74,
+        75,
+        76,
+        77,
+        78,
+        79,
+        None,
+    ]


-def coco80_to_coco91_class():  #
+def coco80_to_coco91_class():
    """
    Converts 80-index (val2014) to 91-index (paper).
    For details see https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/.
@ -41,16 +130,102 @@ def coco80_to_coco91_class():  #
        ```
    """
    return [
-        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
-        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-        64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        27,
+        28,
+        31,
+        32,
+        33,
+        34,
+        35,
+        36,
+        37,
+        38,
+        39,
+        40,
+        41,
+        42,
+        43,
+        44,
+        46,
+        47,
+        48,
+        49,
+        50,
+        51,
+        52,
+        53,
+        54,
+        55,
+        56,
+        57,
+        58,
+        59,
+        60,
+        61,
+        62,
+        63,
+        64,
+        65,
+        67,
+        70,
+        72,
+        73,
+        74,
+        75,
+        76,
+        77,
+        78,
+        79,
+        80,
+        81,
+        82,
+        84,
+        85,
+        86,
+        87,
+        88,
+        89,
+        90,
+    ]


-def convert_coco(labels_dir='../coco/annotations/', use_segments=False, use_keypoints=False, cls91to80=True):
-    """Converts COCO dataset annotations to a format suitable for training YOLOv5 models.
+def convert_coco(
+    labels_dir="../coco/annotations/",
+    save_dir="coco_converted/",
+    use_segments=False,
+    use_keypoints=False,
+    cls91to80=True,
+):
+    """
+    Converts COCO dataset annotations to a YOLO annotation format  suitable for training YOLO models.

    Args:
        labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
+        save_dir (str, optional): Path to directory to save results to.
        use_segments (bool, optional): Whether to include segmentation masks in the output.
        use_keypoints (bool, optional): Whether to include keypoint annotations in the output.
        cls91to80 (bool, optional): Whether to map 91 COCO class IDs to the corresponding 80 COCO class IDs.
@ -67,78 +242,79 @@ def convert_coco(labels_dir='../coco/annotations/', use_segments=False, use_keyp
    """

    # Create dataset directory
-    save_dir = Path('yolo_labels')
-    if save_dir.exists():
-        shutil.rmtree(save_dir)  # delete dir
-    for p in save_dir / 'labels', save_dir / 'images':
+    save_dir = increment_path(save_dir)  # increment if save directory already exists
+    for p in save_dir / "labels", save_dir / "images":
        p.mkdir(parents=True, exist_ok=True)  # make dir

    # Convert classes
    coco80 = coco91_to_coco80_class()

    # Import json
-    for json_file in sorted(Path(labels_dir).resolve().glob('*.json')):
-        fn = Path(save_dir) / 'labels' / json_file.stem.replace('instances_', '')  # folder name
+    for json_file in sorted(Path(labels_dir).resolve().glob("*.json")):
+        fn = Path(save_dir) / "labels" / json_file.stem.replace("instances_", "")  # folder name
        fn.mkdir(parents=True, exist_ok=True)
        with open(json_file) as f:
            data = json.load(f)

        # Create image dict
-        images = {f'{x["id"]:d}': x for x in data['images']}
+        images = {f'{x["id"]:d}': x for x in data["images"]}
        # Create image-annotations dict
        imgToAnns = defaultdict(list)
-        for ann in data['annotations']:
-            imgToAnns[ann['image_id']].append(ann)
+        for ann in data["annotations"]:
+            imgToAnns[ann["image_id"]].append(ann)

        # Write labels file
-        for img_id, anns in TQDM(imgToAnns.items(), desc=f'Annotations {json_file}'):
-            img = images[f'{img_id:d}']
-            h, w, f = img['height'], img['width'], img['file_name']
+        for img_id, anns in TQDM(imgToAnns.items(), desc=f"Annotations {json_file}"):
+            img = images[f"{img_id:d}"]
+            h, w, f = img["height"], img["width"], img["file_name"]

            bboxes = []
            segments = []
            keypoints = []
            for ann in anns:
-                if ann['iscrowd']:
+                if ann["iscrowd"]:
                    continue
                # The COCO box format is [top left x, top left y, width, height]
-                box = np.array(ann['bbox'], dtype=np.float64)
+                box = np.array(ann["bbox"], dtype=np.float64)
                box[:2] += box[2:] / 2  # xy top-left corner to center
                box[[0, 2]] /= w  # normalize x
                box[[1, 3]] /= h  # normalize y
                if box[2] <= 0 or box[3] <= 0:  # if w <= 0 and h <= 0
                    continue

-                cls = coco80[ann['category_id'] - 1] if cls91to80 else ann['category_id'] - 1  # class
+                cls = coco80[ann["category_id"] - 1] if cls91to80 else ann["category_id"] - 1  # class
                box = [cls] + box.tolist()
                if box not in bboxes:
                    bboxes.append(box)
-                if use_segments and ann.get('segmentation') is not None:
-                    if len(ann['segmentation']) == 0:
-                        segments.append([])
-                        continue
-                    elif len(ann['segmentation']) > 1:
-                        s = merge_multi_segment(ann['segmentation'])
-                        s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
-                    else:
-                        s = [j for i in ann['segmentation'] for j in i]  # all segments concatenated
-                        s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
-                    s = [cls] + s
-                    if s not in segments:
+                    if use_segments and ann.get("segmentation") is not None:
+                        if len(ann["segmentation"]) == 0:
+                            segments.append([])
+                            continue
+                        elif len(ann["segmentation"]) > 1:
+                            s = merge_multi_segment(ann["segmentation"])
+                            s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
+                        else:
+                            s = [j for i in ann["segmentation"] for j in i]  # all segments concatenated
+                            s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
+                        s = [cls] + s
                        segments.append(s)
-                if use_keypoints and ann.get('keypoints') is not None:
-                    keypoints.append(box + (np.array(ann['keypoints']).reshape(-1, 3) /
-                                            np.array([w, h, 1])).reshape(-1).tolist())
+                    if use_keypoints and ann.get("keypoints") is not None:
+                        keypoints.append(
+                            box + (np.array(ann["keypoints"]).reshape(-1, 3) / np.array([w, h, 1])).reshape(-1).tolist()
+                        )

            # Write
-            with open((fn / f).with_suffix('.txt'), 'a') as file:
+            with open((fn / f).with_suffix(".txt"), "a") as file:
                for i in range(len(bboxes)):
                    if use_keypoints:
-                        line = *(keypoints[i]),  # cls, box, keypoints
+                        line = (*(keypoints[i]),)  # cls, box, keypoints
                    else:
-                        line = *(segments[i]
-                                 if use_segments and len(segments[i]) > 0 else bboxes[i]),  # cls, box or segments
-                    file.write(('%g ' * len(line)).rstrip() % line + '\n')
+                        line = (
+                            *(segments[i] if use_segments and len(segments[i]) > 0 else bboxes[i]),
+                        )  # cls, box or segments
+                    file.write(("%g " * len(line)).rstrip() % line + "\n")
+
+    LOGGER.info(f"COCO data converted successfully.\nResults saved to {save_dir.resolve()}")


 def convert_dota_to_yolo_obb(dota_root_path: str):
@ -160,48 +336,52 @@ def convert_dota_to_yolo_obb(dota_root_path: str):

    Notes:
        The directory structure assumed for the DOTA dataset:
-            - DOTA
-                - images
-                    - train
-                    - val
-                - labels
-                    - train_original
-                    - val_original

-        After the function execution, the new labels will be saved in:
            - DOTA
-                - labels
-                    - train
-                    - val
+                ├─ images
+                │   ├─ train
+                │   └─ val
+                └─ labels
+                    ├─ train_original
+                    └─ val_original
+
+        After execution, the function will organize the labels into:
+
+            - DOTA
+                └─ labels
+                    ├─ train
+                    └─ val
    """
    dota_root_path = Path(dota_root_path)

    # Class names to indices mapping
    class_mapping = {
-        'plane': 0,
-        'ship': 1,
-        'storage-tank': 2,
-        'baseball-diamond': 3,
-        'tennis-court': 4,
-        'basketball-court': 5,
-        'ground-track-field': 6,
-        'harbor': 7,
-        'bridge': 8,
-        'large-vehicle': 9,
-        'small-vehicle': 10,
-        'helicopter': 11,
-        'roundabout': 12,
-        'soccer ball-field': 13,
-        'swimming-pool': 14,
-        'container-crane': 15,
-        'airport': 16,
-        'helipad': 17}
+        "plane": 0,
+        "ship": 1,
+        "storage-tank": 2,
+        "baseball-diamond": 3,
+        "tennis-court": 4,
+        "basketball-court": 5,
+        "ground-track-field": 6,
+        "harbor": 7,
+        "bridge": 8,
+        "large-vehicle": 9,
+        "small-vehicle": 10,
+        "helicopter": 11,
+        "roundabout": 12,
+        "soccer-ball-field": 13,
+        "swimming-pool": 14,
+        "container-crane": 15,
+        "airport": 16,
+        "helipad": 17,
+    }

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
-        orig_label_path = orig_label_dir / f'{image_name}.txt'
-        save_path = save_dir / f'{image_name}.txt'
+        """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
+        orig_label_path = orig_label_dir / f"{image_name}.txt"
+        save_path = save_dir / f"{image_name}.txt"

-        with orig_label_path.open('r') as f, save_path.open('w') as g:
+        with orig_label_path.open("r") as f, save_path.open("w") as g:
            lines = f.readlines()
            for line in lines:
                parts = line.strip().split()
@ -211,20 +391,21 @@ def convert_dota_to_yolo_obb(dota_root_path: str):
                class_idx = class_mapping[class_name]
                coords = [float(p) for p in parts[:8]]
                normalized_coords = [
-                    coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)]
-                formatted_coords = ['{:.6g}'.format(coord) for coord in normalized_coords]
+                    coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)
+                ]
+                formatted_coords = ["{:.6g}".format(coord) for coord in normalized_coords]
                g.write(f"{class_idx} {' '.join(formatted_coords)}\n")

-    for phase in ['train', 'val']:
-        image_dir = dota_root_path / 'images' / phase
-        orig_label_dir = dota_root_path / 'labels' / f'{phase}_original'
-        save_dir = dota_root_path / 'labels' / phase
+    for phase in ["train", "val"]:
+        image_dir = dota_root_path / "images" / phase
+        orig_label_dir = dota_root_path / "labels" / f"{phase}_original"
+        save_dir = dota_root_path / "labels" / phase

        save_dir.mkdir(parents=True, exist_ok=True)

        image_paths = list(image_dir.iterdir())
-        for image_path in TQDM(image_paths, desc=f'Processing {phase} images'):
-            if image_path.suffix != '.png':
+        for image_path in TQDM(image_paths, desc=f"Processing {phase} images"):
+            if image_path.suffix != ".png":
                continue
            image_name_without_ext = image_path.stem
            img = cv2.imread(str(image_path))
@ -237,8 +418,8 @@ def min_index(arr1, arr2):
    Find a pair of indexes with the shortest distance between two arrays of 2D points.

    Args:
-        arr1 (np.array): A NumPy array of shape (N, 2) representing N 2D points.
-        arr2 (np.array): A NumPy array of shape (M, 2) representing M 2D points.
+        arr1 (np.ndarray): A NumPy array of shape (N, 2) representing N 2D points.
+        arr2 (np.ndarray): A NumPy array of shape (M, 2) representing M 2D points.

    Returns:
        (tuple): A tuple containing the indexes of the points with the shortest distance in arr1 and arr2 respectively.
@ -263,31 +444,30 @@ def merge_multi_segment(segments):
    segments = [np.array(i).reshape(-1, 2) for i in segments]
    idx_list = [[] for _ in range(len(segments))]

-    # record the indexes with min distance between each segment
+    # Record the indexes with min distance between each segment
    for i in range(1, len(segments)):
        idx1, idx2 = min_index(segments[i - 1], segments[i])
        idx_list[i - 1].append(idx1)
        idx_list[i].append(idx2)

-    # use two round to connect all the segments
+    # Use two round to connect all the segments
    for k in range(2):
-        # forward connection
+        # Forward connection
        if k == 0:
            for i, idx in enumerate(idx_list):
-                # middle segments have two indexes
-                # reverse the index of middle segments
+                # Middle segments have two indexes, reverse the index of middle segments
                if len(idx) == 2 and idx[0] > idx[1]:
                    idx = idx[::-1]
                    segments[i] = segments[i][::-1, :]

                segments[i] = np.roll(segments[i], -idx[0], axis=0)
                segments[i] = np.concatenate([segments[i], segments[i][:1]])
-                # deal with the first segment and the last one
+                # Deal with the first segment and the last one
                if i in [0, len(idx_list) - 1]:
                    s.append(segments[i])
                else:
                    idx = [0, idx[1] - idx[0]]
-                    s.append(segments[i][idx[0]:idx[1] + 1])
+                    s.append(segments[i][idx[0] : idx[1] + 1])

        else:
            for i in range(len(idx_list) - 1, -1, -1):
@ -296,3 +476,67 @@ def merge_multi_segment(segments):
                    nidx = abs(idx[1] - idx[0])
                    s.append(segments[i][nidx:])
    return s
+
+
+def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
+    """
+    Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB)
+    in YOLO format. Generates segmentation data using SAM auto-annotator as needed.
+
+    Args:
+        im_dir (str | Path): Path to image directory to convert.
+        save_dir (str | Path): Path to save the generated labels, labels will be saved
+            into `labels-segment` in the same directory level of `im_dir` if save_dir is None. Default: None.
+        sam_model (str): Segmentation model to use for intermediate segmentation data; optional.
+
+    Notes:
+        The input directory structure assumed for dataset:
+
+            - im_dir
+                ├─ 001.jpg
+                ├─ ..
+                └─ NNN.jpg
+            - labels
+                ├─ 001.txt
+                ├─ ..
+                └─ NNN.txt
+    """
+    from ultralytics.data import YOLODataset
+    from ultralytics.utils.ops import xywh2xyxy
+    from ultralytics.utils import LOGGER
+    from ultralytics import SAM
+    from tqdm import tqdm
+
+    # NOTE: add placeholder to pass class index check
+    dataset = YOLODataset(im_dir, data=dict(names=list(range(1000))))
+    if len(dataset.labels[0]["segments"]) > 0:  # if it's segment data
+        LOGGER.info("Segmentation labels detected, no need to generate new ones!")
+        return
+
+    LOGGER.info("Detection labels detected, generating segment labels by SAM model!")
+    sam_model = SAM(sam_model)
+    for l in tqdm(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"):
+        h, w = l["shape"]
+        boxes = l["bboxes"]
+        if len(boxes) == 0:  # skip empty labels
+            continue
+        boxes[:, [0, 2]] *= w
+        boxes[:, [1, 3]] *= h
+        im = cv2.imread(l["im_file"])
+        sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False)
+        l["segments"] = sam_results[0].masks.xyn
+
+    save_dir = Path(save_dir) if save_dir else Path(im_dir).parent / "labels-segment"
+    save_dir.mkdir(parents=True, exist_ok=True)
+    for l in dataset.labels:
+        texts = []
+        lb_name = Path(l["im_file"]).with_suffix(".txt").name
+        txt_file = save_dir / lb_name
+        cls = l["cls"]
+        for i, s in enumerate(l["segments"]):
+            line = (int(cls[i]), *s.reshape(-1))
+            texts.append(("%g " * len(line)).rstrip() % line)
+        if texts:
+            with open(txt_file, "a") as f:
+                f.writelines(text + "\n" for text in texts)
+    LOGGER.info(f"Generated segment labels saved in {save_dir}")