add yolo v10 and modify pipeline

2025-03-28 13:19:54 +08:00
parent 183299c06b
commit 798c596acc
471 changed files with 19109 additions and 7342 deletions
--- a/ultralytics/models/rtdetr/train.py
+++ b/ultralytics/models/rtdetr/train.py
@ -7,16 +7,17 @@ import torch
 from ultralytics.models.yolo.detect import DetectionTrainer
 from ultralytics.nn.tasks import RTDETRDetectionModel
 from ultralytics.utils import RANK, colorstr
-
 from .val import RTDETRDataset, RTDETRValidator


 class RTDETRTrainer(DetectionTrainer):
    """
-    A class extending the DetectionTrainer class for training based on an RT-DETR detection model.
+    Trainer class for the RT-DETR model developed by Baidu for real-time object detection. Extends the DetectionTrainer
+    class for YOLO to adapt to the specific features and architecture of RT-DETR. This model leverages Vision
+    Transformers and has capabilities like IoU-aware query selection and adaptable inference speed.

    Notes:
-        - F.grid_sample used in rt-detr does not support the `deterministic=True` argument.
+        - F.grid_sample used in RT-DETR does not support the `deterministic=True` argument.
        - AMP training can lead to NaN outputs and may produce errors during bipartite graph matching.

    Example:
@ -30,43 +31,71 @@ class RTDETRTrainer(DetectionTrainer):
    """

    def get_model(self, cfg=None, weights=None, verbose=True):
-        """Return a YOLO detection model."""
-        model = RTDETRDetectionModel(cfg, nc=self.data['nc'], verbose=verbose and RANK == -1)
+        """
+        Initialize and return an RT-DETR model for object detection tasks.
+
+        Args:
+            cfg (dict, optional): Model configuration. Defaults to None.
+            weights (str, optional): Path to pre-trained model weights. Defaults to None.
+            verbose (bool): Verbose logging if True. Defaults to True.
+
+        Returns:
+            (RTDETRDetectionModel): Initialized model.
+        """
+        model = RTDETRDetectionModel(cfg, nc=self.data["nc"], verbose=verbose and RANK == -1)
        if weights:
            model.load(weights)
        return model

-    def build_dataset(self, img_path, mode='val', batch=None):
-        """Build RTDETR Dataset
+    def build_dataset(self, img_path, mode="val", batch=None):
+        """
+        Build and return an RT-DETR dataset for training or validation.

        Args:
            img_path (str): Path to the folder containing images.
-            mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
-            batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
+            mode (str): Dataset mode, either 'train' or 'val'.
+            batch (int, optional): Batch size for rectangle training. Defaults to None.
+
+        Returns:
+            (RTDETRDataset): Dataset object for the specific mode.
        """
        return RTDETRDataset(
            img_path=img_path,
            imgsz=self.args.imgsz,
            batch_size=batch,
-            augment=mode == 'train',  # no augmentation
+            augment=mode == "train",
            hyp=self.args,
-            rect=False,  # no rect
+            rect=False,
            cache=self.args.cache or None,
-            prefix=colorstr(f'{mode}: '),
-            data=self.data)
+            prefix=colorstr(f"{mode}: "),
+            data=self.data,
+        )

    def get_validator(self):
-        """Returns a DetectionValidator for RTDETR model validation."""
-        self.loss_names = 'giou_loss', 'cls_loss', 'l1_loss'
+        """
+        Returns a DetectionValidator suitable for RT-DETR model validation.
+
+        Returns:
+            (RTDETRValidator): Validator object for model validation.
+        """
+        self.loss_names = "giou_loss", "cls_loss", "l1_loss"
        return RTDETRValidator(self.test_loader, save_dir=self.save_dir, args=copy(self.args))

    def preprocess_batch(self, batch):
-        """Preprocesses a batch of images by scaling and converting to float."""
+        """
+        Preprocess a batch of images. Scales and converts the images to float format.
+
+        Args:
+            batch (dict): Dictionary containing a batch of images, bboxes, and labels.
+
+        Returns:
+            (dict): Preprocessed batch.
+        """
        batch = super().preprocess_batch(batch)
-        bs = len(batch['img'])
-        batch_idx = batch['batch_idx']
+        bs = len(batch["img"])
+        batch_idx = batch["batch_idx"]
        gt_bbox, gt_class = [], []
        for i in range(bs):
-            gt_bbox.append(batch['bboxes'][batch_idx == i].to(batch_idx.device))
-            gt_class.append(batch['cls'][batch_idx == i].to(device=batch_idx.device, dtype=torch.long))
+            gt_bbox.append(batch["bboxes"][batch_idx == i].to(batch_idx.device))
+            gt_class.append(batch["cls"][batch_idx == i].to(device=batch_idx.device, dtype=torch.long))
        return batch