Mirror of https://gitee.com/nanjing-yimao-information/ieemoo-ai-gift.git (synced 2025-08-20 06:10:26 +00:00)
update
15  ultralytics/data/__init__.py  Normal file
@@ -0,0 +1,15 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from .base import BaseDataset
from .build import build_dataloader, build_yolo_dataset, load_inference_source
from .dataset import ClassificationDataset, SemanticDataset, YOLODataset

__all__ = (
    "BaseDataset",
    "ClassificationDataset",
    "SemanticDataset",
    "YOLODataset",
    "build_yolo_dataset",
    "build_dataloader",
    "load_inference_source",
)
50  ultralytics/data/annotator.py  Normal file
@@ -0,0 +1,50 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from pathlib import Path

from ultralytics import SAM, YOLO


def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None):
    """
    Automatically annotates images using a YOLO object detection model and a SAM segmentation model.

    Args:
        data (str): Path to a folder containing images to be annotated.
        det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
        sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
        device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).
        output_dir (str | None, optional): Directory to save the annotated results.
            Defaults to a '{data.stem}_auto_annotate_labels' folder in the same directory as 'data'.

    Example:
        ```python
        from ultralytics.data.annotator import auto_annotate

        auto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')
        ```
    """
    det_model = YOLO(det_model)
    sam_model = SAM(sam_model)

    data = Path(data)
    if not output_dir:
        output_dir = data.parent / f"{data.stem}_auto_annotate_labels"
    Path(output_dir).mkdir(exist_ok=True, parents=True)

    det_results = det_model(data, stream=True, device=device)

    for result in det_results:
        class_ids = result.boxes.cls.int().tolist()  # noqa
        if len(class_ids):
            boxes = result.boxes.xyxy  # Boxes object for bbox outputs
            sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device)
            segments = sam_results[0].masks.xyn  # noqa

            with open(f"{Path(output_dir) / Path(result.path).stem}.txt", "w") as f:
                for i in range(len(segments)):
                    s = segments[i]
                    if len(s) == 0:
                        continue
                    segment = map(str, segments[i].reshape(-1).tolist())
                    f.write(f"{class_ids[i]} " + " ".join(segment) + "\n")
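Each *.txt written above holds one object per line: a class ID followed by a normalized polygon (x1 y1 x2 y2 ...). A minimal sketch for reading the generated labels back (the folder name follows the default produced for data='ultralytics/assets'):

```python
from pathlib import Path

for txt in Path("ultralytics/assets_auto_annotate_labels").glob("*.txt"):
    for line in txt.read_text().splitlines():
        values = line.split()
        cls_id = int(values[0])  # class index from the detection model
        xy = [float(v) for v in values[1:]]  # normalized polygon coords: x1 y1 x2 y2 ...
        points = list(zip(xy[0::2], xy[1::2]))
        print(txt.stem, cls_id, f"{len(points)} polygon points")
```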
1254  ultralytics/data/augment.py  Normal file
File diff suppressed because it is too large
311  ultralytics/data/base.py  Normal file
@@ -0,0 +1,311 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import glob
import math
import os
import random
from copy import deepcopy
from multiprocessing.pool import ThreadPool
from pathlib import Path
from typing import Optional

import cv2
import numpy as np
import psutil
from torch.utils.data import Dataset

from ultralytics.utils import DEFAULT_CFG, LOCAL_RANK, LOGGER, NUM_THREADS, TQDM
from .utils import HELP_URL, IMG_FORMATS


class BaseDataset(Dataset):
    """
    Base dataset class for loading and processing image data.

    Args:
        img_path (str): Path to the folder containing images.
        imgsz (int, optional): Image size. Defaults to 640.
        cache (bool, optional): Cache images to RAM or disk during training. Defaults to False.
        augment (bool, optional): If True, data augmentation is applied. Defaults to True.
        hyp (dict, optional): Hyperparameters to apply data augmentation. Defaults to None.
        prefix (str, optional): Prefix to print in log messages. Defaults to ''.
        rect (bool, optional): If True, rectangular training is used. Defaults to False.
        batch_size (int, optional): Size of batches. Defaults to None.
        stride (int, optional): Stride. Defaults to 32.
        pad (float, optional): Padding. Defaults to 0.0.
        single_cls (bool, optional): If True, single class training is used. Defaults to False.
        classes (list): List of included classes. Default is None.
        fraction (float): Fraction of dataset to utilize. Default is 1.0 (use all data).

    Attributes:
        im_files (list): List of image file paths.
        labels (list): List of label data dictionaries.
        ni (int): Number of images in the dataset.
        ims (list): List of loaded images.
        npy_files (list): List of numpy file paths.
        transforms (callable): Image transformation function.
    """

    def __init__(
        self,
        img_path,
        imgsz=640,
        cache=False,
        augment=True,
        hyp=DEFAULT_CFG,
        prefix="",
        rect=False,
        batch_size=16,
        stride=32,
        pad=0.5,
        single_cls=False,
        classes=None,
        fraction=1.0,
    ):
        """Initialize BaseDataset with given configuration and options."""
        super().__init__()
        self.img_path = img_path
        self.imgsz = imgsz
        self.augment = augment
        self.single_cls = single_cls
        self.prefix = prefix
        self.fraction = fraction
        self.im_files = self.get_img_files(self.img_path)
        self.labels = self.get_labels()
        self.update_labels(include_class=classes)  # single_cls and include_class
        self.ni = len(self.labels)  # number of images
        self.rect = rect
        self.batch_size = batch_size
        self.stride = stride
        self.pad = pad
        if self.rect:
            assert self.batch_size is not None
            self.set_rectangle()

        # Buffer thread for mosaic images
        self.buffer = []  # buffer size = batch size
        self.max_buffer_length = min((self.ni, self.batch_size * 8, 1000)) if self.augment else 0

        # Cache images
        if cache == "ram" and not self.check_cache_ram():
            cache = False
        self.ims, self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni, [None] * self.ni
        self.npy_files = [Path(f).with_suffix(".npy") for f in self.im_files]
        if cache:
            self.cache_images(cache)

        # Transforms
        self.transforms = self.build_transforms(hyp=hyp)

    def get_img_files(self, img_path):
        """Read image files."""
        try:
            f = []  # image files
            for p in img_path if isinstance(img_path, list) else [img_path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / "**" / "*.*"), recursive=True)
                    # F = list(p.rglob('*.*'))  # pathlib
                elif p.is_file():  # file
                    with open(p) as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        f += [x.replace("./", parent) if x.startswith("./") else x for x in t]  # local to global path
                        # F += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                else:
                    raise FileNotFoundError(f"{self.prefix}{p} does not exist")
            im_files = sorted(x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS)
            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS])  # pathlib
            assert im_files, f"{self.prefix}No images found in {img_path}"
        except Exception as e:
            raise FileNotFoundError(f"{self.prefix}Error loading data from {img_path}\n{HELP_URL}") from e
        if self.fraction < 1:
            # im_files = im_files[: round(len(im_files) * self.fraction)]
            num_elements_to_select = round(len(im_files) * self.fraction)
            im_files = random.sample(im_files, num_elements_to_select)
        return im_files

    def update_labels(self, include_class: Optional[list]):
        """Update labels to include only these classes (optional)."""
        include_class_array = np.array(include_class).reshape(1, -1)
        for i in range(len(self.labels)):
            if include_class is not None:
                cls = self.labels[i]["cls"]
                bboxes = self.labels[i]["bboxes"]
                segments = self.labels[i]["segments"]
                keypoints = self.labels[i]["keypoints"]
                j = (cls == include_class_array).any(1)
                self.labels[i]["cls"] = cls[j]
                self.labels[i]["bboxes"] = bboxes[j]
                if segments:
                    self.labels[i]["segments"] = [segments[si] for si, idx in enumerate(j) if idx]
                if keypoints is not None:
                    self.labels[i]["keypoints"] = keypoints[j]
            if self.single_cls:
                self.labels[i]["cls"][:, 0] = 0

    def load_image(self, i, rect_mode=True):
        """Loads 1 image from dataset index 'i', returns (im, resized hw)."""
        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
        if im is None:  # not cached in RAM
            if fn.exists():  # load npy
                try:
                    im = np.load(fn)
                except Exception as e:
                    LOGGER.warning(f"{self.prefix}WARNING ⚠️ Removing corrupt *.npy image file {fn} due to: {e}")
                    Path(fn).unlink(missing_ok=True)
                    im = cv2.imread(f)  # BGR
            else:  # read image
                im = cv2.imread(f)  # BGR
            if im is None:
                raise FileNotFoundError(f"Image Not Found {f}")

            h0, w0 = im.shape[:2]  # orig hw
            if rect_mode:  # resize long side to imgsz while maintaining aspect ratio
                r = self.imgsz / max(h0, w0)  # ratio
                if r != 1:  # if sizes are not equal
                    w, h = (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz))
                    im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
            elif not (h0 == w0 == self.imgsz):  # resize by stretching image to square imgsz
                im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=cv2.INTER_LINEAR)

            # Add to buffer if training with augmentations
            if self.augment:
                self.ims[i], self.im_hw0[i], self.im_hw[i] = im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
                self.buffer.append(i)
                if len(self.buffer) >= self.max_buffer_length:
                    j = self.buffer.pop(0)
                    self.ims[j], self.im_hw0[j], self.im_hw[j] = None, None, None

            return im, (h0, w0), im.shape[:2]

        return self.ims[i], self.im_hw0[i], self.im_hw[i]

    def cache_images(self, cache):
        """Cache images to memory or disk."""
        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabyte
        fcn = self.cache_images_to_disk if cache == "disk" else self.load_image
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(fcn, range(self.ni))
            pbar = TQDM(enumerate(results), total=self.ni, disable=LOCAL_RANK > 0)
            for i, x in pbar:
                if cache == "disk":
                    b += self.npy_files[i].stat().st_size
                else:  # 'ram'
                    self.ims[i], self.im_hw0[i], self.im_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
                    b += self.ims[i].nbytes
                pbar.desc = f"{self.prefix}Caching images ({b / gb:.1f}GB {cache})"
            pbar.close()

    def cache_images_to_disk(self, i):
        """Saves an image as an *.npy file for faster loading."""
        f = self.npy_files[i]
        if not f.exists():
            np.save(f.as_posix(), cv2.imread(self.im_files[i]), allow_pickle=False)

    def check_cache_ram(self, safety_margin=0.5):
        """Check image caching requirements vs available memory."""
        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabyte
        n = min(self.ni, 30)  # extrapolate from 30 random images
        for _ in range(n):
            im = cv2.imread(random.choice(self.im_files))  # sample image
            ratio = self.imgsz / max(im.shape[0], im.shape[1])  # max(h, w)  # ratio
            b += im.nbytes * ratio**2
        mem_required = b * self.ni / n * (1 + safety_margin)  # bytes required to cache dataset into RAM
        mem = psutil.virtual_memory()
        cache = mem_required < mem.available  # to cache or not to cache, that is the question
        if not cache:
            LOGGER.info(
                f'{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images '
                f'with {int(safety_margin * 100)}% safety margin but only '
                f'{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, '
                f"{'caching images ✅' if cache else 'not caching images ⚠️'}"
            )
        return cache

    def set_rectangle(self):
        """Sets the shape of bounding boxes for YOLO detections as rectangles."""
        bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches

        s = np.array([x.pop("shape") for x in self.labels])  # hw
        ar = s[:, 0] / s[:, 1]  # aspect ratio
        irect = ar.argsort()
        self.im_files = [self.im_files[i] for i in irect]
        self.labels = [self.labels[i] for i in irect]
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]

        self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride
        self.batch = bi  # batch index of image
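        # Worked example (illustrative): with imgsz=640, stride=32 and pad=0.5, a
        # batch whose images all have aspect ratio h/w ~= 0.75 gets shapes[i] = [0.75, 1],
        # so batch_shapes[i] = ceil([0.75, 1] * 640 / 32 + 0.5) * 32 = [512, 672] (h, w).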

    def __getitem__(self, index):
        """Returns transformed label information for given index."""
        return self.transforms(self.get_image_and_label(index))

    def get_image_and_label(self, index):
        """Get and return label information from the dataset."""
        label = deepcopy(self.labels[index])  # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
        label.pop("shape", None)  # shape is for rect, remove it
        label["img"], label["ori_shape"], label["resized_shape"] = self.load_image(index)
        label["ratio_pad"] = (
            label["resized_shape"][0] / label["ori_shape"][0],
            label["resized_shape"][1] / label["ori_shape"][1],
        )  # for evaluation
        if self.rect:
            label["rect_shape"] = self.batch_shapes[self.batch[index]]
        return self.update_labels_info(label)

    def __len__(self):
        """Returns the length of the labels list for the dataset."""
        return len(self.labels)

    def update_labels_info(self, label):
        """Customize your label format here."""
        return label

    def build_transforms(self, hyp=None):
        """
        Users can customize augmentations here.

        Example:
            ```python
            if self.augment:
                # Training transforms
                return Compose([])
            else:
                # Val transforms
                return Compose([])
            ```
        """
        raise NotImplementedError

    def get_labels(self):
        """
        Users can customize their own format here.

        Note:
            Ensure output is a dictionary with the following keys:
            ```python
            dict(
                im_file=im_file,
                shape=shape,  # format: (height, width)
                cls=cls,
                bboxes=bboxes,  # xywh
                segments=segments,  # xy
                keypoints=keypoints,  # xy
                normalized=True,  # or False
                bbox_format="xyxy",  # or xywh, ltwh
            )
            ```
        """
        raise NotImplementedError
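get_labels() and build_transforms() are the two abstract hooks, so a concrete dataset only needs to return label dicts in the schema documented above plus a transform callable. A minimal, untested sketch (the dummy shape and box values are placeholders):

```python
import numpy as np

from ultralytics.data.base import BaseDataset


class DummyDataset(BaseDataset):
    """Toy subclass: every image gets one dummy full-image box."""

    def get_labels(self):
        return [
            dict(
                im_file=f,
                shape=(640, 640),  # (height, width); normally read from the image itself
                cls=np.zeros((1, 1), dtype=np.float32),
                bboxes=np.array([[0.5, 0.5, 1.0, 1.0]], dtype=np.float32),
                segments=[],
                keypoints=None,
                normalized=True,
                bbox_format="xywh",
            )
            for f in self.im_files
        ]

    def build_transforms(self, hyp=None):
        return lambda labels: labels  # no-op transform for illustration
```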
186  ultralytics/data/build.py  Normal file
@@ -0,0 +1,186 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import os
import random
from pathlib import Path

import numpy as np
import torch
from PIL import Image
from torch.utils.data import dataloader, distributed

from ultralytics.data.loaders import (
    LOADERS,
    LoadImagesAndVideos,
    LoadPilAndNumpy,
    LoadScreenshots,
    LoadStreams,
    LoadTensor,
    SourceTypes,
    autocast_list,
)
from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
from ultralytics.utils import RANK, colorstr
from ultralytics.utils.checks import check_file
from .dataset import YOLODataset
from .utils import PIN_MEMORY


class InfiniteDataLoader(dataloader.DataLoader):
    """
    Dataloader that reuses workers.

    Uses same syntax as vanilla DataLoader.
    """

    def __init__(self, *args, **kwargs):
        """Dataloader that infinitely recycles workers, inherits from DataLoader."""
        super().__init__(*args, **kwargs)
        object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
        """Returns the length of the batch sampler's sampler."""
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        """Creates a sampler that repeats indefinitely."""
        for _ in range(len(self)):
            yield next(self.iterator)

    def reset(self):
        """
        Reset iterator.

        This is useful when we want to modify settings of dataset while training.
        """
        self.iterator = self._get_iterator()


class _RepeatSampler:
    """
    Sampler that repeats forever.

    Args:
        sampler (Dataset.sampler): The sampler to repeat.
    """

    def __init__(self, sampler):
        """Initializes an object that repeats a given sampler indefinitely."""
        self.sampler = sampler

    def __iter__(self):
        """Iterates over the 'sampler' and yields its contents."""
        while True:
            yield from iter(self.sampler)


def seed_worker(worker_id):  # noqa
    """Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader."""
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, stride=32):
    """Build YOLO Dataset."""
    return YOLODataset(
        img_path=img_path,
        imgsz=cfg.imgsz,
        batch_size=batch,
        augment=mode == "train",  # augmentation
        hyp=cfg,  # TODO: probably add a get_hyps_from_cfg function
        rect=cfg.rect or rect,  # rectangular batches
        cache=cfg.cache or None,
        single_cls=cfg.single_cls or False,
        stride=int(stride),
        pad=0.0 if mode == "train" else 0.5,
        prefix=colorstr(f"{mode}: "),
        task=cfg.task,
        classes=cfg.classes,
        data=data,
        fraction=cfg.fraction if mode == "train" else 1.0,
    )


def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
    """Return an InfiniteDataLoader or DataLoader for training or validation set."""
    batch = min(batch, len(dataset))
    nd = torch.cuda.device_count()  # number of CUDA devices
    nw = min([os.cpu_count() // max(nd, 1), workers])  # number of workers
    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
    generator = torch.Generator()
    generator.manual_seed(6148914691236517205 + RANK)
    return InfiniteDataLoader(
        dataset=dataset,
        batch_size=batch,
        shuffle=shuffle and sampler is None,
        num_workers=nw,
        sampler=sampler,
        pin_memory=PIN_MEMORY,
        collate_fn=getattr(dataset, "collate_fn", None),
        worker_init_fn=seed_worker,
        generator=generator,
    )


def check_source(source):
    """Check source type and return corresponding flag values."""
    webcam, screenshot, from_img, in_memory, tensor = False, False, False, False, False
    if isinstance(source, (str, int, Path)):  # int for local usb camera
        source = str(source)
        is_file = Path(source).suffix[1:] in (IMG_FORMATS | VID_FORMATS)
        is_url = source.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://"))
        webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
        screenshot = source.lower() == "screen"
        if is_url and is_file:
            source = check_file(source)  # download
    elif isinstance(source, LOADERS):
        in_memory = True
    elif isinstance(source, (list, tuple)):
        source = autocast_list(source)  # convert all list elements to PIL or np arrays
        from_img = True
    elif isinstance(source, (Image.Image, np.ndarray)):
        from_img = True
    elif isinstance(source, torch.Tensor):
        tensor = True
    else:
        raise TypeError("Unsupported image type. For supported types see https://docs.ultralytics.com/modes/predict")

    return source, webcam, screenshot, from_img, in_memory, tensor


def load_inference_source(source=None, batch=1, vid_stride=1, buffer=False):
    """
    Loads an inference source for object detection and applies necessary transformations.

    Args:
        source (str, Path, Tensor, PIL.Image, np.ndarray): The input source for inference.
        batch (int, optional): Batch size for dataloaders. Default is 1.
        vid_stride (int, optional): The frame interval for video sources. Default is 1.
        buffer (bool, optional): Determines whether stream frames will be buffered. Default is False.

    Returns:
        dataset (Dataset): A dataset object for the specified input source.
    """
    source, stream, screenshot, from_img, in_memory, tensor = check_source(source)
    source_type = source.source_type if in_memory else SourceTypes(stream, screenshot, from_img, tensor)

    # Dataloader
    if tensor:
        dataset = LoadTensor(source)
    elif in_memory:
        dataset = source
    elif stream:
        dataset = LoadStreams(source, vid_stride=vid_stride, buffer=buffer)
    elif screenshot:
        dataset = LoadScreenshots(source)
    elif from_img:
        dataset = LoadPilAndNumpy(source)
    else:
        dataset = LoadImagesAndVideos(source, batch=batch, vid_stride=vid_stride)

    # Attach source types to the dataset
    setattr(dataset, "source_type", source_type)

    return dataset
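A minimal end-to-end sketch of these builders, assuming the standard get_cfg and check_det_dataset helpers and a local 'coco128.yaml':

```python
from ultralytics.cfg import get_cfg
from ultralytics.data import build_dataloader, build_yolo_dataset
from ultralytics.data.utils import check_det_dataset

cfg = get_cfg()  # default train/augmentation hyperparameters
data = check_det_dataset("coco128.yaml")  # resolves dataset paths, names and splits
dataset = build_yolo_dataset(cfg, img_path=data["train"], batch=16, data=data, mode="train")
loader = build_dataloader(dataset, batch=16, workers=8, shuffle=True, rank=-1)
for batch in loader:  # batches are dicts assembled by YOLODataset.collate_fn
    print(batch["img"].shape, batch["cls"].shape)
    break
```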
542  ultralytics/data/converter.py  Normal file
@@ -0,0 +1,542 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import json
from collections import defaultdict
from pathlib import Path

import cv2
import numpy as np

from ultralytics.utils import LOGGER, TQDM
from ultralytics.utils.files import increment_path


def coco91_to_coco80_class():
    """
    Converts 91-index COCO class IDs to 80-index COCO class IDs.

    Returns:
        (list): A list of 91 class IDs where the index represents the 80-index class ID and the value is the
            corresponding 91-index class ID.
    """
    return [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, None, 24, 25,
        None, None, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, None, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, None, 60, None, None, 61, None, 62, 63, 64, 65, 66, 67,
        68, 69, 70, 71, 72, None, 73, 74, 75, 76, 77, 78, 79, None,
    ]


def coco80_to_coco91_class():
    """
    Converts 80-index (val2014) to 91-index (paper).
    For details see https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/.

    Example:
        ```python
        import numpy as np

        a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
        b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
        x1 = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
        x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)]  # coco to darknet
        ```
    """
    return [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90,
    ]


def convert_coco(
    labels_dir="../coco/annotations/",
    save_dir="coco_converted/",
    use_segments=False,
    use_keypoints=False,
    cls91to80=True,
):
    """
    Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.

    Args:
        labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
        save_dir (str, optional): Path to directory to save results to.
        use_segments (bool, optional): Whether to include segmentation masks in the output.
        use_keypoints (bool, optional): Whether to include keypoint annotations in the output.
        cls91to80 (bool, optional): Whether to map 91 COCO class IDs to the corresponding 80 COCO class IDs.

    Example:
        ```python
        from ultralytics.data.converter import convert_coco

        convert_coco('../datasets/coco/annotations/', use_segments=True, use_keypoints=False, cls91to80=True)
        ```

    Output:
        Generates output files in the specified output directory.
    """
    # Create dataset directory
    save_dir = increment_path(save_dir)  # increment if save directory already exists
    for p in save_dir / "labels", save_dir / "images":
        p.mkdir(parents=True, exist_ok=True)  # make dir

    # Convert classes
    coco80 = coco91_to_coco80_class()

    # Import json
    for json_file in sorted(Path(labels_dir).resolve().glob("*.json")):
        fn = Path(save_dir) / "labels" / json_file.stem.replace("instances_", "")  # folder name
        fn.mkdir(parents=True, exist_ok=True)
        with open(json_file) as f:
            data = json.load(f)

        # Create image dict
        images = {f'{x["id"]:d}': x for x in data["images"]}
        # Create image-annotations dict
        imgToAnns = defaultdict(list)
        for ann in data["annotations"]:
            imgToAnns[ann["image_id"]].append(ann)

        # Write labels file
        for img_id, anns in TQDM(imgToAnns.items(), desc=f"Annotations {json_file}"):
            img = images[f"{img_id:d}"]
            h, w, f = img["height"], img["width"], img["file_name"]

            bboxes = []
            segments = []
            keypoints = []
            for ann in anns:
                if ann["iscrowd"]:
                    continue
                # The COCO box format is [top left x, top left y, width, height]
                box = np.array(ann["bbox"], dtype=np.float64)
                box[:2] += box[2:] / 2  # xy top-left corner to center
                box[[0, 2]] /= w  # normalize x
                box[[1, 3]] /= h  # normalize y
                if box[2] <= 0 or box[3] <= 0:  # skip boxes with non-positive w or h
                    continue

                cls = coco80[ann["category_id"] - 1] if cls91to80 else ann["category_id"] - 1  # class
                box = [cls] + box.tolist()
                if box not in bboxes:
                    bboxes.append(box)
                    if use_segments and ann.get("segmentation") is not None:
                        if len(ann["segmentation"]) == 0:
                            segments.append([])
                            continue
                        elif len(ann["segmentation"]) > 1:
                            s = merge_multi_segment(ann["segmentation"])
                            s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
                        else:
                            s = [j for i in ann["segmentation"] for j in i]  # all segments concatenated
                            s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
                        s = [cls] + s
                        segments.append(s)
                    if use_keypoints and ann.get("keypoints") is not None:
                        keypoints.append(
                            box + (np.array(ann["keypoints"]).reshape(-1, 3) / np.array([w, h, 1])).reshape(-1).tolist()
                        )

            # Write
            with open((fn / f).with_suffix(".txt"), "a") as file:
                for i in range(len(bboxes)):
                    if use_keypoints:
                        line = (*(keypoints[i]),)  # cls, box, keypoints
                    else:
                        line = (
                            *(segments[i] if use_segments and len(segments[i]) > 0 else bboxes[i]),
                        )  # cls, box or segments
                    file.write(("%g " * len(line)).rstrip() % line + "\n")

    LOGGER.info(f"COCO data converted successfully.\nResults saved to {save_dir.resolve()}")
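# Worked example (illustrative) of the box math above: a COCO box
# [x=10, y=20, w=100, h=200] in a 640x480 image has center (60, 120), so the
# normalized YOLO xywh row is (60/640, 120/480, 100/640, 200/480)
# = (0.09375, 0.25, 0.15625, 0.41667).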


def convert_dota_to_yolo_obb(dota_root_path: str):
    """
    Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.

    The function processes images in the 'train' and 'val' folders of the DOTA dataset. For each image, it reads the
    associated label from the original labels directory and writes new labels in YOLO OBB format to a new directory.

    Args:
        dota_root_path (str): The root directory path of the DOTA dataset.

    Example:
        ```python
        from ultralytics.data.converter import convert_dota_to_yolo_obb

        convert_dota_to_yolo_obb('path/to/DOTA')
        ```

    Notes:
        The directory structure assumed for the DOTA dataset:

            - DOTA
                ├─ images
                │   ├─ train
                │   └─ val
                └─ labels
                    ├─ train_original
                    └─ val_original

        After execution, the function will organize the labels into:

            - DOTA
                └─ labels
                    ├─ train
                    └─ val
    """
    dota_root_path = Path(dota_root_path)

    # Class names to indices mapping
    class_mapping = {
        "plane": 0,
        "ship": 1,
        "storage-tank": 2,
        "baseball-diamond": 3,
        "tennis-court": 4,
        "basketball-court": 5,
        "ground-track-field": 6,
        "harbor": 7,
        "bridge": 8,
        "large-vehicle": 9,
        "small-vehicle": 10,
        "helicopter": 11,
        "roundabout": 12,
        "soccer-ball-field": 13,
        "swimming-pool": 14,
        "container-crane": 15,
        "airport": 16,
        "helipad": 17,
    }

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
        """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
        orig_label_path = orig_label_dir / f"{image_name}.txt"
        save_path = save_dir / f"{image_name}.txt"

        with orig_label_path.open("r") as f, save_path.open("w") as g:
            lines = f.readlines()
            for line in lines:
                parts = line.strip().split()
                if len(parts) < 9:
                    continue
                class_name = parts[8]
                class_idx = class_mapping[class_name]
                coords = [float(p) for p in parts[:8]]
                normalized_coords = [
                    coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)
                ]
                formatted_coords = ["{:.6g}".format(coord) for coord in normalized_coords]
                g.write(f"{class_idx} {' '.join(formatted_coords)}\n")

    for phase in ["train", "val"]:
        image_dir = dota_root_path / "images" / phase
        orig_label_dir = dota_root_path / "labels" / f"{phase}_original"
        save_dir = dota_root_path / "labels" / phase

        save_dir.mkdir(parents=True, exist_ok=True)

        image_paths = list(image_dir.iterdir())
        for image_path in TQDM(image_paths, desc=f"Processing {phase} images"):
            if image_path.suffix != ".png":
                continue
            image_name_without_ext = image_path.stem
            img = cv2.imread(str(image_path))
            h, w = img.shape[:2]
            convert_label(image_name_without_ext, w, h, orig_label_dir, save_dir)


def min_index(arr1, arr2):
    """
    Find a pair of indexes with the shortest distance between two arrays of 2D points.

    Args:
        arr1 (np.ndarray): A NumPy array of shape (N, 2) representing N 2D points.
        arr2 (np.ndarray): A NumPy array of shape (M, 2) representing M 2D points.

    Returns:
        (tuple): A tuple containing the indexes of the points with the shortest distance in arr1 and arr2 respectively.
    """
    dis = ((arr1[:, None, :] - arr2[None, :, :]) ** 2).sum(-1)
    return np.unravel_index(np.argmin(dis, axis=None), dis.shape)
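# Worked example (illustrative): for arr1 = [[0, 0], [10, 0]] and
# arr2 = [[9, 1], [50, 50]], the squared-distance matrix is
# [[82, 5000], [2, 4100]], whose minimum sits at (1, 0): point (10, 0) in arr1
# is closest to point (9, 1) in arr2, so min_index returns (1, 0).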


def merge_multi_segment(segments):
    """
    Merge multiple segments into one list by connecting the coordinates with the minimum distance between each segment.
    This function connects these coordinates with a thin line to merge all segments into one.

    Args:
        segments (List[List]): Original segmentations in COCO's JSON file.
            Each element is a list of coordinates, like [segmentation1, segmentation2,...].

    Returns:
        s (List[np.ndarray]): A list of connected segments represented as NumPy arrays.
    """
    s = []
    segments = [np.array(i).reshape(-1, 2) for i in segments]
    idx_list = [[] for _ in range(len(segments))]

    # Record the indexes with min distance between each segment
    for i in range(1, len(segments)):
        idx1, idx2 = min_index(segments[i - 1], segments[i])
        idx_list[i - 1].append(idx1)
        idx_list[i].append(idx2)

    # Use two rounds to connect all the segments
    for k in range(2):
        # Forward connection
        if k == 0:
            for i, idx in enumerate(idx_list):
                # Middle segments have two indexes, reverse the index of middle segments
                if len(idx) == 2 and idx[0] > idx[1]:
                    idx = idx[::-1]
                    segments[i] = segments[i][::-1, :]

                segments[i] = np.roll(segments[i], -idx[0], axis=0)
                segments[i] = np.concatenate([segments[i], segments[i][:1]])
                # Deal with the first segment and the last one
                if i in [0, len(idx_list) - 1]:
                    s.append(segments[i])
                else:
                    idx = [0, idx[1] - idx[0]]
                    s.append(segments[i][idx[0] : idx[1] + 1])

        else:
            for i in range(len(idx_list) - 1, -1, -1):
                if i not in [0, len(idx_list) - 1]:
                    idx = idx_list[i]
                    nidx = abs(idx[1] - idx[0])
                    s.append(segments[i][nidx:])
    return s


def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
    """
    Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB)
    in YOLO format. Generates segmentation data using SAM auto-annotator as needed.

    Args:
        im_dir (str | Path): Path to image directory to convert.
        save_dir (str | Path): Path to save the generated labels, labels will be saved
            into `labels-segment` in the same directory level of `im_dir` if save_dir is None. Default: None.
        sam_model (str): Segmentation model to use for intermediate segmentation data; optional.

    Notes:
        The input directory structure assumed for dataset:

            - im_dir
                ├─ 001.jpg
                ├─ ..
                └─ NNN.jpg
            - labels
                ├─ 001.txt
                ├─ ..
                └─ NNN.txt
    """
    from ultralytics.data import YOLODataset
    from ultralytics.utils.ops import xywh2xyxy
    from ultralytics.utils import LOGGER
    from ultralytics import SAM
    from tqdm import tqdm

    # NOTE: add placeholder to pass class index check
    dataset = YOLODataset(im_dir, data=dict(names=list(range(1000))))
    if len(dataset.labels[0]["segments"]) > 0:  # if it's segment data
        LOGGER.info("Segmentation labels detected, no need to generate new ones!")
        return

    LOGGER.info("Detection labels detected, generating segment labels by SAM model!")
    sam_model = SAM(sam_model)
    for l in tqdm(dataset.labels, total=len(dataset.labels), desc="Generating segment labels"):
        h, w = l["shape"]
        boxes = l["bboxes"]
        if len(boxes) == 0:  # skip empty labels
            continue
        boxes[:, [0, 2]] *= w
        boxes[:, [1, 3]] *= h
        im = cv2.imread(l["im_file"])
        sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False)
        l["segments"] = sam_results[0].masks.xyn

    save_dir = Path(save_dir) if save_dir else Path(im_dir).parent / "labels-segment"
    save_dir.mkdir(parents=True, exist_ok=True)
    for l in dataset.labels:
        texts = []
        lb_name = Path(l["im_file"]).with_suffix(".txt").name
        txt_file = save_dir / lb_name
        cls = l["cls"]
        for i, s in enumerate(l["segments"]):
            line = (int(cls[i]), *s.reshape(-1))
            texts.append(("%g " * len(line)).rstrip() % line)
        if texts:
            with open(txt_file, "a") as f:
                f.writelines(text + "\n" for text in texts)
    LOGGER.info(f"Generated segment labels saved in {save_dir}")
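Unlike the converters above, yolo_bbox2segment() carries no usage example in its docstring. A hedged sketch with a hypothetical dataset path (labels are expected in a sibling 'labels' folder, per the Notes):

```python
from ultralytics.data.converter import yolo_bbox2segment

yolo_bbox2segment("path/to/dataset/images", sam_model="sam_b.pt")
# A labels-segment/ folder with polygon labels is created next to the images folder
```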
383  ultralytics/data/dataset.py  Normal file
@@ -0,0 +1,383 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
import contextlib
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path

import cv2
import numpy as np
import torch
import torchvision
from PIL import Image

from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr, is_dir_writeable
from ultralytics.utils.ops import resample_segments
from .augment import Compose, Format, Instances, LetterBox, classify_augmentations, classify_transforms, v8_transforms
from .base import BaseDataset
from .utils import HELP_URL, LOGGER, get_hash, img2label_paths, verify_image, verify_image_label

# Ultralytics dataset *.cache version, >= 1.0.0 for YOLOv8
DATASET_CACHE_VERSION = "1.0.3"


class YOLODataset(BaseDataset):
    """
    Dataset class for loading object detection and/or segmentation labels in YOLO format.

    Args:
        data (dict, optional): A dataset YAML dictionary. Defaults to None.
        task (str): An explicit arg to point current task. Defaults to 'detect'.

    Returns:
        (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
    """

    def __init__(self, *args, data=None, task="detect", **kwargs):
        """Initializes the YOLODataset with optional configurations for segments and keypoints."""
        self.use_segments = task == "segment"
        self.use_keypoints = task == "pose"
        self.use_obb = task == "obb"
        self.data = data
        assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
        super().__init__(*args, **kwargs)

    def cache_labels(self, path=Path("./labels.cache")):
        """
        Cache dataset labels, check images and read shapes.

        Args:
            path (Path): Path where to save the cache file. Default is Path('./labels.cache').

        Returns:
            (dict): labels.
        """
        x = {"labels": []}
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
        desc = f"{self.prefix}Scanning {path.parent / path.stem}..."
        total = len(self.im_files)
        nkpt, ndim = self.data.get("kpt_shape", (0, 0))
        if self.use_keypoints and (nkpt <= 0 or ndim not in (2, 3)):
            raise ValueError(
                "'kpt_shape' in data.yaml missing or incorrect. Should be a list with [number of "
                "keypoints, number of dims (2 for x,y or 3 for x,y,visible)], i.e. 'kpt_shape: [17, 3]'"
            )
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(
                func=verify_image_label,
                iterable=zip(
                    self.im_files,
                    self.label_files,
                    repeat(self.prefix),
                    repeat(self.use_keypoints),
                    repeat(len(self.data["names"])),
                    repeat(nkpt),
                    repeat(ndim),
                ),
            )
            pbar = TQDM(results, desc=desc, total=total)
            for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar:
                nm += nm_f
                nf += nf_f
                ne += ne_f
                nc += nc_f
                if im_file:
                    x["labels"].append(
                        dict(
                            im_file=im_file,
                            shape=shape,
                            cls=lb[:, 0:1],  # n, 1
                            bboxes=lb[:, 1:],  # n, 4
                            segments=segments,
                            keypoints=keypoint,
                            normalized=True,
                            bbox_format="xywh",
                        )
                    )
                if msg:
                    msgs.append(msg)
                pbar.desc = f"{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt"
            pbar.close()

        if msgs:
            LOGGER.info("\n".join(msgs))
        if nf == 0:
            LOGGER.warning(f"{self.prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}")
        x["hash"] = get_hash(self.label_files + self.im_files)
        x["results"] = nf, nm, ne, nc, len(self.im_files)
        x["msgs"] = msgs  # warnings
        save_dataset_cache_file(self.prefix, path, x)
        return x

    def get_labels(self):
        """Returns dictionary of labels for YOLO training."""
        self.label_files = img2label_paths(self.im_files)
        cache_path = Path(self.label_files[0]).parent.with_suffix(".cache")
        try:
            cache, exists = load_dataset_cache_file(cache_path), True  # attempt to load a *.cache file
            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
            assert cache["hash"] == get_hash(self.label_files + self.im_files)  # identical hash
        except (FileNotFoundError, AssertionError, AttributeError):
            cache, exists = self.cache_labels(cache_path), False  # run cache ops

        # Display cache
        nf, nm, ne, nc, n = cache.pop("results")  # found, missing, empty, corrupt, total
        if exists and LOCAL_RANK in (-1, 0):
            d = f"Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt"
            TQDM(None, desc=self.prefix + d, total=n, initial=n)  # display results
            if cache["msgs"]:
                LOGGER.info("\n".join(cache["msgs"]))  # display warnings

        # Read cache
        [cache.pop(k) for k in ("hash", "version", "msgs")]  # remove items
        labels = cache["labels"]
        if not labels:
            LOGGER.warning(f"WARNING ⚠️ No images found in {cache_path}, training may not work correctly. {HELP_URL}")
        self.im_files = [lb["im_file"] for lb in labels]  # update im_files

        # Check if the dataset is all boxes or all segments
        lengths = ((len(lb["cls"]), len(lb["bboxes"]), len(lb["segments"])) for lb in labels)
        len_cls, len_boxes, len_segments = (sum(x) for x in zip(*lengths))
        if len_segments and len_boxes != len_segments:
            LOGGER.warning(
                f"WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = {len_segments}, "
                f"len(boxes) = {len_boxes}. To resolve this only boxes will be used and all segments will be removed. "
                "To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset."
            )
            for lb in labels:
                lb["segments"] = []
        if len_cls == 0:
            LOGGER.warning(f"WARNING ⚠️ No labels found in {cache_path}, training may not work correctly. {HELP_URL}")
        return labels

    def build_transforms(self, hyp=None):
        """Builds and appends transforms to the list."""
        if self.augment:
            hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
            hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
            transforms = v8_transforms(self, self.imgsz, hyp)
        else:
            transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)])
        transforms.append(
            Format(
                bbox_format="xywh",
                normalize=True,
                return_mask=self.use_segments,
                return_keypoint=self.use_keypoints,
                return_obb=self.use_obb,
                batch_idx=True,
                mask_ratio=hyp.mask_ratio,
                mask_overlap=hyp.overlap_mask,
                bgr=hyp.bgr if self.augment else 0.0,  # only affect training.
            )
        )
        return transforms

    def close_mosaic(self, hyp):
        """Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations."""
        hyp.mosaic = 0.0  # set mosaic ratio=0.0
        hyp.copy_paste = 0.0  # keep the same behavior as previous v8 close-mosaic
        hyp.mixup = 0.0  # keep the same behavior as previous v8 close-mosaic
        self.transforms = self.build_transforms(hyp)

    def update_labels_info(self, label):
        """
        Customize your label format here.

        Note:
            cls is not with bboxes now, classification and semantic segmentation need an independent cls label.
            Can also support classification and semantic segmentation by adding or removing dict keys there.
        """
        bboxes = label.pop("bboxes")
        segments = label.pop("segments", [])
        keypoints = label.pop("keypoints", None)
        bbox_format = label.pop("bbox_format")
        normalized = label.pop("normalized")

        # NOTE: do NOT resample oriented boxes
        segment_resamples = 100 if self.use_obb else 1000
        if len(segments) > 0:
            # list[np.array(1000, 2)] * num_samples
            # (N, 1000, 2)
            segments = np.stack(resample_segments(segments, n=segment_resamples), axis=0)
        else:
            segments = np.zeros((0, segment_resamples, 2), dtype=np.float32)
        label["instances"] = Instances(bboxes, segments, keypoints, bbox_format=bbox_format, normalized=normalized)
        return label

    @staticmethod
    def collate_fn(batch):
        """Collates data samples into batches."""
        new_batch = {}
        keys = batch[0].keys()
        values = list(zip(*[list(b.values()) for b in batch]))
        for i, k in enumerate(keys):
            value = values[i]
            if k == "img":
                value = torch.stack(value, 0)
            if k in ["masks", "keypoints", "bboxes", "cls", "segments", "obb"]:
                value = torch.cat(value, 0)
            new_batch[k] = value
        new_batch["batch_idx"] = list(new_batch["batch_idx"])
        for i in range(len(new_batch["batch_idx"])):
            new_batch["batch_idx"][i] += i  # add target image index for build_targets()
        new_batch["batch_idx"] = torch.cat(new_batch["batch_idx"], 0)
        return new_batch


# Classification dataloaders -------------------------------------------------------------------------------------------
class ClassificationDataset(torchvision.datasets.ImageFolder):
    """
    Extends torchvision ImageFolder to support YOLO classification tasks, offering functionalities like image
    augmentation, caching, and verification. It's designed to efficiently handle large datasets for training deep
    learning models, with optional image transformations and caching mechanisms to speed up training.

    This class allows for augmentations using both torchvision and Albumentations libraries, and supports caching images
    in RAM or on disk to reduce IO overhead during training. Additionally, it implements a robust verification process
    to ensure data integrity and consistency.

    Attributes:
        cache_ram (bool): Indicates if caching in RAM is enabled.
        cache_disk (bool): Indicates if caching on disk is enabled.
        samples (list): A list of tuples, each containing the path to an image, its class index, path to its .npy cache
            file (if caching on disk), and optionally the loaded image array (if caching in RAM).
        torch_transforms (callable): PyTorch transforms to be applied to the images.
    """

    def __init__(self, root, args, augment=False, prefix=""):
        """
        Initialize YOLO object with root, image size, augmentations, and cache settings.

        Args:
            root (str): Path to the dataset directory where images are stored in a class-specific folder structure.
            args (Namespace): Configuration containing dataset-related settings such as image size, augmentation
                parameters, and cache settings. It includes attributes like `imgsz` (image size), `fraction` (fraction
                of data to use), `scale`, `fliplr`, `flipud`, `cache` (disk or RAM caching for faster training),
                `auto_augment`, `hsv_h`, `hsv_s`, `hsv_v`, and `crop_fraction`.
            augment (bool, optional): Whether to apply augmentations to the dataset. Default is False.
            prefix (str, optional): Prefix for logging and cache filenames, aiding in dataset identification and
                debugging. Default is an empty string.
        """
        super().__init__(root=root)
        if augment and args.fraction < 1.0:  # reduce training fraction
            self.samples = self.samples[: round(len(self.samples) * args.fraction)]
        self.prefix = colorstr(f"{prefix}: ") if prefix else ""
        self.cache_ram = args.cache is True or args.cache == "ram"  # cache images into RAM
        self.cache_disk = args.cache == "disk"  # cache images on hard drive as uncompressed *.npy files
        self.samples = self.verify_images()  # filter out bad images
        self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
        scale = (1.0 - args.scale, 1.0)  # (0.08, 1.0)
        self.torch_transforms = (
            classify_augmentations(
                size=args.imgsz,
                scale=scale,
                hflip=args.fliplr,
                vflip=args.flipud,
                erasing=args.erasing,
                auto_augment=args.auto_augment,
                hsv_h=args.hsv_h,
                hsv_s=args.hsv_s,
                hsv_v=args.hsv_v,
            )
            if augment
            else classify_transforms(size=args.imgsz, crop_fraction=args.crop_fraction)
        )

    def __getitem__(self, i):
        """Returns subset of data and targets corresponding to given indices."""
        f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
        if self.cache_ram and im is None:
            im = self.samples[i][3] = cv2.imread(f)
        elif self.cache_disk:
            if not fn.exists():  # create *.npy cache if missing
                np.save(fn.as_posix(), cv2.imread(f), allow_pickle=False)
            im = np.load(fn)
        else:  # read image
            im = cv2.imread(f)  # BGR
        # Convert NumPy array to PIL image
        im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
        sample = self.torch_transforms(im)
        return {"img": sample, "cls": j}

    def __len__(self) -> int:
        """Return the total number of samples in the dataset."""
        return len(self.samples)

    def verify_images(self):
        """Verify all images in dataset."""
        desc = f"{self.prefix}Scanning {self.root}..."
        path = Path(self.root).with_suffix(".cache")  # *.cache file path

        with contextlib.suppress(FileNotFoundError, AssertionError, AttributeError):
            cache = load_dataset_cache_file(path)  # attempt to load a *.cache file
            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
            assert cache["hash"] == get_hash([x[0] for x in self.samples])  # identical hash
            nf, nc, n, samples = cache.pop("results")  # found, corrupt, total, samples
            if LOCAL_RANK in (-1, 0):
                d = f"{desc} {nf} images, {nc} corrupt"
                TQDM(None, desc=d, total=n, initial=n)
                if cache["msgs"]:
                    LOGGER.info("\n".join(cache["msgs"]))  # display warnings
            return samples

        # Run scan if *.cache retrieval failed
        nf, nc, msgs, samples, x = 0, 0, [], [], {}
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix)))
            pbar = TQDM(results, desc=desc, total=len(self.samples))
            for sample, nf_f, nc_f, msg in pbar:
                if nf_f:
                    samples.append(sample)
                if msg:
                    msgs.append(msg)
                nf += nf_f
                nc += nc_f
                pbar.desc = f"{desc} {nf} images, {nc} corrupt"
            pbar.close()
        if msgs:
            LOGGER.info("\n".join(msgs))
        x["hash"] = get_hash([x[0] for x in self.samples])
        x["results"] = nf, nc, len(samples), samples
        x["msgs"] = msgs  # warnings
        save_dataset_cache_file(self.prefix, path, x)
        return samples


def load_dataset_cache_file(path):
    """Load an Ultralytics *.cache dictionary from path."""
    import gc

    gc.disable()  # reduce pickle load time https://github.com/ultralytics/ultralytics/pull/1585
    cache = np.load(str(path), allow_pickle=True).item()  # load dict
    gc.enable()
    return cache


def save_dataset_cache_file(prefix, path, x):
    """Save an Ultralytics dataset *.cache dictionary x to path."""
    x["version"] = DATASET_CACHE_VERSION  # add cache version
    if is_dir_writeable(path.parent):
        if path.exists():
            path.unlink()  # remove *.cache file if exists
        np.save(str(path), x)  # save cache for next time
        path.with_suffix(".cache.npy").rename(path)  # remove .npy suffix
        LOGGER.info(f"{prefix}New cache created: {path}")
    else:
        LOGGER.warning(f"{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable, cache not saved.")


# TODO: support semantic segmentation
class SemanticDataset(BaseDataset):
    """
    Semantic Segmentation Dataset.

    This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
    from the BaseDataset class.

    Note:
        This class is currently a placeholder and needs to be populated with methods and attributes for supporting
        semantic segmentation tasks.
    """

    def __init__(self):
        """Initialize a SemanticDataset object."""
        super().__init__()
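YOLODataset.collate_fn is a staticmethod, so its batching behavior can be exercised directly on hand-made samples (an illustrative sketch; real samples come from the transforms pipeline):

```python
import torch

from ultralytics.data.dataset import YOLODataset

samples = [
    {"img": torch.zeros(3, 640, 640), "cls": torch.zeros(2, 1), "bboxes": torch.rand(2, 4), "batch_idx": torch.zeros(2)},
    {"img": torch.zeros(3, 640, 640), "cls": torch.zeros(1, 1), "bboxes": torch.rand(1, 4), "batch_idx": torch.zeros(1)},
]
batch = YOLODataset.collate_fn(samples)
print(batch["img"].shape)  # torch.Size([2, 3, 640, 640])
print(batch["batch_idx"])  # tensor([0., 0., 1.]) -> source image index per box
```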
5  ultralytics/data/explorer/__init__.py  Normal file
@@ -0,0 +1,5 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from .utils import plot_query_result

__all__ = ["plot_query_result"]
472  ultralytics/data/explorer/explorer.py  Normal file
@@ -0,0 +1,472 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

from io import BytesIO
from pathlib import Path
from typing import Any, List, Tuple, Union

import cv2
import numpy as np
import torch
from PIL import Image
from matplotlib import pyplot as plt
from pandas import DataFrame
from tqdm import tqdm

from ultralytics.data.augment import Format
from ultralytics.data.dataset import YOLODataset
from ultralytics.data.utils import check_det_dataset
from ultralytics.models.yolo.model import YOLO
from ultralytics.utils import LOGGER, IterableSimpleNamespace, checks, USER_CONFIG_DIR
from .utils import get_sim_index_schema, get_table_schema, plot_query_result, prompt_sql_query, sanitize_batch


class ExplorerDataset(YOLODataset):
    def __init__(self, *args, data: dict = None, **kwargs) -> None:
        super().__init__(*args, data=data, **kwargs)

    def load_image(self, i: int) -> Union[Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]], Tuple[None, None, None]]:
        """Loads 1 image from dataset index 'i' without any resize ops."""
        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
        if im is None:  # not cached in RAM
            if fn.exists():  # load npy
                im = np.load(fn)
            else:  # read image
                im = cv2.imread(f)  # BGR
                if im is None:
                    raise FileNotFoundError(f"Image Not Found {f}")
            h0, w0 = im.shape[:2]  # orig hw
            return im, (h0, w0), im.shape[:2]

        return self.ims[i], self.im_hw0[i], self.im_hw[i]

    def build_transforms(self, hyp: IterableSimpleNamespace = None):
        """Creates transforms for dataset images without resizing."""
        return Format(
            bbox_format="xyxy",
            normalize=False,
            return_mask=self.use_segments,
            return_keypoint=self.use_keypoints,
            batch_idx=True,
            mask_ratio=hyp.mask_ratio,
            mask_overlap=hyp.overlap_mask,
        )


class Explorer:
    def __init__(
        self,
        data: Union[str, Path] = "coco128.yaml",
        model: str = "yolov8n.pt",
        uri: str = USER_CONFIG_DIR / "explorer",
    ) -> None:
        # Note duckdb==0.10.0 bug https://github.com/ultralytics/ultralytics/pull/8181
        checks.check_requirements(["lancedb>=0.4.3", "duckdb<=0.9.2"])
        import lancedb

        self.connection = lancedb.connect(uri)
        self.table_name = Path(data).name.lower() + "_" + model.lower()
        self.sim_idx_base_name = (
            f"{self.table_name}_sim_idx".lower()
        )  # Use this name and append thres and top_k to reuse the table
        self.model = YOLO(model)
        self.data = data  # None
        self.choice_set = None

        self.table = None
        self.progress = 0

    def create_embeddings_table(self, force: bool = False, split: str = "train") -> None:
        """
        Create LanceDB table containing the embeddings of the images in the dataset. The table will be reused if it
        already exists. Pass force=True to overwrite the existing table.

        Args:
            force (bool): Whether to overwrite the existing table or not. Defaults to False.
            split (str): Split of the dataset to use. Defaults to 'train'.

        Example:
            ```python
            exp = Explorer()
            exp.create_embeddings_table()
            ```
        """
        if self.table is not None and not force:
            LOGGER.info("Table already exists. Reusing it. Pass force=True to overwrite it.")
            return
        if self.table_name in self.connection.table_names() and not force:
            LOGGER.info(f"Table {self.table_name} already exists. Reusing it. Pass force=True to overwrite it.")
            self.table = self.connection.open_table(self.table_name)
            self.progress = 1
            return
        if self.data is None:
            raise ValueError("Data must be provided to create embeddings table")

        data_info = check_det_dataset(self.data)
        if split not in data_info:
            raise ValueError(
                f"Split {split} is not found in the dataset. Available keys in the dataset are {list(data_info.keys())}"
            )

        choice_set = data_info[split]
        choice_set = choice_set if isinstance(choice_set, list) else [choice_set]
        self.choice_set = choice_set
        dataset = ExplorerDataset(img_path=choice_set, data=data_info, augment=False, cache=False, task=self.model.task)
|
||||
|
||||
# Create the table schema
|
||||
batch = dataset[0]
|
||||
vector_size = self.model.embed(batch["im_file"], verbose=False)[0].shape[0]
|
||||
table = self.connection.create_table(self.table_name, schema=get_table_schema(vector_size), mode="overwrite")
|
||||
table.add(
|
||||
self._yield_batches(
|
||||
dataset,
|
||||
data_info,
|
||||
self.model,
|
||||
exclude_keys=["img", "ratio_pad", "resized_shape", "ori_shape", "batch_idx"],
|
||||
)
|
||||
)
|
||||
|
||||
self.table = table
|
||||
|
||||
def _yield_batches(self, dataset: ExplorerDataset, data_info: dict, model: YOLO, exclude_keys: List[str]):
|
||||
"""Generates batches of data for embedding, excluding specified keys."""
|
||||
for i in tqdm(range(len(dataset))):
|
||||
self.progress = float(i + 1) / len(dataset)
|
||||
batch = dataset[i]
|
||||
for k in exclude_keys:
|
||||
batch.pop(k, None)
|
||||
batch = sanitize_batch(batch, data_info)
|
||||
batch["vector"] = model.embed(batch["im_file"], verbose=False)[0].detach().tolist()
|
||||
yield [batch]
|
||||
|
||||
def query(
|
||||
self, imgs: Union[str, np.ndarray, List[str], List[np.ndarray]] = None, limit: int = 25
|
||||
) -> Any: # pyarrow.Table
|
||||
"""
|
||||
Query the table for similar images. Accepts a single image or a list of images.
|
||||
|
||||
Args:
|
||||
imgs (str or list): Path to the image or a list of paths to the images.
|
||||
limit (int): Number of results to return.
|
||||
|
||||
Returns:
|
||||
(pyarrow.Table): An arrow table containing the results. Supports converting to:
|
||||
- pandas dataframe: `result.to_pandas()`
|
||||
- dict of lists: `result.to_pydict()`
|
||||
|
||||
Example:
|
||||
```python
|
||||
exp = Explorer()
|
||||
exp.create_embeddings_table()
|
||||
similar = exp.query(img='https://ultralytics.com/images/zidane.jpg')
|
||||
```
|
||||
"""
|
||||
if self.table is None:
|
||||
raise ValueError("Table is not created. Please create the table first.")
|
||||
if isinstance(imgs, str):
|
||||
imgs = [imgs]
|
||||
assert isinstance(imgs, list), f"img must be a string or a list of strings. Got {type(imgs)}"
|
||||
embeds = self.model.embed(imgs)
|
||||
# Get avg if multiple images are passed (len > 1)
|
||||
embeds = torch.mean(torch.stack(embeds), 0).cpu().numpy() if len(embeds) > 1 else embeds[0].cpu().numpy()
|
||||
return self.table.search(embeds).limit(limit).to_arrow()
|
||||
|
||||
def sql_query(
|
||||
self, query: str, return_type: str = "pandas"
|
||||
) -> Union[DataFrame, Any, None]: # pandas.dataframe or pyarrow.Table
|
||||
"""
|
||||
Run a SQL-Like query on the table. Utilizes LanceDB predicate pushdown.
|
||||
|
||||
Args:
|
||||
query (str): SQL query to run.
|
||||
return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.
|
||||
|
||||
Returns:
|
||||
(pyarrow.Table): An arrow table containing the results.
|
||||
|
||||
Example:
|
||||
```python
|
||||
exp = Explorer()
|
||||
exp.create_embeddings_table()
|
||||
query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'"
|
||||
result = exp.sql_query(query)
|
||||
```
|
||||
"""
|
||||
assert return_type in {
|
||||
"pandas",
|
||||
"arrow",
|
||||
}, f"Return type should be either `pandas` or `arrow`, but got {return_type}"
|
||||
import duckdb
|
||||
|
||||
if self.table is None:
|
||||
raise ValueError("Table is not created. Please create the table first.")
|
||||
|
||||
# Note: using filter pushdown would be a better long term solution. Temporarily using duckdb for this.
|
||||
table = self.table.to_arrow() # noqa NOTE: Don't comment this. This line is used by DuckDB
|
||||
if not query.startswith("SELECT") and not query.startswith("WHERE"):
|
||||
raise ValueError(
|
||||
f"Query must start with SELECT or WHERE. You can either pass the entire query or just the WHERE clause. found {query}"
|
||||
)
|
||||
if query.startswith("WHERE"):
|
||||
query = f"SELECT * FROM 'table' {query}"
|
||||
LOGGER.info(f"Running query: {query}")
|
||||
|
||||
rs = duckdb.sql(query)
|
||||
if return_type == "arrow":
|
||||
return rs.arrow()
|
||||
elif return_type == "pandas":
|
||||
return rs.df()
|
||||
|
||||
def plot_sql_query(self, query: str, labels: bool = True) -> Image.Image:
|
||||
"""
|
||||
Plot the results of a SQL-Like query on the table.
|
||||
Args:
|
||||
query (str): SQL query to run.
|
||||
labels (bool): Whether to plot the labels or not.
|
||||
|
||||
Returns:
|
||||
(PIL.Image): Image containing the plot.
|
||||
|
||||
Example:
|
||||
```python
|
||||
exp = Explorer()
|
||||
exp.create_embeddings_table()
|
||||
query = "SELECT * FROM 'table' WHERE labels LIKE '%person%'"
|
||||
result = exp.plot_sql_query(query)
|
||||
```
|
||||
"""
|
||||
result = self.sql_query(query, return_type="arrow")
|
||||
if len(result) == 0:
|
||||
LOGGER.info("No results found.")
|
||||
return None
|
||||
img = plot_query_result(result, plot_labels=labels)
|
||||
return Image.fromarray(img)
|
||||
|
||||
def get_similar(
|
||||
self,
|
||||
img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None,
|
||||
idx: Union[int, List[int]] = None,
|
||||
limit: int = 25,
|
||||
return_type: str = "pandas",
|
||||
) -> Union[DataFrame, Any]: # pandas.dataframe or pyarrow.Table
|
||||
"""
|
||||
Query the table for similar images. Accepts a single image or a list of images.
|
||||
|
||||
Args:
|
||||
img (str or list): Path to the image or a list of paths to the images.
|
||||
idx (int or list): Index of the image in the table or a list of indexes.
|
||||
limit (int): Number of results to return. Defaults to 25.
|
||||
return_type (str): Type of the result to return. Can be either 'pandas' or 'arrow'. Defaults to 'pandas'.
|
||||
|
||||
Returns:
|
||||
(pandas.DataFrame): A dataframe containing the results.
|
||||
|
||||
Example:
|
||||
```python
|
||||
exp = Explorer()
|
||||
exp.create_embeddings_table()
|
||||
similar = exp.get_similar(img='https://ultralytics.com/images/zidane.jpg')
|
||||
```
|
||||
"""
|
||||
assert return_type in {
|
||||
"pandas",
|
||||
"arrow",
|
||||
}, f"Return type should be either `pandas` or `arrow`, but got {return_type}"
|
||||
img = self._check_imgs_or_idxs(img, idx)
|
||||
similar = self.query(img, limit=limit)
|
||||
|
||||
if return_type == "arrow":
|
||||
return similar
|
||||
elif return_type == "pandas":
|
||||
return similar.to_pandas()
|
||||
|
||||
def plot_similar(
|
||||
self,
|
||||
img: Union[str, np.ndarray, List[str], List[np.ndarray]] = None,
|
||||
idx: Union[int, List[int]] = None,
|
||||
limit: int = 25,
|
||||
labels: bool = True,
|
||||
) -> Image.Image:
|
||||
"""
|
||||
Plot the similar images. Accepts images or indexes.
|
||||
|
||||
Args:
|
||||
img (str or list): Path to the image or a list of paths to the images.
|
||||
idx (int or list): Index of the image in the table or a list of indexes.
|
||||
labels (bool): Whether to plot the labels or not.
|
||||
limit (int): Number of results to return. Defaults to 25.
|
||||
|
||||
Returns:
|
||||
(PIL.Image): Image containing the plot.
|
||||
|
||||
Example:
|
||||
```python
|
||||
exp = Explorer()
|
||||
exp.create_embeddings_table()
|
||||
similar = exp.plot_similar(img='https://ultralytics.com/images/zidane.jpg')
|
||||
```
|
||||
"""
|
||||
similar = self.get_similar(img, idx, limit, return_type="arrow")
|
||||
if len(similar) == 0:
|
||||
LOGGER.info("No results found.")
|
||||
return None
|
||||
img = plot_query_result(similar, plot_labels=labels)
|
||||
return Image.fromarray(img)
|
||||
|
||||
def similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> DataFrame:
|
||||
"""
|
||||
Calculate the similarity index of all the images in the table. Here, the index will contain the data points that
|
||||
are max_dist or closer to the image in the embedding space at a given index.
|
||||
|
||||
Args:
|
||||
max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2.
|
||||
top_k (float): Percentage of the closest data points to consider when counting. Used to apply limit when running
|
||||
vector search. Defaults: None.
|
||||
force (bool): Whether to overwrite the existing similarity index or not. Defaults to True.
|
||||
|
||||
Returns:
|
||||
(pandas.DataFrame): A dataframe containing the similarity index. Each row corresponds to an image, and columns
|
||||
include indices of similar images and their respective distances.
|
||||
|
||||
Example:
|
||||
```python
|
||||
exp = Explorer()
|
||||
exp.create_embeddings_table()
|
||||
sim_idx = exp.similarity_index()
|
||||
```
|
||||
"""
|
||||
if self.table is None:
|
||||
raise ValueError("Table is not created. Please create the table first.")
|
||||
sim_idx_table_name = f"{self.sim_idx_base_name}_thres_{max_dist}_top_{top_k}".lower()
|
||||
if sim_idx_table_name in self.connection.table_names() and not force:
|
||||
LOGGER.info("Similarity matrix already exists. Reusing it. Pass force=True to overwrite it.")
|
||||
return self.connection.open_table(sim_idx_table_name).to_pandas()
|
||||
|
||||
if top_k and not (1.0 >= top_k >= 0.0):
|
||||
raise ValueError(f"top_k must be between 0.0 and 1.0. Got {top_k}")
|
||||
if max_dist < 0.0:
|
||||
raise ValueError(f"max_dist must be greater than 0. Got {max_dist}")
|
||||
|
||||
top_k = int(top_k * len(self.table)) if top_k else len(self.table)
|
||||
top_k = max(top_k, 1)
|
||||
features = self.table.to_lance().to_table(columns=["vector", "im_file"]).to_pydict()
|
||||
im_files = features["im_file"]
|
||||
embeddings = features["vector"]
|
||||
|
||||
sim_table = self.connection.create_table(sim_idx_table_name, schema=get_sim_index_schema(), mode="overwrite")
|
||||
|
||||
def _yield_sim_idx():
|
||||
"""Generates a dataframe with similarity indices and distances for images."""
|
||||
for i in tqdm(range(len(embeddings))):
|
||||
sim_idx = self.table.search(embeddings[i]).limit(top_k).to_pandas().query(f"_distance <= {max_dist}")
|
||||
yield [
|
||||
{
|
||||
"idx": i,
|
||||
"im_file": im_files[i],
|
||||
"count": len(sim_idx),
|
||||
"sim_im_files": sim_idx["im_file"].tolist(),
|
||||
}
|
||||
]
|
||||
|
||||
sim_table.add(_yield_sim_idx())
|
||||
self.sim_index = sim_table
|
||||
return sim_table.to_pandas()
|
||||
|
||||
def plot_similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> Image:
|
||||
"""
|
||||
Plot the similarity index of all the images in the table. Here, the index will contain the data points that are
|
||||
max_dist or closer to the image in the embedding space at a given index.
|
||||
|
||||
Args:
|
||||
max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2.
|
||||
top_k (float): Percentage of closest data points to consider when counting. Used to apply limit when
|
||||
running vector search. Defaults to 0.01.
|
||||
force (bool): Whether to overwrite the existing similarity index or not. Defaults to True.
|
||||
|
||||
Returns:
|
||||
(PIL.Image): Image containing the plot.
|
||||
|
||||
Example:
|
||||
```python
|
||||
exp = Explorer()
|
||||
exp.create_embeddings_table()
|
||||
|
||||
similarity_idx_plot = exp.plot_similarity_index()
|
||||
similarity_idx_plot.show() # view image preview
|
||||
similarity_idx_plot.save('path/to/save/similarity_index_plot.png') # save contents to file
|
||||
```
|
||||
"""
|
||||
sim_idx = self.similarity_index(max_dist=max_dist, top_k=top_k, force=force)
|
||||
sim_count = sim_idx["count"].tolist()
|
||||
sim_count = np.array(sim_count)
|
||||
|
||||
indices = np.arange(len(sim_count))
|
||||
|
||||
# Create the bar plot
|
||||
plt.bar(indices, sim_count)
|
||||
|
||||
# Customize the plot (optional)
|
||||
plt.xlabel("data idx")
|
||||
plt.ylabel("Count")
|
||||
plt.title("Similarity Count")
|
||||
buffer = BytesIO()
|
||||
plt.savefig(buffer, format="png")
|
||||
buffer.seek(0)
|
||||
|
||||
# Use Pillow to open the image from the buffer
|
||||
return Image.fromarray(np.array(Image.open(buffer)))
|
||||
|
||||
def _check_imgs_or_idxs(
|
||||
self, img: Union[str, np.ndarray, List[str], List[np.ndarray], None], idx: Union[None, int, List[int]]
|
||||
) -> List[np.ndarray]:
|
||||
if img is None and idx is None:
|
||||
raise ValueError("Either img or idx must be provided.")
|
||||
if img is not None and idx is not None:
|
||||
raise ValueError("Only one of img or idx must be provided.")
|
||||
if idx is not None:
|
||||
idx = idx if isinstance(idx, list) else [idx]
|
||||
img = self.table.to_lance().take(idx, columns=["im_file"]).to_pydict()["im_file"]
|
||||
|
||||
return img if isinstance(img, list) else [img]
|
||||
|
||||
def ask_ai(self, query):
|
||||
"""
|
||||
Ask AI a question.
|
||||
|
||||
Args:
|
||||
query (str): Question to ask.
|
||||
|
||||
Returns:
|
||||
(pandas.DataFrame): A dataframe containing filtered results to the SQL query.
|
||||
|
||||
Example:
|
||||
```python
|
||||
exp = Explorer()
|
||||
exp.create_embeddings_table()
|
||||
answer = exp.ask_ai('Show images with 1 person and 2 dogs')
|
||||
```
|
||||
"""
|
||||
result = prompt_sql_query(query)
|
||||
try:
|
||||
df = self.sql_query(result)
|
||||
except Exception as e:
|
||||
LOGGER.error("AI generated query is not valid. Please try again with a different prompt")
|
||||
LOGGER.error(e)
|
||||
return None
|
||||
return df
|
||||
|
||||
def visualize(self, result):
|
||||
"""
|
||||
Visualize the results of a query. TODO.
|
||||
|
||||
Args:
|
||||
result (pyarrow.Table): Table containing the results of a query.
|
||||
"""
|
||||
pass
|
||||
|
||||
def generate_report(self, result):
|
||||
"""
|
||||
Generate a report of the dataset.
|
||||
|
||||
TODO
|
||||
"""
|
||||
pass
|
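Taken together, `Explorer` supports a create-once, query-many workflow. A hedged end-to-end sketch, assuming the default `coco128.yaml` dataset and `yolov8n.pt` weights are available locally (both are typically fetched on first use in a standard Ultralytics install):

```python
from ultralytics import Explorer

# Build (or reuse) the LanceDB embeddings table, then query it three ways.
exp = Explorer(data="coco128.yaml", model="yolov8n.pt")
exp.create_embeddings_table()

# 1. Vector search against an example image
similar = exp.get_similar(img="https://ultralytics.com/images/zidane.jpg", limit=10)

# 2. SQL-like filtering (a WHERE-only query is expanded to SELECT * FROM 'table' ...)
df = exp.sql_query("WHERE labels LIKE '%person%'")

# 3. Dataset-wide similarity index: one row per image with its near-duplicate count
sim_idx = exp.similarity_index(max_dist=0.2)
print(sim_idx[["im_file", "count"]].head())
```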
1
ultralytics/data/explorer/gui/__init__.py
Normal file
@ -0,0 +1 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license
268
ultralytics/data/explorer/gui/dash.py
Normal file
@ -0,0 +1,268 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import time
from threading import Thread

import pandas as pd

from ultralytics import Explorer
from ultralytics.utils import ROOT, SETTINGS
from ultralytics.utils.checks import check_requirements

check_requirements(("streamlit>=1.29.0", "streamlit-select>=0.3"))

import streamlit as st
from streamlit_select import image_select


def _get_explorer():
    """Initializes an Explorer instance and creates its embeddings table, tracking progress in the UI."""
    exp = Explorer(data=st.session_state.get("dataset"), model=st.session_state.get("model"))
    thread = Thread(
        target=exp.create_embeddings_table, kwargs={"force": st.session_state.get("force_recreate_embeddings")}
    )
    thread.start()
    progress_bar = st.progress(0, text="Creating embeddings table...")
    while exp.progress < 1:
        time.sleep(0.1)
        progress_bar.progress(exp.progress, text=f"Progress: {exp.progress * 100}%")
    thread.join()
    st.session_state["explorer"] = exp
    progress_bar.empty()
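`_get_explorer` runs the potentially slow `create_embeddings_table` call on a background thread and polls the `Explorer.progress` attribute (which `_yield_batches` updates from the worker side) on the UI thread. A minimal, framework-free sketch of that polling pattern, with an illustrative `Job` class standing in for `Explorer`:

```python
import time
from threading import Thread


class Job:
    """Illustrative stand-in for Explorer: a worker updates a shared progress attribute."""

    progress = 0.0

    def run(self):
        for i in range(10):
            time.sleep(0.05)  # stand-in for embedding one batch
            self.progress = (i + 1) / 10


job = Job()
t = Thread(target=job.run)
t.start()
while job.progress < 1:  # the caller polls while the worker runs
    time.sleep(0.02)
    print(f"Progress: {job.progress * 100:.0f}%")
t.join()
```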
def init_explorer_form():
    """Sets up a Streamlit form to initialize the Explorer with dataset and model selection."""
    datasets = ROOT / "cfg" / "datasets"
    ds = [d.name for d in datasets.glob("*.yaml")]
    models = [
        "yolov8n.pt",
        "yolov8s.pt",
        "yolov8m.pt",
        "yolov8l.pt",
        "yolov8x.pt",
        "yolov8n-seg.pt",
        "yolov8s-seg.pt",
        "yolov8m-seg.pt",
        "yolov8l-seg.pt",
        "yolov8x-seg.pt",
        "yolov8n-pose.pt",
        "yolov8s-pose.pt",
        "yolov8m-pose.pt",
        "yolov8l-pose.pt",
        "yolov8x-pose.pt",
    ]
    with st.form(key="explorer_init_form"):
        col1, col2 = st.columns(2)
        with col1:
            st.selectbox("Select dataset", ds, key="dataset", index=ds.index("coco128.yaml"))
        with col2:
            st.selectbox("Select model", models, key="model")
        st.checkbox("Force recreate embeddings", key="force_recreate_embeddings")

        st.form_submit_button("Explore", on_click=_get_explorer)


def query_form():
    """Sets up a Streamlit form for running SQL queries against the embeddings table."""
    with st.form("query_form"):
        col1, col2 = st.columns([0.8, 0.2])
        with col1:
            st.text_input(
                "Query",
                "WHERE labels LIKE '%person%' AND labels LIKE '%dog%'",
                label_visibility="collapsed",
                key="query",
            )
        with col2:
            st.form_submit_button("Query", on_click=run_sql_query)


def ai_query_form():
    """Sets up a Streamlit form for natural-language queries answered by AI-generated SQL."""
    with st.form("ai_query_form"):
        col1, col2 = st.columns([0.8, 0.2])
        with col1:
            st.text_input("Query", "Show images with 1 person and 1 dog", label_visibility="collapsed", key="ai_query")
        with col2:
            st.form_submit_button("Ask AI", on_click=run_ai_query)


def find_similar_imgs(imgs):
    """Searches for images similar to the given ones and stores the results in session state."""
    exp = st.session_state["explorer"]
    similar = exp.get_similar(img=imgs, limit=st.session_state.get("limit"), return_type="arrow")
    paths = similar.to_pydict()["im_file"]
    st.session_state["imgs"] = paths
    st.session_state["res"] = similar


def similarity_form(selected_imgs):
    """Renders the similarity search form for the currently selected images."""
    st.write("Similarity Search")
    with st.form("similarity_form"):
        subcol1, subcol2 = st.columns([1, 1])
        with subcol1:
            st.number_input(
                "limit", min_value=None, max_value=None, value=25, label_visibility="collapsed", key="limit"
            )

        with subcol2:
            disabled = not len(selected_imgs)
            st.write("Selected: ", len(selected_imgs))
            st.form_submit_button(
                "Search",
                disabled=disabled,
                on_click=find_similar_imgs,
                args=(selected_imgs,),
            )
        if disabled:
            st.error("Select at least one image to search.")


# def persist_reset_form():
#     with st.form("persist_reset"):
#         col1, col2 = st.columns([1, 1])
#         with col1:
#             st.form_submit_button("Reset", on_click=reset)
#
#         with col2:
#             st.form_submit_button("Persist", on_click=update_state, args=("PERSISTING", True))


def run_sql_query():
    """Executes the SQL query from session state and stores the results."""
    st.session_state["error"] = None
    query = st.session_state.get("query")
    if query.strip():
        exp = st.session_state["explorer"]
        res = exp.sql_query(query, return_type="arrow")
        st.session_state["imgs"] = res.to_pydict()["im_file"]
        st.session_state["res"] = res


def run_ai_query():
    """Executes an AI-generated SQL query and updates session state with the results."""
    if not SETTINGS["openai_api_key"]:
        st.session_state["error"] = (
            'OpenAI API key not found in settings. Please run yolo settings openai_api_key="..."'
        )
        return
    st.session_state["error"] = None
    query = st.session_state.get("ai_query")
    if query.strip():
        exp = st.session_state["explorer"]
        res = exp.ask_ai(query)
        if not isinstance(res, pd.DataFrame) or res.empty:
            st.session_state["error"] = "No results found using AI generated query. Try another query or rerun it."
            return
        st.session_state["imgs"] = res["im_file"].to_list()
        st.session_state["res"] = res


def reset_explorer():
    """Resets the explorer to its initial state by clearing session variables."""
    st.session_state["explorer"] = None
    st.session_state["imgs"] = None
    st.session_state["error"] = None


def ultralytics_explorer_docs_callback():
    """Displays a panel linking to the Ultralytics Explorer API documentation."""
    with st.container(border=True):
        st.image(
            "https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg",
            width=100,
        )
        st.markdown(
            "<p>This demo is built using Ultralytics Explorer API. Visit <a href='https://docs.ultralytics.com/datasets/explorer/'>API docs</a> to try examples & learn more</p>",
            unsafe_allow_html=True,
            help=None,
        )
        st.link_button("Ultralytics Explorer API", "https://docs.ultralytics.com/datasets/explorer/")


def layout():
    """Renders the Explorer dashboard: dataset selection, image grid, query forms, and similarity search."""
    st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
    st.markdown("<h1 style='text-align: center;'>Ultralytics Explorer Demo</h1>", unsafe_allow_html=True)

    if st.session_state.get("explorer") is None:
        init_explorer_form()
        return

    st.button(":arrow_backward: Select Dataset", on_click=reset_explorer)
    exp = st.session_state.get("explorer")
    col1, col2 = st.columns([0.75, 0.25], gap="small")
    imgs = []
    if st.session_state.get("error"):
        st.error(st.session_state["error"])
    else:
        if st.session_state.get("imgs"):
            imgs = st.session_state.get("imgs")
        else:
            imgs = exp.table.to_lance().to_table(columns=["im_file"]).to_pydict()["im_file"]
            st.session_state["res"] = exp.table.to_arrow()
    total_imgs, selected_imgs = len(imgs), []
    with col1:
        subcol1, subcol2, subcol3, subcol4, subcol5 = st.columns(5)
        with subcol1:
            st.write("Max Images Displayed:")
        with subcol2:
            num = st.number_input(
                "Max Images Displayed",
                min_value=0,
                max_value=total_imgs,
                value=min(500, total_imgs),
                key="num_imgs_displayed",
                label_visibility="collapsed",
            )
        with subcol3:
            st.write("Start Index:")
        with subcol4:
            start_idx = st.number_input(
                "Start Index",
                min_value=0,
                max_value=total_imgs,
                value=0,
                key="start_index",
                label_visibility="collapsed",
            )
        with subcol5:
            reset = st.button("Reset", use_container_width=False, key="reset")
            if reset:
                st.session_state["imgs"] = None
                st.experimental_rerun()

        query_form()
        ai_query_form()
        if total_imgs:
            labels, boxes, masks, kpts, classes = None, None, None, None, None
            task = exp.model.task
            if st.session_state.get("display_labels"):
                labels = st.session_state.get("res").to_pydict()["labels"][start_idx : start_idx + num]
                boxes = st.session_state.get("res").to_pydict()["bboxes"][start_idx : start_idx + num]
                masks = st.session_state.get("res").to_pydict()["masks"][start_idx : start_idx + num]
                kpts = st.session_state.get("res").to_pydict()["keypoints"][start_idx : start_idx + num]
                classes = st.session_state.get("res").to_pydict()["cls"][start_idx : start_idx + num]
            imgs_displayed = imgs[start_idx : start_idx + num]
            selected_imgs = image_select(
                f"Total samples: {total_imgs}",
                images=imgs_displayed,
                use_container_width=False,
                # indices=[i for i in range(num)] if select_all else None,
                labels=labels,
                classes=classes,
                bboxes=boxes,
                masks=masks if task == "segment" else None,
                kpts=kpts if task == "pose" else None,
            )

    with col2:
        similarity_form(selected_imgs)
        display_labels = st.checkbox("Labels", value=False, key="display_labels")
        ultralytics_explorer_docs_callback()


if __name__ == "__main__":
    layout()
166
ultralytics/data/explorer/utils.py
Normal file
@ -0,0 +1,166 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import getpass
from typing import List

import cv2
import numpy as np
import pandas as pd

from ultralytics.data.augment import LetterBox
from ultralytics.utils import LOGGER as logger
from ultralytics.utils import SETTINGS
from ultralytics.utils.checks import check_requirements
from ultralytics.utils.ops import xyxy2xywh
from ultralytics.utils.plotting import plot_images


def get_table_schema(vector_size):
    """Returns a LanceDB schema for the embeddings table with the specified vector size."""
    from lancedb.pydantic import LanceModel, Vector

    class Schema(LanceModel):
        im_file: str
        labels: List[str]
        cls: List[int]
        bboxes: List[List[float]]
        masks: List[List[List[int]]]
        keypoints: List[List[List[float]]]
        vector: Vector(vector_size)

    return Schema


def get_sim_index_schema():
    """Returns a LanceDB schema for the similarity index table."""
    from lancedb.pydantic import LanceModel

    class Schema(LanceModel):
        idx: int
        im_file: str
        count: int
        sim_im_files: List[str]

    return Schema


def sanitize_batch(batch, dataset_info):
    """Sanitizes an input batch for database insertion, flattening classes and sorting boxes by class."""
    batch["cls"] = batch["cls"].flatten().int().tolist()
    box_cls_pair = sorted(zip(batch["bboxes"].tolist(), batch["cls"]), key=lambda x: x[1])
    batch["bboxes"] = [box for box, _ in box_cls_pair]
    batch["cls"] = [cls for _, cls in box_cls_pair]
    batch["labels"] = [dataset_info["names"][i] for i in batch["cls"]]
    batch["masks"] = batch["masks"].tolist() if "masks" in batch else [[[]]]
    batch["keypoints"] = batch["keypoints"].tolist() if "keypoints" in batch else [[[]]]
    return batch
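To make the `sanitize_batch` transformation concrete, here is a small hedged example with hand-made tensors; the `batch` and `info` names and values are illustrative, not part of the API:

```python
import torch

from ultralytics.data.explorer.utils import sanitize_batch

# Two detections, deliberately out of class order.
batch = {
    "cls": torch.tensor([[1.0], [0.0]]),
    "bboxes": torch.tensor([[10.0, 10.0, 50.0, 50.0], [5.0, 5.0, 20.0, 20.0]]),
}
info = {"names": {0: "person", 1: "dog"}}

out = sanitize_batch(batch, info)
print(out["cls"])     # [0, 1] -> flattened and sorted by class id
print(out["labels"])  # ['person', 'dog'] -> mapped through the dataset names
print(out["bboxes"])  # boxes reordered to stay aligned with their classes
print(out["masks"])   # [[[]]] -> placeholder, since no masks were supplied
```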
def plot_query_result(similar_set, plot_labels=True):
    """
    Plot images from the similar set.

    Args:
        similar_set (list): Pyarrow or pandas object containing the similar data points
        plot_labels (bool): Whether to plot labels or not
    """
    similar_set = (
        similar_set.to_dict(orient="list") if isinstance(similar_set, pd.DataFrame) else similar_set.to_pydict()
    )
    empty_masks = [[[]]]
    empty_boxes = [[]]
    images = similar_set.get("im_file", [])
    bboxes = similar_set.get("bboxes", []) if similar_set.get("bboxes") is not empty_boxes else []
    masks = similar_set.get("masks") if similar_set.get("masks")[0] != empty_masks else []
    kpts = similar_set.get("keypoints") if similar_set.get("keypoints")[0] != empty_masks else []
    cls = similar_set.get("cls", [])

    plot_size = 640
    imgs, batch_idx, plot_boxes, plot_masks, plot_kpts = [], [], [], [], []
    for i, imf in enumerate(images):
        im = cv2.imread(imf)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        h, w = im.shape[:2]
        r = min(plot_size / h, plot_size / w)
        imgs.append(LetterBox(plot_size, center=False)(image=im).transpose(2, 0, 1))
        if plot_labels:
            if len(bboxes) > i and len(bboxes[i]) > 0:
                box = np.array(bboxes[i], dtype=np.float32)
                box[:, [0, 2]] *= r
                box[:, [1, 3]] *= r
                plot_boxes.append(box)
            if len(masks) > i and len(masks[i]) > 0:
                mask = np.array(masks[i], dtype=np.uint8)[0]
                plot_masks.append(LetterBox(plot_size, center=False)(image=mask))
            if len(kpts) > i and kpts[i] is not None:
                kpt = np.array(kpts[i], dtype=np.float32)
                kpt[:, :, :2] *= r
                plot_kpts.append(kpt)
        batch_idx.append(np.ones(len(np.array(bboxes[i], dtype=np.float32))) * i)
    imgs = np.stack(imgs, axis=0)
    masks = np.stack(plot_masks, axis=0) if plot_masks else np.zeros(0, dtype=np.uint8)
    kpts = np.concatenate(plot_kpts, axis=0) if plot_kpts else np.zeros((0, 51), dtype=np.float32)
    boxes = xyxy2xywh(np.concatenate(plot_boxes, axis=0)) if plot_boxes else np.zeros(0, dtype=np.float32)
    batch_idx = np.concatenate(batch_idx, axis=0)
    cls = np.concatenate([np.array(c, dtype=np.int32) for c in cls], axis=0)

    return plot_images(
        imgs, batch_idx, cls, bboxes=boxes, masks=masks, kpts=kpts, max_subplots=len(images), save=False, threaded=False
    )


def prompt_sql_query(query):
    """Prompts an LLM to translate a natural-language request into a single SQL query for the table."""
    check_requirements("openai>=1.6.1")
    from openai import OpenAI

    if not SETTINGS["openai_api_key"]:
        logger.warning("OpenAI API key not found in settings. Please enter your API key below.")
        openai_api_key = getpass.getpass("OpenAI API key: ")
        SETTINGS.update({"openai_api_key": openai_api_key})
    openai = OpenAI(api_key=SETTINGS["openai_api_key"])

    messages = [
        {
            "role": "system",
            "content": """
                You are a helpful data scientist proficient in SQL. You need to output exactly one SQL query based on
                the following schema and a user request. You only need to output the format with fixed selection
                statement that selects everything from "'table'", like `SELECT * from 'table'`

                Schema:
                im_file: string not null
                labels: list<item: string> not null
                    child 0, item: string
                cls: list<item: int64> not null
                    child 0, item: int64
                bboxes: list<item: list<item: double>> not null
                    child 0, item: list<item: double>
                        child 0, item: double
                masks: list<item: list<item: list<item: int64>>> not null
                    child 0, item: list<item: list<item: int64>>
                        child 0, item: list<item: int64>
                            child 0, item: int64
                keypoints: list<item: list<item: list<item: double>>> not null
                    child 0, item: list<item: list<item: double>>
                        child 0, item: list<item: double>
                            child 0, item: double
                vector: fixed_size_list<item: float>[256] not null
                    child 0, item: float

                Some details about the schema:
                - the "labels" column contains the string values like 'person' and 'dog' for the respective objects
                    in each image
                - the "cls" column contains the integer values of these classes that map them to the labels

                Example of a correct query:
                request - Get all data points that contain 2 or more people and at least one dog
                correct query-
                SELECT * FROM 'table' WHERE ARRAY_LENGTH(cls) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'person')) >= 2 AND ARRAY_LENGTH(FILTER(labels, x -> x = 'dog')) >= 1;
            """,
        },
        {"role": "user", "content": f"{query}"},
    ]

    response = openai.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
    return response.choices[0].message.content
555
ultralytics/data/loaders.py
Normal file
@ -0,0 +1,555 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import glob
import math
import os
import time
from dataclasses import dataclass
from pathlib import Path
from threading import Thread
from urllib.parse import urlparse

import cv2
import numpy as np
import requests
import torch
from PIL import Image

from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
from ultralytics.utils import LOGGER, is_colab, is_kaggle, ops
from ultralytics.utils.checks import check_requirements


@dataclass
class SourceTypes:
    """Class to represent various types of input sources for predictions."""

    stream: bool = False
    screenshot: bool = False
    from_img: bool = False
    tensor: bool = False


class LoadStreams:
    """
    Stream loader for various types of video streams. Supports RTSP, RTMP, HTTP, and TCP streams.

    Attributes:
        sources (str): The source input paths or URLs for the video streams.
        vid_stride (int): Video frame-rate stride, defaults to 1.
        buffer (bool): Whether to buffer input streams, defaults to False.
        running (bool): Flag to indicate if the streaming thread is running.
        mode (str): Set to 'stream' indicating real-time capture.
        imgs (list): List of image frames for each stream.
        fps (list): List of FPS for each stream.
        frames (list): List of total frames for each stream.
        threads (list): List of threads for each stream.
        shape (list): List of shapes for each stream.
        caps (list): List of cv2.VideoCapture objects for each stream.
        bs (int): Batch size for processing.

    Methods:
        __init__: Initialize the stream loader.
        update: Read stream frames in daemon thread.
        close: Close stream loader and release resources.
        __iter__: Returns an iterator object for the class.
        __next__: Returns source paths, transformed, and original images for processing.
        __len__: Return the length of the sources object.

    Example:
        ```bash
        yolo predict source='rtsp://example.com/media.mp4'
        ```
    """

    def __init__(self, sources="file.streams", vid_stride=1, buffer=False):
        """Initialize instance variables and check for consistent input stream shapes."""
        torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
        self.buffer = buffer  # buffer input streams
        self.running = True  # running flag for Thread
        self.mode = "stream"
        self.vid_stride = vid_stride  # video frame-rate stride

        sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources]
        n = len(sources)
        self.bs = n
        self.fps = [0] * n  # frames per second
        self.frames = [0] * n
        self.threads = [None] * n
        self.caps = [None] * n  # video capture objects
        self.imgs = [[] for _ in range(n)]  # images
        self.shape = [[] for _ in range(n)]  # image shapes
        self.sources = [ops.clean_str(x) for x in sources]  # clean source names for later
        for i, s in enumerate(sources):  # index, source
            # Start thread to read frames from video stream
            st = f"{i + 1}/{n}: {s}... "
            if urlparse(s).hostname in ("www.youtube.com", "youtube.com", "youtu.be"):  # if source is YouTube video
                # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/LNwODJXcvt4'
                s = get_best_youtube_url(s)
            s = eval(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
            if s == 0 and (is_colab() or is_kaggle()):
                raise NotImplementedError(
                    "'source=0' webcam not supported in Colab and Kaggle notebooks. "
                    "Try running 'source=0' in a local environment."
                )
            self.caps[i] = cv2.VideoCapture(s)  # store video capture object
            if not self.caps[i].isOpened():
                raise ConnectionError(f"{st}Failed to open {s}")
            w = int(self.caps[i].get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(self.caps[i].get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = self.caps[i].get(cv2.CAP_PROP_FPS)  # warning: may return 0 or nan
            self.frames[i] = max(int(self.caps[i].get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float(
                "inf"
            )  # infinite stream fallback
            self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30  # 30 FPS fallback

            success, im = self.caps[i].read()  # guarantee first frame
            if not success or im is None:
                raise ConnectionError(f"{st}Failed to read images from {s}")
            self.imgs[i].append(im)
            self.shape[i] = im.shape
            self.threads[i] = Thread(target=self.update, args=([i, self.caps[i], s]), daemon=True)
            LOGGER.info(f"{st}Success ✅ ({self.frames[i]} frames of shape {w}x{h} at {self.fps[i]:.2f} FPS)")
            self.threads[i].start()
        LOGGER.info("")  # newline

    def update(self, i, cap, stream):
        """Read stream `i` frames in daemon thread."""
        n, f = 0, self.frames[i]  # frame number, frame array
        while self.running and cap.isOpened() and n < (f - 1):
            if len(self.imgs[i]) < 30:  # keep a <=30-image buffer
                n += 1
                cap.grab()  # .read() = .grab() followed by .retrieve()
                if n % self.vid_stride == 0:
                    success, im = cap.retrieve()
                    if not success:
                        im = np.zeros(self.shape[i], dtype=np.uint8)
                        LOGGER.warning("WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.")
                        cap.open(stream)  # re-open stream if signal was lost
                    if self.buffer:
                        self.imgs[i].append(im)
                    else:
                        self.imgs[i] = [im]
            else:
                time.sleep(0.01)  # wait until the buffer is empty

    def close(self):
        """Close stream loader and release resources."""
        self.running = False  # stop flag for Thread
        for thread in self.threads:
            if thread.is_alive():
                thread.join(timeout=5)  # Add timeout
        for cap in self.caps:  # Iterate through the stored VideoCapture objects
            try:
                cap.release()  # release video capture
            except Exception as e:
                LOGGER.warning(f"WARNING ⚠️ Could not release VideoCapture object: {e}")
        cv2.destroyAllWindows()

    def __iter__(self):
        """Iterates through YOLO image feed and re-opens unresponsive streams."""
        self.count = -1
        return self

    def __next__(self):
        """Returns source paths, transformed and original images for processing."""
        self.count += 1

        images = []
        for i, x in enumerate(self.imgs):
            # Wait until a frame is available in each buffer
            while not x:
                if not self.threads[i].is_alive() or cv2.waitKey(1) == ord("q"):  # q to quit
                    self.close()
                    raise StopIteration
                time.sleep(1 / min(self.fps))
                x = self.imgs[i]
                if not x:
                    LOGGER.warning(f"WARNING ⚠️ Waiting for stream {i}")

            # Get and remove the first frame from imgs buffer
            if self.buffer:
                images.append(x.pop(0))

            # Get the last frame, and clear the rest from the imgs buffer
            else:
                images.append(x.pop(-1) if x else np.zeros(self.shape[i], dtype=np.uint8))
                x.clear()

        return self.sources, images, [""] * self.bs

    def __len__(self):
        """Return the length of the sources object."""
        return self.bs  # 1E12 frames = 32 streams at 30 FPS for 30 years
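A hedged usage sketch for `LoadStreams`; the RTSP URL is a placeholder, and a reachable stream (or a local video file path) is assumed:

```python
from ultralytics.data.loaders import LoadStreams

# Placeholder source; substitute a reachable RTSP/HTTP stream or video file.
loader = LoadStreams("rtsp://example.com/media.mp4", vid_stride=1, buffer=False)
try:
    for sources, images, info in loader:
        frame = images[0]  # latest BGR frame from the first stream
        print(sources[0], frame.shape)
        break  # stop after one batch for this demo
finally:
    loader.close()  # stop the reader threads and release capture handles
```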
class LoadScreenshots:
    """
    YOLOv8 screenshot dataloader.

    This class manages the loading of screenshot images for processing with YOLOv8.
    Suitable for use with `yolo predict source=screen`.

    Attributes:
        source (str): The source input indicating which screen to capture.
        screen (int): The screen number to capture.
        left (int): The left coordinate for screen capture area.
        top (int): The top coordinate for screen capture area.
        width (int): The width of the screen capture area.
        height (int): The height of the screen capture area.
        mode (str): Set to 'stream' indicating real-time capture.
        frame (int): Counter for captured frames.
        sct (mss.mss): Screen capture object from `mss` library.
        bs (int): Batch size, set to 1.
        monitor (dict): Monitor configuration details.

    Methods:
        __iter__: Returns an iterator object.
        __next__: Captures the next screenshot and returns it.
    """

    def __init__(self, source):
        """Source = [screen_number left top width height] (pixels)."""
        check_requirements("mss")
        import mss  # noqa

        source, *params = source.split()
        self.screen, left, top, width, height = 0, None, None, None, None  # default to full screen 0
        if len(params) == 1:
            self.screen = int(params[0])
        elif len(params) == 4:
            left, top, width, height = (int(x) for x in params)
        elif len(params) == 5:
            self.screen, left, top, width, height = (int(x) for x in params)
        self.mode = "stream"
        self.frame = 0
        self.sct = mss.mss()
        self.bs = 1
        self.fps = 30

        # Parse monitor shape
        monitor = self.sct.monitors[self.screen]
        self.top = monitor["top"] if top is None else (monitor["top"] + top)
        self.left = monitor["left"] if left is None else (monitor["left"] + left)
        self.width = width or monitor["width"]
        self.height = height or monitor["height"]
        self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}

    def __iter__(self):
        """Returns an iterator of the object."""
        return self

    def __next__(self):
        """mss screen capture: get raw pixels from the screen as np array."""
        im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3]  # BGRA to BGR
        s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "

        self.frame += 1
        return [str(self.screen)], [im0], [s]  # screen, img, string
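A hedged sketch of grabbing one frame from a capture region; a desktop session and the `mss` package are assumed, and the coordinates are illustrative:

```python
from ultralytics.data.loaders import LoadScreenshots

# 'screen left top width height' in pixels; here a 640x480 region of screen 0.
loader = LoadScreenshots("0 100 100 640 480")
paths, imgs, info = next(iter(loader))  # one BGR screenshot per iteration
print(paths[0], imgs[0].shape)  # e.g. '0' (480, 640, 3)
```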
class LoadImagesAndVideos:
    """
    YOLOv8 image/video dataloader.

    This class manages the loading and pre-processing of image and video data for YOLOv8. It supports loading from
    various formats, including single image files, video files, and lists of image and video paths.

    Attributes:
        files (list): List of image and video file paths.
        nf (int): Total number of files (images and videos).
        video_flag (list): Flags indicating whether a file is a video (True) or an image (False).
        mode (str): Current mode, 'image' or 'video'.
        vid_stride (int): Stride for video frame-rate, defaults to 1.
        bs (int): Batch size, set to 1 for this class.
        cap (cv2.VideoCapture): Video capture object for OpenCV.
        frame (int): Frame counter for video.
        frames (int): Total number of frames in the video.
        count (int): Counter for iteration, initialized at 0 during `__iter__()`.

    Methods:
        _new_video(path): Create a new cv2.VideoCapture object for a given video path.
    """

    def __init__(self, path, batch=1, vid_stride=1):
        """Initialize the Dataloader and raise FileNotFoundError if file not found."""
        parent = None
        if isinstance(path, str) and Path(path).suffix == ".txt":  # *.txt file with img/vid/dir on each line
            parent = Path(path).parent
            path = Path(path).read_text().splitlines()  # list of sources
        files = []
        for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
            a = str(Path(p).absolute())  # do not use .resolve() https://github.com/ultralytics/ultralytics/issues/2912
            if "*" in a:
                files.extend(sorted(glob.glob(a, recursive=True)))  # glob
            elif os.path.isdir(a):
                files.extend(sorted(glob.glob(os.path.join(a, "*.*"))))  # dir
            elif os.path.isfile(a):
                files.append(a)  # files (absolute or relative to CWD)
            elif parent and (parent / p).is_file():
                files.append(str((parent / p).absolute()))  # files (relative to *.txt file parent)
            else:
                raise FileNotFoundError(f"{p} does not exist")

        images = [x for x in files if x.split(".")[-1].lower() in IMG_FORMATS]
        videos = [x for x in files if x.split(".")[-1].lower() in VID_FORMATS]
        ni, nv = len(images), len(videos)

        self.files = images + videos
        self.nf = ni + nv  # number of files
        self.ni = ni  # number of images
        self.video_flag = [False] * ni + [True] * nv
        self.mode = "image"
        self.vid_stride = vid_stride  # video frame-rate stride
        self.bs = batch
        if any(videos):
            self._new_video(videos[0])  # new video
        else:
            self.cap = None
        if self.nf == 0:
            raise FileNotFoundError(
                f"No images or videos found in {p}. "
                f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"
            )

    def __iter__(self):
        """Returns an iterator object for VideoStream or ImageFolder."""
        self.count = 0
        return self

    def __next__(self):
        """Returns the next batch of images or video frames along with their paths and metadata."""
        paths, imgs, info = [], [], []
        while len(imgs) < self.bs:
            if self.count >= self.nf:  # end of file list
                if len(imgs) > 0:
                    return paths, imgs, info  # return last partial batch
                else:
                    raise StopIteration

            path = self.files[self.count]
            if self.video_flag[self.count]:
                self.mode = "video"
                if not self.cap or not self.cap.isOpened():
                    self._new_video(path)

                for _ in range(self.vid_stride):
                    success = self.cap.grab()
                    if not success:
                        break  # end of video or failure

                if success:
                    success, im0 = self.cap.retrieve()
                    if success:
                        self.frame += 1
                        paths.append(path)
                        imgs.append(im0)
                        info.append(f"video {self.count + 1}/{self.nf} (frame {self.frame}/{self.frames}) {path}: ")
                        if self.frame == self.frames:  # end of video
                            self.count += 1
                            self.cap.release()
                else:
                    # Move to the next file if the current video ended or failed to open
                    self.count += 1
                    if self.cap:
                        self.cap.release()
                    if self.count < self.nf:
                        self._new_video(self.files[self.count])
            else:
                self.mode = "image"
                im0 = cv2.imread(path)  # BGR
                if im0 is None:
                    raise FileNotFoundError(f"Image Not Found {path}")
                paths.append(path)
                imgs.append(im0)
                info.append(f"image {self.count + 1}/{self.nf} {path}: ")
                self.count += 1  # move to the next file
                if self.count >= self.ni:  # end of image list
                    break

        return paths, imgs, info

    def _new_video(self, path):
        """Creates a new video capture object for the given path."""
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
        if not self.cap.isOpened():
            raise FileNotFoundError(f"Failed to open video {path}")
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)

    def __len__(self):
        """Returns the number of batches in the object."""
        return math.ceil(self.nf / self.bs)  # number of files
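A hedged sketch iterating a folder of mixed images and videos; the directory path is a placeholder for any folder containing files in `IMG_FORMATS` or `VID_FORMATS`:

```python
from ultralytics.data.loaders import LoadImagesAndVideos

loader = LoadImagesAndVideos("path/to/media", batch=1, vid_stride=1)  # placeholder dir
for paths, imgs, info in loader:
    print(info[0])        # e.g. 'image 1/8 path/to/media/a.jpg: '
    print(imgs[0].shape)  # BGR numpy array (H, W, 3)
```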
class LoadPilAndNumpy:
    """
    Load images from PIL and Numpy arrays for batch processing.

    This class is designed to manage loading and pre-processing of image data from both PIL and Numpy formats.
    It performs basic validation and format conversion to ensure that the images are in the required format for
    downstream processing.

    Attributes:
        paths (list): List of image paths or autogenerated filenames.
        im0 (list): List of images stored as Numpy arrays.
        mode (str): Type of data being processed, defaults to 'image'.
        bs (int): Batch size, equivalent to the length of `im0`.

    Methods:
        _single_check(im): Validate and format a single image to a Numpy array.
    """

    def __init__(self, im0):
        """Initialize PIL and Numpy Dataloader."""
        if not isinstance(im0, list):
            im0 = [im0]
        self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]
        self.im0 = [self._single_check(im) for im in im0]
        self.mode = "image"
        self.bs = len(self.im0)

    @staticmethod
    def _single_check(im):
        """Validate and format an image to numpy array."""
        assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
        if isinstance(im, Image.Image):
            if im.mode != "RGB":
                im = im.convert("RGB")
            im = np.asarray(im)[:, :, ::-1]
            im = np.ascontiguousarray(im)  # contiguous
        return im

    def __len__(self):
        """Returns the length of the 'im0' attribute."""
        return len(self.im0)

    def __next__(self):
        """Returns the batch of paths and images with a blank info string for each item."""
        if self.count == 1:  # loop only once as it's batch inference
            raise StopIteration
        self.count += 1
        return self.paths, self.im0, [""] * self.bs

    def __iter__(self):
        """Enables iteration for class LoadPilAndNumpy."""
        self.count = 0
        return self


class LoadTensor:
    """
    Load images from torch.Tensor data.

    This class manages the loading and pre-processing of image data from PyTorch tensors for further processing.

    Attributes:
        im0 (torch.Tensor): The input tensor containing the image(s).
        bs (int): Batch size, inferred from the shape of `im0`.
        mode (str): Current mode, set to 'image'.
        paths (list): List of image paths or filenames.
        count (int): Counter for iteration, initialized at 0 during `__iter__()`.

    Methods:
        _single_check(im, stride): Validate and possibly modify the input tensor.
    """

    def __init__(self, im0) -> None:
        """Initialize Tensor Dataloader."""
        self.im0 = self._single_check(im0)
        self.bs = self.im0.shape[0]
        self.mode = "image"
        self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]

    @staticmethod
    def _single_check(im, stride=32):
        """Validate and format an image to torch.Tensor."""
        s = (
            f"WARNING ⚠️ torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
            f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
        )
        if len(im.shape) != 4:
            if len(im.shape) != 3:
                raise ValueError(s)
            LOGGER.warning(s)
            im = im.unsqueeze(0)
        if im.shape[2] % stride or im.shape[3] % stride:
            raise ValueError(s)
        if im.max() > 1.0 + torch.finfo(im.dtype).eps:  # torch.float32 eps is 1.2e-07
            LOGGER.warning(
                f"WARNING ⚠️ torch.Tensor inputs should be normalized 0.0-1.0 but max value is {im.max()}. "
                f"Dividing input by 255."
            )
            im = im.float() / 255.0

        return im

    def __iter__(self):
        """Returns an iterator object."""
        self.count = 0
        return self

    def __next__(self):
        """Return next item in the iterator."""
        if self.count == 1:
            raise StopIteration
        self.count += 1
        return self.paths, self.im0, [""] * self.bs

    def __len__(self):
        """Returns the batch size."""
        return self.bs
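A hedged sketch of the tensor path; note the stride-divisibility and 0.0-1.0 normalization checks that `_single_check` enforces:

```python
import torch

from ultralytics.data.loaders import LoadTensor

# BCHW float tensor; 640x640 is divisible by the default stride of 32,
# and torch.rand values are already in 0.0-1.0 so no rescaling warning fires.
batch = torch.rand(2, 3, 640, 640)
loader = LoadTensor(batch)
paths, im0, info = next(iter(loader))
print(len(paths), im0.shape)  # 2 torch.Size([2, 3, 640, 640])
```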
def autocast_list(source):
|
||||
"""Merges a list of source of different types into a list of numpy arrays or PIL images."""
|
||||
files = []
|
||||
for im in source:
|
||||
if isinstance(im, (str, Path)): # filename or uri
|
||||
files.append(Image.open(requests.get(im, stream=True).raw if str(im).startswith("http") else im))
|
||||
elif isinstance(im, (Image.Image, np.ndarray)): # PIL or np Image
|
||||
files.append(im)
|
||||
else:
|
||||
raise TypeError(
|
||||
f"type {type(im).__name__} is not a supported Ultralytics prediction source type. \n"
|
||||
f"See https://docs.ultralytics.com/modes/predict for supported source types."
|
||||
)
|
||||
|
||||
return files
|
||||
|
||||
|
||||
def get_best_youtube_url(url, use_pafy=True):
    """
    Retrieves the URL of the best quality MP4 video stream from a given YouTube video.

    This function uses the pafy or yt_dlp library to extract the video info from YouTube. It then finds the highest
    quality MP4 format that has a video codec but no audio codec, and returns the URL of this video stream.

    Args:
        url (str): The URL of the YouTube video.
        use_pafy (bool): Use the pafy package if True (default), otherwise use the yt_dlp package.

    Returns:
        (str): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
    """
    if use_pafy:
        check_requirements(("pafy", "youtube_dl==2020.12.2"))
        import pafy  # noqa

        return pafy.new(url).getbestvideo(preftype="mp4").url
    else:
        check_requirements("yt-dlp")
        import yt_dlp

        with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
            info_dict = ydl.extract_info(url, download=False)  # extract info
        for f in reversed(info_dict.get("formats", [])):  # reversed because best is usually last
            # Find an MP4 format with a video codec, no audio, and at least 1920 width or 1080 height
            good_size = (f.get("width") or 0) >= 1920 or (f.get("height") or 0) >= 1080
            if good_size and f["vcodec"] != "none" and f["acodec"] == "none" and f["ext"] == "mp4":
                return f.get("url")

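As a usage sketch (the video URL below is an arbitrary placeholder, and a live stream URL is only returned when a matching format exists), the result can be handed straight to OpenCV:

```python
import cv2

from ultralytics.data.loaders import get_best_youtube_url

stream_url = get_best_youtube_url("https://www.youtube.com/watch?v=LNwODJXcvt4", use_pafy=False)
if stream_url:
    cap = cv2.VideoCapture(stream_url)  # read frames like any other video source
    ok, frame = cap.read()
```
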
# Define constants
LOADERS = (LoadStreams, LoadPilAndNumpy, LoadImagesAndVideos, LoadScreenshots)

18
ultralytics/data/scripts/download_weights.sh
Normal file
@ -0,0 +1,18 @@
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download latest models from https://github.com/ultralytics/assets/releases
# Example usage: bash ultralytics/data/scripts/download_weights.sh
# parent
# └── weights
#     ├── yolov8n.pt  ← downloads here
#     ├── yolov8s.pt
#     └── ...

python - <<EOF
from ultralytics.utils.downloads import attempt_download_asset

assets = [f'yolov8{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '-cls', '-seg', '-pose')]
for x in assets:
    attempt_download_asset(f'weights/{x}')

EOF

60
ultralytics/data/scripts/get_coco.sh
Normal file
@ -0,0 +1,60 @@
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download COCO 2017 dataset https://cocodataset.org
# Example usage: bash data/scripts/get_coco.sh
# parent
# ├── ultralytics
# └── datasets
#     └── coco  ← downloads here

# Arguments (optional) Usage: bash data/scripts/get_coco.sh --train --val --test --segments
if [ "$#" -gt 0 ]; then
  for opt in "$@"; do
    case "${opt}" in
      --train) train=true ;;
      --val) val=true ;;
      --test) test=true ;;
      --segments) segments=true ;;
      --sama) sama=true ;;
    esac
  done
else
  train=true
  val=true
  test=false
  segments=false
  sama=false
fi

# Download/unzip labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
if [ "$segments" == "true" ]; then
  f='coco2017labels-segments.zip' # 169 MB
elif [ "$sama" == "true" ]; then
  f='coco2017labels-segments-sama.zip' # 199 MB https://www.sama.com/sama-coco-dataset/
else
  f='coco2017labels.zip' # 46 MB
fi
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &

# Download/unzip images
d='../datasets/coco/images' # unzip directory
url=http://images.cocodataset.org/zips/
if [ "$train" == "true" ]; then
  f='train2017.zip' # 19G, 118k images
  echo 'Downloading' $url$f '...'
  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
if [ "$val" == "true" ]; then
  f='val2017.zip' # 1G, 5k images
  echo 'Downloading' $url$f '...'
  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
if [ "$test" == "true" ]; then
  f='test2017.zip' # 7G, 41k images (optional)
  echo 'Downloading' $url$f '...'
  curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
wait # finish background tasks

17
ultralytics/data/scripts/get_coco128.sh
Normal file
@ -0,0 +1,17 @@
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
# Example usage: bash data/scripts/get_coco128.sh
# parent
# ├── ultralytics
# └── datasets
#     └── coco128  ← downloads here

# Download/unzip images and labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco128.zip' # or 'coco128-segments.zip', 68 MB
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &

wait # finish background tasks

51
ultralytics/data/scripts/get_imagenet.sh
Normal file
@ -0,0 +1,51 @@
#!/bin/bash
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Download ILSVRC2012 ImageNet dataset https://image-net.org
# Example usage: bash data/scripts/get_imagenet.sh
# parent
# ├── ultralytics
# └── datasets
#     └── imagenet  ← downloads here

# Arguments (optional) Usage: bash data/scripts/get_imagenet.sh --train --val
if [ "$#" -gt 0 ]; then
  for opt in "$@"; do
    case "${opt}" in
      --train) train=true ;;
      --val) val=true ;;
    esac
  done
else
  train=true
  val=true
fi

# Make dir
d='../datasets/imagenet' # unzip directory
mkdir -p $d && cd $d

# Download/unzip train
if [ "$train" == "true" ]; then
  wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar # download 138G, 1281167 images
  mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train
  tar -xf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
  find . -name "*.tar" | while read NAME; do
    mkdir -p "${NAME%.tar}"
    tar -xf "${NAME}" -C "${NAME%.tar}"
    rm -f "${NAME}"
  done
  cd ..
fi

# Download/unzip val
if [ "$val" == "true" ]; then
  wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar # download 6.3G, 50000 images
  mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xf ILSVRC2012_img_val.tar
  wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash # move into subdirs
fi

# Delete corrupted image (optional: PNG under JPEG name that may cause dataloaders to fail)
# rm train/n04266014/n04266014_10835.JPEG

# TFRecords (optional)
# wget https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_lsvrc_2015_synsets.txt

288
ultralytics/data/split_dota.py
Normal file
@ -0,0 +1,288 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import itertools
from glob import glob
from math import ceil
from pathlib import Path

import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

from ultralytics.data.utils import exif_size, img2label_paths
from ultralytics.utils.checks import check_requirements

check_requirements("shapely")
from shapely.geometry import Polygon


def bbox_iof(polygon1, bbox2, eps=1e-6):
    """
    Calculate the intersection-over-foreground (IoF) between polygons and horizontal bounding boxes.

    Args:
        polygon1 (np.ndarray): Polygon coordinates, (n, 8).
        bbox2 (np.ndarray): Bounding boxes in xyxy format, (m, 4).

    Returns:
        (np.ndarray): IoF values of shape (n, m), i.e. intersection area divided by polygon area.
    """
    polygon1 = polygon1.reshape(-1, 4, 2)
    lt_point = np.min(polygon1, axis=-2)
    rb_point = np.max(polygon1, axis=-2)
    bbox1 = np.concatenate([lt_point, rb_point], axis=-1)

    lt = np.maximum(bbox1[:, None, :2], bbox2[..., :2])
    rb = np.minimum(bbox1[:, None, 2:], bbox2[..., 2:])
    wh = np.clip(rb - lt, 0, np.inf)
    h_overlaps = wh[..., 0] * wh[..., 1]

    l, t, r, b = (bbox2[..., i] for i in range(4))
    polygon2 = np.stack([l, t, r, t, r, b, l, b], axis=-1).reshape(-1, 4, 2)

    sg_polys1 = [Polygon(p) for p in polygon1]
    sg_polys2 = [Polygon(p) for p in polygon2]
    overlaps = np.zeros(h_overlaps.shape)
    for p in zip(*np.nonzero(h_overlaps)):
        overlaps[p] = sg_polys1[p[0]].intersection(sg_polys2[p[-1]]).area
    unions = np.array([p.area for p in sg_polys1], dtype=np.float32)
    unions = unions[..., None]

    unions = np.clip(unions, eps, np.inf)
    outputs = overlaps / unions
    if outputs.ndim == 1:
        outputs = outputs[..., None]
    return outputs

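A small worked check of the IoF definition (intersection area divided by polygon area), with coordinates invented for the example:

```python
import numpy as np

poly = np.array([[0, 0, 4, 0, 4, 4, 0, 4]], dtype=np.float32)  # 4x4 square, area 16
box = np.array([[2, 2, 6, 6]], dtype=np.float32)  # overlaps a 2x2 corner of the square
print(bbox_iof(poly, box))  # [[0.25]] -> intersection 4 / polygon area 16
```
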
def load_yolo_dota(data_root, split="train"):
    """
    Load DOTA dataset.

    Args:
        data_root (str): Data root.
        split (str): The split to load, either 'train' or 'val'.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    assert split in ["train", "val"]
    im_dir = Path(data_root) / "images" / split
    assert im_dir.exists(), f"Can't find {im_dir}, please check your data root."
    im_files = glob(str(Path(data_root) / "images" / split / "*"))
    lb_files = img2label_paths(im_files)
    annos = []
    for im_file, lb_file in zip(im_files, lb_files):
        w, h = exif_size(Image.open(im_file))
        with open(lb_file) as f:
            lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
            lb = np.array(lb, dtype=np.float32)
        annos.append(dict(ori_size=(h, w), label=lb, filepath=im_file))
    return annos

def get_windows(im_size, crop_sizes=[1024], gaps=[200], im_rate_thr=0.6, eps=0.01):
    """
    Get the coordinates of windows.

    Args:
        im_size (tuple): Original image size, (h, w).
        crop_sizes (List(int)): Crop size of windows.
        gaps (List(int)): Gap between crops.
        im_rate_thr (float): Threshold on the window area covered by the image divided by total window area.
    """
    h, w = im_size
    windows = []
    for crop_size, gap in zip(crop_sizes, gaps):
        assert crop_size > gap, f"invalid crop_size gap pair [{crop_size} {gap}]"
        step = crop_size - gap

        xn = 1 if w <= crop_size else ceil((w - crop_size) / step + 1)
        xs = [step * i for i in range(xn)]
        if len(xs) > 1 and xs[-1] + crop_size > w:
            xs[-1] = w - crop_size

        yn = 1 if h <= crop_size else ceil((h - crop_size) / step + 1)
        ys = [step * i for i in range(yn)]
        if len(ys) > 1 and ys[-1] + crop_size > h:
            ys[-1] = h - crop_size

        start = np.array(list(itertools.product(xs, ys)), dtype=np.int64)
        stop = start + crop_size
        windows.append(np.concatenate([start, stop], axis=1))
    windows = np.concatenate(windows, axis=0)

    im_in_wins = windows.copy()
    im_in_wins[:, 0::2] = np.clip(im_in_wins[:, 0::2], 0, w)
    im_in_wins[:, 1::2] = np.clip(im_in_wins[:, 1::2], 0, h)
    im_areas = (im_in_wins[:, 2] - im_in_wins[:, 0]) * (im_in_wins[:, 3] - im_in_wins[:, 1])
    win_areas = (windows[:, 2] - windows[:, 0]) * (windows[:, 3] - windows[:, 1])
    im_rates = im_areas / win_areas
    if not (im_rates > im_rate_thr).any():
        max_rate = im_rates.max()
        im_rates[abs(im_rates - max_rate) < eps] = 1
    return windows[im_rates > im_rate_thr]

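To make the sliding-window arithmetic concrete, a hand-checked example with the default 1024 crop and 200 gap (step = 824); the image size is arbitrary:

```python
wins = get_windows((1500, 2000), crop_sizes=[1024], gaps=[200])  # (h, w) = (1500, 2000)
print(wins.shape)  # (6, 4): 3 x-offsets [0, 824, 976] x 2 y-offsets [0, 476]
print(wins[0])  # [0 0 1024 1024], windows are xyxy in pixels
```
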
def get_window_obj(anno, windows, iof_thr=0.7):
    """Get objects for each window."""
    h, w = anno["ori_size"]
    label = anno["label"]
    if len(label):
        label[:, 1::2] *= w
        label[:, 2::2] *= h
        iofs = bbox_iof(label[:, 1:], windows)
        # Returned labels are in unnormalized image pixels, not yet shifted to the window origin
        return [(label[iofs[:, i] >= iof_thr]) for i in range(len(windows))]  # window_anns
    else:
        return [np.zeros((0, 9), dtype=np.float32) for _ in range(len(windows))]  # window_anns

def crop_and_save(anno, windows, window_objs, im_dir, lb_dir):
    """
    Crop images and save new labels.

    Args:
        anno (dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
        windows (list): A list of windows coordinates.
        window_objs (list): A list of labels inside each window.
        im_dir (str): The output directory path of images.
        lb_dir (str): The output directory path of labels.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    im = cv2.imread(anno["filepath"])
    name = Path(anno["filepath"]).stem
    for i, window in enumerate(windows):
        x_start, y_start, x_stop, y_stop = window.tolist()
        new_name = f"{name}__{x_stop - x_start}__{x_start}___{y_start}"
        patch_im = im[y_start:y_stop, x_start:x_stop]
        ph, pw = patch_im.shape[:2]

        cv2.imwrite(str(Path(im_dir) / f"{new_name}.jpg"), patch_im)
        label = window_objs[i]
        if len(label) == 0:
            continue
        label[:, 1::2] -= x_start
        label[:, 2::2] -= y_start
        label[:, 1::2] /= pw
        label[:, 2::2] /= ph

        with open(Path(lb_dir) / f"{new_name}.txt", "w") as f:
            for lb in label:
                formatted_coords = ["{:.6g}".format(coord) for coord in lb[1:]]
                f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")

def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=[1024], gaps=[200]):
    """
    Split both images and labels.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - split
                - labels
                    - split
        and the output directory structure is:
            - save_dir
                - images
                    - split
                - labels
                    - split
    """
    im_dir = Path(save_dir) / "images" / split
    im_dir.mkdir(parents=True, exist_ok=True)
    lb_dir = Path(save_dir) / "labels" / split
    lb_dir.mkdir(parents=True, exist_ok=True)

    annos = load_yolo_dota(data_root, split=split)
    for anno in tqdm(annos, total=len(annos), desc=split):
        windows = get_windows(anno["ori_size"], crop_sizes, gaps)
        window_objs = get_window_obj(anno, windows)
        crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))

def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
    """
    Split train and val set of DOTA.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
        and the output directory structure is:
            - save_dir
                - images
                    - train
                    - val
                - labels
                    - train
                    - val
    """
    crop_sizes, gaps = [], []
    for r in rates:
        crop_sizes.append(int(crop_size / r))
        gaps.append(int(gap / r))
    for split in ["train", "val"]:
        split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)

def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=[1.0]):
    """
    Split test set of DOTA, labels are not included within this set.

    Notes:
        The directory structure assumed for the DOTA dataset:
            - data_root
                - images
                    - test
        and the output directory structure is:
            - save_dir
                - images
                    - test
    """
    crop_sizes, gaps = [], []
    for r in rates:
        crop_sizes.append(int(crop_size / r))
        gaps.append(int(gap / r))
    save_dir = Path(save_dir) / "images" / "test"
    save_dir.mkdir(parents=True, exist_ok=True)

    im_dir = Path(data_root) / "images" / "test"
    assert im_dir.exists(), f"Can't find {im_dir}, please check your data root."
    im_files = glob(str(im_dir / "*"))
    for im_file in tqdm(im_files, total=len(im_files), desc="test"):
        w, h = exif_size(Image.open(im_file))
        windows = get_windows((h, w), crop_sizes=crop_sizes, gaps=gaps)
        im = cv2.imread(im_file)
        name = Path(im_file).stem
        for window in windows:
            x_start, y_start, x_stop, y_stop = window.tolist()
            new_name = f"{name}__{x_stop - x_start}__{x_start}___{y_start}"
            patch_im = im[y_start:y_stop, x_start:x_stop]
            cv2.imwrite(str(save_dir / f"{new_name}.jpg"), patch_im)


if __name__ == "__main__":
    split_trainval(data_root="DOTAv2", save_dir="DOTAv2-split")
    split_test(data_root="DOTAv2", save_dir="DOTAv2-split")

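Since `rates` rescales both crop size and gap by 1/r, a multi-scale split is a one-liner; the save_dir name here is illustrative:

```python
# rates of 0.5/1.0/1.5 yield window sizes 2048/1024/682 with gaps 400/200/133
split_trainval(data_root="DOTAv2", save_dir="DOTAv2-split-ms", rates=[0.5, 1.0, 1.5])
```
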
651
ultralytics/data/utils.py
Normal file
@ -0,0 +1,651 @@
# Ultralytics YOLO 🚀, AGPL-3.0 license

import contextlib
import hashlib
import json
import os
import random
import subprocess
import time
import zipfile
from multiprocessing.pool import ThreadPool
from pathlib import Path
from tarfile import is_tarfile

import cv2
import numpy as np
from PIL import Image, ImageOps

from ultralytics.nn.autobackend import check_class_names
from ultralytics.utils import (
    DATASETS_DIR,
    LOGGER,
    NUM_THREADS,
    ROOT,
    SETTINGS_YAML,
    TQDM,
    clean_url,
    colorstr,
    emojis,
    yaml_load,
    yaml_save,
)
from ultralytics.utils.checks import check_file, check_font, is_ascii
from ultralytics.utils.downloads import download, safe_download, unzip_file
from ultralytics.utils.ops import segments2boxes

HELP_URL = "See https://docs.ultralytics.com/datasets/detect for dataset formatting guidance."
IMG_FORMATS = {"bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm"}  # image suffixes
VID_FORMATS = {"asf", "avi", "gif", "m4v", "mkv", "mov", "mp4", "mpeg", "mpg", "ts", "wmv", "webm"}  # video suffixes
PIN_MEMORY = str(os.getenv("PIN_MEMORY", True)).lower() == "true"  # global pin_memory for dataloaders


def img2label_paths(img_paths):
    """Define label paths as a function of image paths."""
    sa, sb = f"{os.sep}images{os.sep}", f"{os.sep}labels{os.sep}"  # /images/, /labels/ substrings
    return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths]

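The mapping just swaps the last /images/ path component for /labels/ and the suffix for .txt; for instance (POSIX separators, made-up file name):

```python
print(img2label_paths(["datasets/coco8/images/train/000000000009.jpg"]))
# ['datasets/coco8/labels/train/000000000009.txt']
```
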
def get_hash(paths):
    """Returns a single hash value of a list of paths (files or dirs)."""
    size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))  # sizes
    h = hashlib.sha256(str(size).encode())  # hash sizes
    h.update("".join(paths).encode())  # hash paths
    return h.hexdigest()  # return hash


def exif_size(img: Image.Image):
    """Returns exif-corrected PIL size."""
    s = img.size  # (width, height)
    if img.format == "JPEG":  # only support JPEG images
        with contextlib.suppress(Exception):
            exif = img.getexif()
            if exif:
                rotation = exif.get(274, None)  # the EXIF key for the orientation tag is 274
                if rotation in [6, 8]:  # rotation 270 or 90
                    s = s[1], s[0]
    return s

def verify_image(args):
    """Verify one image."""
    (im_file, cls), prefix = args
    # Number (found, corrupt), message
    nf, nc, msg = 0, 0, ""
    try:
        im = Image.open(im_file)
        im.verify()  # PIL verify
        shape = exif_size(im)  # image size
        shape = (shape[1], shape[0])  # hw
        assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
        assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
        if im.format.lower() in ("jpg", "jpeg"):
            with open(im_file, "rb") as f:
                f.seek(-2, 2)
                if f.read() != b"\xff\xd9":  # corrupt JPEG
                    ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100)
                    msg = f"{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved"
        nf = 1
    except Exception as e:
        nc = 1
        msg = f"{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}"
    return (im_file, cls), nf, nc, msg

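These verifiers are written to be fed through a thread pool, one packed args tuple per image, which is how the dataset cache builders consume them; a minimal sketch with hypothetical file names:

```python
from multiprocessing.pool import ThreadPool

args = [((im, 0), "") for im in ["im0.jpg", "im1.jpg"]]  # ((im_file, cls), log prefix)
with ThreadPool(4) as pool:
    for (im_file, cls), nf, nc, msg in pool.imap(verify_image, args):
        if msg:
            print(msg)  # corrupt or restored images are reported here
```
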
def verify_image_label(args):
    """Verify one image-label pair."""
    im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim = args
    # Number (missing, found, empty, corrupt), message, segments, keypoints
    nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, "", [], None
    try:
        # Verify images
        im = Image.open(im_file)
        im.verify()  # PIL verify
        shape = exif_size(im)  # image size
        shape = (shape[1], shape[0])  # hw
        assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
        assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
        if im.format.lower() in ("jpg", "jpeg"):
            with open(im_file, "rb") as f:
                f.seek(-2, 2)
                if f.read() != b"\xff\xd9":  # corrupt JPEG
                    ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100)
                    msg = f"{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved"

        # Verify labels
        if os.path.isfile(lb_file):
            nf = 1  # label found
            with open(lb_file) as f:
                lb = [x.split() for x in f.read().strip().splitlines() if len(x)]
                if any(len(x) > 6 for x in lb) and (not keypoint):  # is segment
                    classes = np.array([x[0] for x in lb], dtype=np.float32)
                    segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb]  # (cls, xy1...)
                    lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
                lb = np.array(lb, dtype=np.float32)
            nl = len(lb)
            if nl:
                if keypoint:
                    assert lb.shape[1] == (5 + nkpt * ndim), f"labels require {(5 + nkpt * ndim)} columns each"
                    points = lb[:, 5:].reshape(-1, ndim)[:, :2]
                else:
                    assert lb.shape[1] == 5, f"labels require 5 columns, {lb.shape[1]} columns detected"
                    points = lb[:, 1:]
                assert points.max() <= 1, f"non-normalized or out of bounds coordinates {points[points > 1]}"
                assert lb.min() >= 0, f"negative label values {lb[lb < 0]}"

                # All labels
                max_cls = lb[:, 0].max()  # max class id
                assert max_cls < num_cls, (  # valid ids are 0..num_cls-1, so a strict comparison is required
                    f"Label class {int(max_cls)} exceeds dataset class count {num_cls}. "
                    f"Possible class labels are 0-{num_cls - 1}"
                )
                _, i = np.unique(lb, axis=0, return_index=True)
                if len(i) < nl:  # duplicate row check
                    lb = lb[i]  # remove duplicates
                    if segments:
                        segments = [segments[x] for x in i]
                    msg = f"{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed"
            else:
                ne = 1  # label empty
                lb = np.zeros((0, (5 + nkpt * ndim) if keypoint else 5), dtype=np.float32)
        else:
            nm = 1  # label missing
            lb = np.zeros((0, (5 + nkpt * ndim) if keypoint else 5), dtype=np.float32)  # fix: was `keypoints`, which is always None here
        if keypoint:
            keypoints = lb[:, 5:].reshape(-1, nkpt, ndim)
            if ndim == 2:
                kpt_mask = np.where((keypoints[..., 0] < 0) | (keypoints[..., 1] < 0), 0.0, 1.0).astype(np.float32)
                keypoints = np.concatenate([keypoints, kpt_mask[..., None]], axis=-1)  # (nl, nkpt, 3)
        lb = lb[:, :5]
        return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg
    except Exception as e:
        nc = 1
        msg = f"{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}"
        return [None, None, None, None, None, nm, nf, ne, nc, msg]

def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
    """
    Convert a list of polygons to a binary mask of the specified image size.

    Args:
        imgsz (tuple): The size of the image as (height, width).
        polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
            N is the number of polygons, and M is the number of points such that M % 2 = 0.
        color (int, optional): The color value to fill in the polygons on the mask. Defaults to 1.
        downsample_ratio (int, optional): Factor by which to downsample the mask. Defaults to 1.

    Returns:
        (np.ndarray): A binary mask of the specified image size with the polygons filled in.
    """
    mask = np.zeros(imgsz, dtype=np.uint8)
    polygons = np.asarray(polygons, dtype=np.int32)
    polygons = polygons.reshape((polygons.shape[0], -1, 2))
    cv2.fillPoly(mask, polygons, color=color)
    nh, nw = (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio)
    # Note: fillPoly first then resize is trying to keep the same loss calculation method when mask-ratio=1
    return cv2.resize(mask, (nw, nh))

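A tiny sanity check of the fill and downsample behavior, with coordinates invented for the example:

```python
import numpy as np

square = np.array([10, 10, 50, 10, 50, 50, 10, 50])  # flat x1,y1,x2,y2,... polygon
mask = polygon2mask((64, 64), [square], color=1, downsample_ratio=2)
print(mask.shape)  # (32, 32) after the 2x downsample
print(mask.max())  # 1, i.e. the fill color
```
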
def polygons2masks(imgsz, polygons, color, downsample_ratio=1):
    """
    Convert a list of polygons to a set of binary masks of the specified image size.

    Args:
        imgsz (tuple): The size of the image as (height, width).
        polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
            N is the number of polygons, and M is the number of points such that M % 2 = 0.
        color (int): The color value to fill in the polygons on the masks.
        downsample_ratio (int, optional): Factor by which to downsample each mask. Defaults to 1.

    Returns:
        (np.ndarray): A set of binary masks of the specified image size with the polygons filled in.
    """
    return np.array([polygon2mask(imgsz, [x.reshape(-1)], color, downsample_ratio) for x in polygons])


def polygons2masks_overlap(imgsz, segments, downsample_ratio=1):
    """Return a (640, 640) overlap mask."""
    masks = np.zeros(
        (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio),
        dtype=np.int32 if len(segments) > 255 else np.uint8,
    )
    areas = []
    ms = []
    for si in range(len(segments)):
        mask = polygon2mask(imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1)
        ms.append(mask)
        areas.append(mask.sum())
    areas = np.asarray(areas)
    index = np.argsort(-areas)
    ms = np.array(ms)[index]
    for i in range(len(segments)):
        mask = ms[i] * (i + 1)
        masks = masks + mask
        masks = np.clip(masks, a_min=0, a_max=i + 1)
    return masks, index

def find_dataset_yaml(path: Path) -> Path:
    """
    Find and return the YAML file associated with a Detect, Segment or Pose dataset.

    This function searches for a YAML file at the root level of the provided directory first, and if not found, it
    performs a recursive search. It prefers YAML files that have the same stem as the provided path. An AssertionError
    is raised if no YAML file is found or if multiple YAML files are found.

    Args:
        path (Path): The directory path to search for the YAML file.

    Returns:
        (Path): The path of the found YAML file.
    """
    files = list(path.glob("*.yaml")) or list(path.rglob("*.yaml"))  # try root level first and then recursive
    assert files, f"No YAML file found in '{path.resolve()}'"
    if len(files) > 1:
        files = [f for f in files if f.stem == path.stem]  # prefer *.yaml files that match
    assert len(files) == 1, f"Expected 1 YAML file in '{path.resolve()}', but found {len(files)}.\n{files}"
    return files[0]

def check_det_dataset(dataset, autodownload=True):
    """
    Download, verify, and/or unzip a dataset if not found locally.

    This function checks the availability of a specified dataset, and if not found, it has the option to download and
    unzip the dataset. It then reads and parses the accompanying YAML data, ensuring key requirements are met and also
    resolves paths related to the dataset.

    Args:
        dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
        autodownload (bool, optional): Whether to automatically download the dataset if not found. Defaults to True.

    Returns:
        (dict): Parsed dataset information and paths.
    """

    file = check_file(dataset)

    # Download (optional)
    extract_dir = ""
    if zipfile.is_zipfile(file) or is_tarfile(file):
        new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)
        file = find_dataset_yaml(DATASETS_DIR / new_dir)
        extract_dir, autodownload = file.parent, False

    # Read YAML
    data = yaml_load(file, append_filename=True)  # dictionary

    # Checks
    for k in "train", "val":
        if k not in data:
            if k != "val" or "validation" not in data:
                raise SyntaxError(
                    emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs.")
                )
            LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
            data["val"] = data.pop("validation")  # replace 'validation' key with 'val' key
    if "names" not in data and "nc" not in data:
        raise SyntaxError(emojis(f"{dataset} key missing ❌.\nEither 'names' or 'nc' is required in all data YAMLs."))
    if "names" in data and "nc" in data and len(data["names"]) != data["nc"]:
        raise SyntaxError(emojis(f"{dataset} 'names' length {len(data['names'])} and 'nc: {data['nc']}' must match."))
    if "names" not in data:
        data["names"] = [f"class_{i}" for i in range(data["nc"])]
    else:
        data["nc"] = len(data["names"])

    data["names"] = check_class_names(data["names"])

    # Resolve paths
    path = Path(extract_dir or data.get("path") or Path(data.get("yaml_file", "")).parent)  # dataset root
    if not path.is_absolute():
        path = (DATASETS_DIR / path).resolve()

    # Set paths
    data["path"] = path  # download scripts
    for k in "train", "val", "test":
        if data.get(k):  # prepend path
            if isinstance(data[k], str):
                x = (path / data[k]).resolve()
                if not x.exists() and data[k].startswith("../"):
                    x = (path / data[k][3:]).resolve()
                data[k] = str(x)
            else:
                data[k] = [str((path / x).resolve()) for x in data[k]]

    # Parse YAML
    val, s = (data.get(x) for x in ("val", "download"))
    if val:
        val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
        if not all(x.exists() for x in val):
            name = clean_url(dataset)  # dataset name with URL auth stripped
            m = f"\nDataset '{name}' images not found ⚠️, missing path '{[x for x in val if not x.exists()][0]}'"
            if s and autodownload:
                LOGGER.warning(m)
            else:
                m += f"\nNote dataset download directory is '{DATASETS_DIR}'. You can update this in '{SETTINGS_YAML}'"
                raise FileNotFoundError(m)
            t = time.time()
            r = None  # success
            if s.startswith("http") and s.endswith(".zip"):  # URL
                safe_download(url=s, dir=DATASETS_DIR, delete=True)
            elif s.startswith("bash "):  # bash script
                LOGGER.info(f"Running {s} ...")
                r = os.system(s)
            else:  # python script
                exec(s, {"yaml": data})
            dt = f"({round(time.time() - t, 1)}s)"
            s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌"
            LOGGER.info(f"Dataset download {s}\n")
    check_font("Arial.ttf" if is_ascii(data["names"]) else "Arial.Unicode.ttf")  # download fonts

    return data  # dictionary

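A typical call, using the stock coco8.yaml descriptor that ships with the package (output values are what coco8 is expected to yield):

```python
data = check_det_dataset("coco8.yaml")  # downloads coco8 on first use
print(data["nc"], data["names"][0])  # 80 person
print(data["train"])  # absolute path to the resolved train images
```
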
def check_cls_dataset(dataset, split=""):
    """
    Checks a classification dataset such as Imagenet.

    This function accepts a `dataset` name and attempts to retrieve the corresponding dataset information.
    If the dataset is not found locally, it attempts to download the dataset from the internet and save it locally.

    Args:
        dataset (str | Path): The name of the dataset.
        split (str, optional): The split of the dataset. Either 'val', 'test', or ''. Defaults to ''.

    Returns:
        (dict): A dictionary containing the following keys:
            - 'train' (Path): The directory path containing the training set of the dataset.
            - 'val' (Path): The directory path containing the validation set of the dataset.
            - 'test' (Path): The directory path containing the test set of the dataset.
            - 'nc' (int): The number of classes in the dataset.
            - 'names' (dict): A dictionary of class names in the dataset.
    """

    # Download (optional if dataset=https://file.zip is passed directly)
    if str(dataset).startswith(("http:/", "https:/")):
        dataset = safe_download(dataset, dir=DATASETS_DIR, unzip=True, delete=False)
    elif Path(dataset).suffix in (".zip", ".tar", ".gz"):
        file = check_file(dataset)
        dataset = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)

    dataset = Path(dataset)
    data_dir = (dataset if dataset.is_dir() else (DATASETS_DIR / dataset)).resolve()
    if not data_dir.is_dir():
        LOGGER.warning(f"\nDataset not found ⚠️, missing path {data_dir}, attempting download...")
        t = time.time()
        if str(dataset) == "imagenet":
            subprocess.run(f"bash {ROOT / 'data/scripts/get_imagenet.sh'}", shell=True, check=True)
        else:
            url = f"https://github.com/ultralytics/yolov5/releases/download/v1.0/{dataset}.zip"
            download(url, dir=data_dir.parent)
        s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
        LOGGER.info(s)
    train_set = data_dir / "train"
    val_set = (
        data_dir / "val"
        if (data_dir / "val").exists()
        else data_dir / "validation"
        if (data_dir / "validation").exists()
        else None
    )  # data/val or data/validation
    test_set = data_dir / "test" if (data_dir / "test").exists() else None  # data/test
    if split == "val" and not val_set:
        LOGGER.warning("WARNING ⚠️ Dataset 'split=val' not found, using 'split=test' instead.")
    elif split == "test" and not test_set:
        LOGGER.warning("WARNING ⚠️ Dataset 'split=test' not found, using 'split=val' instead.")

    nc = len([x for x in (data_dir / "train").glob("*") if x.is_dir()])  # number of classes
    names = [x.name for x in (data_dir / "train").iterdir() if x.is_dir()]  # class names list
    names = dict(enumerate(sorted(names)))

    # Print to console
    for k, v in {"train": train_set, "val": val_set, "test": test_set}.items():
        prefix = f'{colorstr(f"{k}:")} {v}...'
        if v is None:
            LOGGER.info(prefix)
        else:
            files = [path for path in v.rglob("*.*") if path.suffix[1:].lower() in IMG_FORMATS]
            nf = len(files)  # number of files
            nd = len({file.parent for file in files})  # number of directories
            if nf == 0:
                if k == "train":
                    raise FileNotFoundError(emojis(f"{dataset} '{k}:' no training images found ❌ "))
                else:
                    LOGGER.warning(f"{prefix} found {nf} images in {nd} classes: WARNING ⚠️ no images found")
            elif nd != nc:
                LOGGER.warning(f"{prefix} found {nf} images in {nd} classes: ERROR ❌️ requires {nc} classes, not {nd}")
            else:
                LOGGER.info(f"{prefix} found {nf} images in {nd} classes ✅ ")

    return {"train": train_set, "val": val_set, "test": test_set, "nc": nc, "names": names}

class HUBDatasetStats:
    """
    A class for generating HUB dataset JSON and `-hub` dataset directory.

    Args:
        path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'.
        task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
        autodownload (bool): Attempt to download dataset if not found locally. Default is False.

    Example:
        Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets
        i.e. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip for coco8.zip.
        ```python
        from ultralytics.data.utils import HUBDatasetStats

        stats = HUBDatasetStats('path/to/coco8.zip', task='detect')  # detect dataset
        stats = HUBDatasetStats('path/to/coco8-seg.zip', task='segment')  # segment dataset
        stats = HUBDatasetStats('path/to/coco8-pose.zip', task='pose')  # pose dataset
        stats = HUBDatasetStats('path/to/imagenet10.zip', task='classify')  # classification dataset

        stats.get_json(save=True)
        stats.process_images()
        ```
    """

    def __init__(self, path="coco8.yaml", task="detect", autodownload=False):
        """Initialize class."""
        path = Path(path).resolve()
        LOGGER.info(f"Starting HUB dataset checks for {path}....")

        self.task = task  # detect, segment, pose, classify
        if self.task == "classify":
            unzip_dir = unzip_file(path)
            data = check_cls_dataset(unzip_dir)
            data["path"] = unzip_dir
        else:  # detect, segment, pose
            _, data_dir, yaml_path = self._unzip(Path(path))
            try:
                # Load YAML with checks
                data = yaml_load(yaml_path)
                data["path"] = ""  # strip path since YAML should be in dataset root for all HUB datasets
                yaml_save(yaml_path, data)
                data = check_det_dataset(yaml_path, autodownload)  # dict
                data["path"] = data_dir  # YAML path should be set to '' (relative) or parent (absolute)
            except Exception as e:
                raise Exception("error/HUB/dataset_stats/init") from e

        self.hub_dir = Path(f'{data["path"]}-hub')
        self.im_dir = self.hub_dir / "images"
        self.stats = {"nc": len(data["names"]), "names": list(data["names"].values())}  # statistics dictionary
        self.data = data

    @staticmethod
    def _unzip(path):
        """Unzip data.zip."""
        if not str(path).endswith(".zip"):  # path is data.yaml
            return False, None, path
        unzip_dir = unzip_file(path, path=path.parent)
        assert unzip_dir.is_dir(), (
            f"Error unzipping {path}, {unzip_dir} not found. path/to/abc.zip MUST unzip to path/to/abc/"
        )
        return True, str(unzip_dir), find_dataset_yaml(unzip_dir)  # zipped, data_dir, yaml_path

    def _hub_ops(self, f):
        """Saves a compressed image for HUB previews."""
        compress_one_image(f, self.im_dir / Path(f).name)  # save to dataset-hub

    def get_json(self, save=False, verbose=False):
        """Return dataset JSON for Ultralytics HUB."""

        def _round(labels):
            """Update labels to integer class and 4 decimal place floats."""
            if self.task == "detect":
                coordinates = labels["bboxes"]
            elif self.task == "segment":
                coordinates = [x.flatten() for x in labels["segments"]]
            elif self.task == "pose":
                n = labels["keypoints"].shape[0]
                coordinates = np.concatenate((labels["bboxes"], labels["keypoints"].reshape(n, -1)), 1)
            else:
                raise ValueError("Undefined dataset task.")
            zipped = zip(labels["cls"], coordinates)
            return [[int(c[0]), *(round(float(x), 4) for x in points)] for c, points in zipped]

        for split in "train", "val", "test":
            self.stats[split] = None  # predefine
            path = self.data.get(split)

            # Check split
            if path is None:  # no split
                continue
            files = [f for f in Path(path).rglob("*.*") if f.suffix[1:].lower() in IMG_FORMATS]  # image files in split
            if not files:  # no images
                continue

            # Get dataset statistics
            if self.task == "classify":
                from torchvision.datasets import ImageFolder

                dataset = ImageFolder(self.data[split])

                x = np.zeros(len(dataset.classes)).astype(int)
                for im in dataset.imgs:
                    x[im[1]] += 1

                self.stats[split] = {
                    "instance_stats": {"total": len(dataset), "per_class": x.tolist()},
                    "image_stats": {"total": len(dataset), "unlabelled": 0, "per_class": x.tolist()},
                    "labels": [{Path(k).name: v} for k, v in dataset.imgs],
                }
            else:
                from ultralytics.data import YOLODataset

                dataset = YOLODataset(img_path=self.data[split], data=self.data, task=self.task)
                x = np.array(
                    [
                        np.bincount(label["cls"].astype(int).flatten(), minlength=self.data["nc"])
                        for label in TQDM(dataset.labels, total=len(dataset), desc="Statistics")
                    ]
                )  # shape(128x80)
                self.stats[split] = {
                    "instance_stats": {"total": int(x.sum()), "per_class": x.sum(0).tolist()},
                    "image_stats": {
                        "total": len(dataset),
                        "unlabelled": int(np.all(x == 0, 1).sum()),
                        "per_class": (x > 0).sum(0).tolist(),
                    },
                    "labels": [{Path(k).name: _round(v)} for k, v in zip(dataset.im_files, dataset.labels)],
                }

        # Save, print and return
        if save:
            self.hub_dir.mkdir(parents=True, exist_ok=True)  # makes dataset-hub/
            stats_path = self.hub_dir / "stats.json"
            LOGGER.info(f"Saving {stats_path.resolve()}...")
            with open(stats_path, "w") as f:
                json.dump(self.stats, f)  # save stats.json
        if verbose:
            LOGGER.info(json.dumps(self.stats, indent=2, sort_keys=False))
        return self.stats

    def process_images(self):
        """Compress images for Ultralytics HUB."""
        from ultralytics.data import YOLODataset  # ClassificationDataset

        self.im_dir.mkdir(parents=True, exist_ok=True)  # makes dataset-hub/images/
        for split in "train", "val", "test":
            if self.data.get(split) is None:
                continue
            dataset = YOLODataset(img_path=self.data[split], data=self.data)
            with ThreadPool(NUM_THREADS) as pool:
                for _ in TQDM(pool.imap(self._hub_ops, dataset.im_files), total=len(dataset), desc=f"{split} images"):
                    pass
        LOGGER.info(f"Done. All images saved to {self.im_dir}")
        return self.im_dir

def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
    """
    Compresses a single image file to a reduced size while preserving its aspect ratio and quality using either the
    Python Imaging Library (PIL) or the OpenCV library. If the input image is smaller than the maximum dimension,
    it will not be resized.

    Args:
        f (str): The path to the input image file.
        f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
        max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels.
        quality (int, optional): The image compression quality as a percentage. Default is 50%.

    Example:
        ```python
        from pathlib import Path
        from ultralytics.data.utils import compress_one_image

        for f in Path('path/to/dataset').rglob('*.jpg'):
            compress_one_image(f)
        ```
    """

    try:  # use PIL
        im = Image.open(f)
        r = max_dim / max(im.height, im.width)  # ratio
        if r < 1.0:  # image too large
            im = im.resize((int(im.width * r), int(im.height * r)))
        im.save(f_new or f, "JPEG", quality=quality, optimize=True)  # save
    except Exception as e:  # use OpenCV
        LOGGER.info(f"WARNING ⚠️ HUB ops PIL failure {f}: {e}")
        im = cv2.imread(f)
        im_height, im_width = im.shape[:2]
        r = max_dim / max(im_height, im_width)  # ratio
        if r < 1.0:  # image too large
            im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
        cv2.imwrite(str(f_new or f), im)

def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annotated_only=False):
    """
    Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.

    Args:
        path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'.
        weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0).
        annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False.

    Example:
        ```python
        from ultralytics.data.utils import autosplit

        autosplit()
        ```
    """

    path = Path(path)  # images dir
    files = sorted(x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS)  # image files only
    n = len(files)  # number of files
    random.seed(0)  # for reproducibility
    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split

    txt = ["autosplit_train.txt", "autosplit_val.txt", "autosplit_test.txt"]  # 3 txt files
    for x in txt:
        if (path.parent / x).exists():
            (path.parent / x).unlink()  # remove existing

    LOGGER.info(f"Autosplitting images from {path}" + ", using *.txt labeled images only" * annotated_only)
    for i, img in TQDM(zip(indices, files), total=n):
        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
            with open(path.parent / txt[i], "a") as f:
                f.write(f"./{img.relative_to(path.parent).as_posix()}" + "\n")  # add image to txt file