345 lines
14 KiB
Python
345 lines
14 KiB
Python
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
|
|
|
import contextlib
|
|
import math
|
|
import warnings
|
|
from pathlib import Path
|
|
import os
|
|
import cv2
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
import torch
|
|
from PIL import Image, ImageDraw, ImageFont
|
|
from PIL import __version__ as pil_version
|
|
# from utils.general import increment_path
|
|
|
|
# from ultralytics.utils import LOGGER, TryExcept, ops, plt_settings, threaded
|
|
|
|
# from .checks import check_font, check_version, is_ascii
|
|
# from .files import increment_path
|
|
|
|
|
|
class Colors:
    """
    Ultralytics default color palette https://ultralytics.com/.

    Stores a fixed set of RGB colors decoded from hex codes and serves them by integer index, wrapping
    around once the index runs past the end of the palette. A second, uint8 palette is kept for pose drawing.

    Attributes:
        palette (list of tuple): RGB tuples decoded from the built-in hex codes.
        n (int): Number of entries in `palette`.
        pose_palette (np.ndarray): 20x3 array of color triplets with dtype np.uint8.
    """

    def __init__(self):
        """Decode the built-in hex codes into `palette` and build the pose palette array."""
        hex_codes = (
            'FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231',
            '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
            '2C99A8', '00C2FF', '344593', '6473FF', '0018EC',
            '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7',
        )
        pose_triplets = [
            [255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255],
            [153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255],
            [255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102],
            [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255],
        ]

        self.palette = [self.hex2rgb('#' + code) for code in hex_codes]
        self.n = len(self.palette)
        self.pose_palette = np.array(pose_triplets, dtype=np.uint8)

    def __call__(self, i, bgr=False):
        """Return palette color number `int(i) % n` as an RGB tuple, or as BGR when `bgr` is truthy."""
        rgb = self.palette[int(i) % self.n]
        if bgr:
            return rgb[::-1]  # reverse channel order: (b, g, r)
        return rgb

    @staticmethod
    def hex2rgb(h):
        """Converts a '#RRGGBB' hex color code to an (r, g, b) tuple of ints (i.e. default PIL order)."""
        red, green, blue = h[1:3], h[3:5], h[5:7]
        return int(red, 16), int(green, 16), int(blue, 16)
|
|
|
|
|
|
# Module-level palette instance used by the drawing helpers in this file (Annotator, boxing_img, ...).
colors = Colors()  # create instance for 'from utils.plots import colors'
|
|
|
|
|
|
class Annotator:
    """
    Ultralytics Annotator for train/val mosaics and JPGs and predictions annotations.

    Boxes, masks and keypoints are drawn with cv2 on a numpy image. With `pil=True` the image is kept as a
    PIL image between calls so that the PIL helpers (`rectangle`, `text`) can use `self.draw`; the cv2-based
    methods then convert to numpy, draw, and convert back through `fromarray`.

    Attributes:
        im (Image.Image or numpy array): The image to annotate.
        pil (bool): Whether to use PIL or cv2 for drawing annotations.
        draw (ImageDraw.ImageDraw or None): PIL drawing context; None in cv2 mode.
        font (ImageFont font object or None): Font used for PIL text annotations; None in cv2 mode.
        lw (float): Line width for drawing.
        skeleton (List[List[int]]): Skeleton structure for keypoints (1-based keypoint index pairs).
        limb_color (np.ndarray): Color per skeleton limb, taken from `colors.pose_palette`.
        kpt_color (np.ndarray): Color per keypoint, taken from `colors.pose_palette`.
    """

    def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
        """
        Initialize the Annotator class with image and line width along with color palette for keypoints and limbs.

        Args:
            im (np.ndarray): Contiguous image array; `im.shape` is used to derive the default line width.
            line_width (int, optional): Line width; derived from the image size when falsy.
            font_size (int, optional): PIL font size; derived from the image size when falsy (PIL mode only).
            font (str): TrueType font name/path for PIL mode; the PIL default font is used if it cannot be loaded.
            pil (bool): Keep the image as a PIL image and enable the PIL drawing helpers.
            example (str): Accepted for signature compatibility; not used by this implementation.

        Note:
            In PIL mode the annotator draws on its own PIL copy, so read the output through `result()` rather
            than relying on `im` being modified in place.
        """
        assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
        self.pil = bool(pil)
        self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2)  # line width
        if self.pil:
            self.fromarray(im)  # sets self.im (PIL image) and self.draw
            size = font_size or max(round(sum(self.im.size) / 2 * 0.035), 12)
            try:
                self.font = ImageFont.truetype(str(font), size)
            except (OSError, ImportError):
                # Font file not found or FreeType support missing: fall back to PIL's built-in font.
                self.font = ImageFont.load_default()
        else:
            # cv2 mode: draw directly on the caller's array; the PIL-only state stays unset.
            self.im = im
            self.draw = None
            self.font = None

        # Pose
        self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9],
                         [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]

        self.limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
        self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]

    def _text_size(self, text):
        """Return (width, height) of `text` as measured by the active backend (PIL font or cv2)."""
        if self.pil:
            if hasattr(self.font, 'getbbox'):
                # `getsize` was removed in Pillow 10; (right, bottom) of the bounding box replaces it.
                return tuple(self.font.getbbox(text)[2:4])
            return self.font.getsize(text)
        tf = max(self.lw - 1, 1)  # font thickness
        return cv2.getTextSize(text, 0, fontScale=self.lw / 3, thickness=tf)[0]

    def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
        """
        Add one xyxy box to image with label.

        Args:
            box (sequence or torch.Tensor): Box corners as (x1, y1, x2, y2).
            label (str): Text drawn on a filled background at the top-left corner; skipped when empty.
            color (tuple): Box and label-background color.
            txt_color (tuple): Label text color.
        """
        if isinstance(box, torch.Tensor):
            box = box.tolist()
        if self.pil:
            # Convert to numpy first
            self.im = np.asarray(self.im).copy()

        p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
        cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
        if label:
            tf = max(self.lw - 1, 1)  # font thickness
            w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0]  # text width, height
            outside = p1[1] - h >= 3  # True when the label fits above the box
            p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
            cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA)  # filled
            cv2.putText(self.im,
                        label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                        0,
                        self.lw / 3,
                        txt_color,
                        thickness=tf,
                        lineType=cv2.LINE_AA)
        if self.pil:
            # Convert im back to PIL and update draw
            self.fromarray(self.im)

    def masks(self, masks, colors, im_gpu, alpha=0.5, retina_masks=False):
        """
        Plot masks on image.

        Args:
            masks (tensor): Predicted masks on cuda, shape: [n, h, w]
            colors (List[List[Int]]): Colors for predicted masks, [[r, g, b] * n]
                (this parameter shadows the module-level `colors` palette inside the method)
            im_gpu (tensor): Image is in cuda, shape: [3, h, w], range: [0, 1]
            alpha (float): Mask transparency: 0.0 fully transparent, 1.0 opaque
            retina_masks (bool): Whether to use high resolution masks or not. Defaults to False.
        """
        if self.pil:
            # Convert to numpy first
            self.im = np.asarray(self.im).copy()
        if len(masks) == 0:
            # Nothing to blend: just write the plain image. The blending below reduces over the mask
            # dimension and cannot run with n == 0.
            self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
        else:
            if im_gpu.device != masks.device:
                im_gpu = im_gpu.to(masks.device)
            colors = torch.tensor(colors, device=masks.device, dtype=torch.float32) / 255.0  # shape(n,3)
            colors = colors[:, None, None]  # shape(n,1,1,3)
            masks = masks.unsqueeze(3)  # shape(n,h,w,1)
            masks_color = masks * (colors * alpha)  # shape(n,h,w,3)

            inv_alph_masks = (1 - masks * alpha).cumprod(0)  # shape(n,h,w,1)
            mcs = masks_color.max(dim=0).values  # shape(h,w,3)

            im_gpu = im_gpu.flip(dims=[0])  # flip channel
            im_gpu = im_gpu.permute(1, 2, 0).contiguous()  # shape(h,w,3)
            # Attenuate the image by the accumulated transparency of all masks, then add the mask colors.
            im_gpu = im_gpu * inv_alph_masks[-1] + mcs
            im_mask = (im_gpu * 255)
            im_mask_np = im_mask.byte().cpu().numpy()
            self.im[:] = im_mask_np if retina_masks else scale_image(im_mask_np, self.im.shape)
        if self.pil:
            # Convert im back to PIL and update draw
            self.fromarray(self.im)

    def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True):
        """
        Plot keypoints on the image.

        Args:
            kpts (tensor): Predicted keypoints with shape [17, 3]. Each keypoint has (x, y, confidence).
            shape (tuple): Image shape as a tuple (h, w), where h is the height and w is the width.
            radius (int, optional): Radius of the drawn keypoints. Default is 5.
            kpt_line (bool, optional): If True, the function will draw lines connecting keypoints
                                       for human pose. Default is True.

        Note: `kpt_line=True` currently only supports human pose plotting.
        """
        if self.pil:
            # Convert to numpy first
            self.im = np.asarray(self.im).copy()
        nkpt, ndim = kpts.shape
        is_pose = nkpt == 17 and ndim == 3
        kpt_line &= is_pose  # `kpt_line=True` for now only supports human pose plotting
        for i, k in enumerate(kpts):
            color_k = [int(x) for x in self.kpt_color[i]] if is_pose else colors(i)
            x_coord, y_coord = k[0], k[1]
            # Points whose x or y is an exact multiple of the image size (including 0) are not drawn.
            if x_coord % shape[1] != 0 and y_coord % shape[0] != 0:
                if len(k) == 3:
                    conf = k[2]
                    if conf < 0.5:
                        continue
                cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, color_k, -1, lineType=cv2.LINE_AA)

        if kpt_line:
            ndim = kpts.shape[-1]
            for i, sk in enumerate(self.skeleton):
                # Skeleton entries are 1-based keypoint indices, hence the `- 1`.
                pos1 = (int(kpts[(sk[0] - 1), 0]), int(kpts[(sk[0] - 1), 1]))
                pos2 = (int(kpts[(sk[1] - 1), 0]), int(kpts[(sk[1] - 1), 1]))
                if ndim == 3:
                    conf1 = kpts[(sk[0] - 1), 2]
                    conf2 = kpts[(sk[1] - 1), 2]
                    if conf1 < 0.5 or conf2 < 0.5:
                        continue
                if pos1[0] % shape[1] == 0 or pos1[1] % shape[0] == 0 or pos1[0] < 0 or pos1[1] < 0:
                    continue
                if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0:
                    continue
                cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[i]], thickness=2, lineType=cv2.LINE_AA)
        if self.pil:
            # Convert im back to PIL and update draw
            self.fromarray(self.im)

    def rectangle(self, xy, fill=None, outline=None, width=1):
        """Add rectangle to image (PIL-only; requires the annotator to be created with `pil=True`)."""
        self.draw.rectangle(xy, fill, outline, width)

    def text(self, xy, text, txt_color=(255, 255, 255), anchor='top', box_style=False):
        """
        Adds text to an image using PIL or cv2.

        Args:
            xy (sequence): (x, y) position of the text; the caller's sequence is not modified.
            text (str): Text to draw. In PIL mode, '\\n' splits it into lines drawn one below the other.
            txt_color (tuple): Text color, or the background color when `box_style` is True.
            anchor (str): 'bottom' shifts y up by the text height so that `xy` refers to the text bottom.
            box_style (bool): Draw a filled `txt_color` background and render the text in white.
        """
        xy = list(xy)  # local copy: y is adjusted below and callers may pass tuples
        if anchor == 'bottom':  # start y from font bottom
            _, h = self._text_size(text)  # text width, height
            xy[1] += 1 - h
        if self.pil:
            if box_style:
                w, h = self._text_size(text)
                self.draw.rectangle((xy[0], xy[1], xy[0] + w + 1, xy[1] + h + 1), fill=txt_color)
                # Using `txt_color` for background and draw fg with white color
                txt_color = (255, 255, 255)
            if '\n' in text:
                lines = text.split('\n')
                _, h = self._text_size(text)
                for line in lines:
                    self.draw.text(xy, line, fill=txt_color, font=self.font)
                    xy[1] += h
            else:
                self.draw.text(xy, text, fill=txt_color, font=self.font)
        else:
            org = tuple(xy)  # cv2 point argument
            tf = max(self.lw - 1, 1)  # font thickness
            if box_style:
                w, h = cv2.getTextSize(text, 0, fontScale=self.lw / 3, thickness=tf)[0]  # text width, height
                outside = org[1] - h >= 3
                p2 = org[0] + w, org[1] - h - 3 if outside else org[1] + h + 3
                cv2.rectangle(self.im, org, p2, txt_color, -1, cv2.LINE_AA)  # filled
                # Using `txt_color` for background and draw fg with white color
                txt_color = (255, 255, 255)
            cv2.putText(self.im, text, org, 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)

    def fromarray(self, im):
        """Update self.im from a numpy array (or PIL image) and rebuild the PIL drawing context."""
        self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
        self.draw = ImageDraw.Draw(self.im)

    def result(self):
        """Return annotated image as array."""
        return np.asarray(self.im)
|
|
|
|
|
|
def scale_image(masks, im0_shape, ratio_pad=None):
    """
    Takes a mask, and resizes it to the original image size.

    The padding is cropped away first (computed from the two shapes, or taken from `ratio_pad`), then the
    remaining region is resized to `im0_shape` with cv2.

    Args:
        masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].
        im0_shape (tuple): the original image shape, (h, w, ...).
        ratio_pad (tuple, optional): (ratio, (pad_w, pad_h)); only the padding entry is used. When None the
            padding is calculated from `masks.shape` and `im0_shape`.

    Returns:
        masks (np.ndarray): The rescaled masks with shape [h0, w0, c]. If the first two dimensions already
            match `im0_shape`, the input is returned unchanged.

    Raises:
        ValueError: If `masks` has fewer than 2 dimensions.
    """
    # Rescale coordinates (xyxy) from im1_shape to im0_shape
    im1_shape = masks.shape
    if im1_shape[:2] == im0_shape[:2]:
        return masks
    # Validate before any im1_shape[1] access below, otherwise 1-D input fails with an IndexError instead.
    if len(im1_shape) < 2:
        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')

    if ratio_pad is None:  # calculate from im0_shape
        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain  = old / new
        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
    else:
        pad = ratio_pad[1]
    top, left = int(pad[1]), int(pad[0])  # y, x
    bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])

    masks = masks[top:bottom, left:right]
    masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))
    if len(masks.shape) == 2:
        # cv2.resize drops a trailing singleton channel; restore it so the result is always 3-D.
        masks = masks[:, :, None]

    return masks
|
|
|
|
|
|
def boxing_img(det, img, line_width=3):
    """
    Draw every detection in `det` on `img` and return the annotated image.

    Args:
        det: Reversible sequence of rows laid out as (x1, y1, x2, y2, id, conf, cls, _, _); the last two
            values of each row are ignored. Rows are drawn in reverse order.
        img: Image handed to `Annotator`.
        line_width (int): Box line width handed to `Annotator`.

    Returns:
        The array returned by `Annotator.result()`.
    """
    painter = Annotator(img, line_width)
    for row in reversed(det):
        *corners, track_id, score, category, _, _ = row
        caption = f'id:{int(track_id)} {int(category)} {score:.2f}'
        # Class 0 is colored by its class index; every other class is colored per track id.
        palette_index = int(category) if category == 0 else int(track_id)
        painter.box_label(corners, caption, color=colors(palette_index, True))

    # Save results (image and video with tracking)
    return painter.result()
|
|
|
|
def draw_tracking_boxes(imgs, tracks, scale=2):
    """
    Draw tracking boxes on copies of the frames they belong to.

    `imgs` must cover every frame id that occurs in `tracks`.

    Key points:
        (1) The order of `imgs` corresponds to the frame ids in `tracks`: frame id `f` is drawn on `imgs[f - 1]`.
        (2) The images are smaller than the coordinate space of the boxes, so every xyxy value is divided by
            `scale` before drawing (halved with the default `scale=2`).

    Args:
        imgs: Indexable collection of images; each used image is copied before drawing.
        tracks (np.ndarray): Rows of
            [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
              0   1   2   3      4       5     6        7           8
        scale: Divisor applied to the box coordinates.

    Returns:
        list of (int, np.ndarray): One `(frame_index - 1, annotated image)` pair per distinct frame index,
        in ascending frame order.
    """

    def array2list(bboxes):
        """Split `bboxes` into one array per distinct frame index (column 7), in ascending frame order."""
        frame_ids = np.unique(bboxes[:, 7].astype(int))  # np.unique already returns sorted values

        lboxes = []
        for f_id in frame_ids:
            box = bboxes[bboxes[:, 7] == f_id]
            lboxes.append(box)

            # Each track id is expected to appear at most once per frame.
            assert len(set(box[:, 4])) == len(box), "Please check!!!"

        return lboxes

    subimgs = []
    for boxes in array2list(tracks):
        # Computed once per frame as an int; the inner loop must not rebind it, otherwise the returned
        # index becomes a float taken from the last row.
        img_index = int(boxes[0, 7]) - 1
        annotator = Annotator(imgs[img_index].copy())
        for *xyxy, tid, conf, cls, _fid, _bid in boxes:
            label = f'id:{int(tid)}_{int(cls)}_{conf:.2f}'

            if cls == 0:
                color = colors(int(cls), True)
            elif tid > 0:
                color = colors(int(tid), True)
            else:
                color = colors(19, True)  # 19 is the last element of the palette

            pt2 = [p / scale for p in xyxy]
            annotator.box_label(pt2, label, color=color)

        subimgs.append((img_index, annotator.result()))

    return subimgs
|
|
|
|
|
|
|
|
|
|
|
|
|