# Ultralytics YOLO 🚀, AGPL-3.0 license import contextlib import math import warnings from pathlib import Path import os import cv2 import matplotlib.pyplot as plt import numpy as np import torch from PIL import Image, ImageDraw, ImageFont from PIL import __version__ as pil_version # from utils.general import increment_path # from ultralytics.utils import LOGGER, TryExcept, ops, plt_settings, threaded # from .checks import check_font, check_version, is_ascii # from .files import increment_path class Colors: """ Ultralytics default color palette https://ultralytics.com/. This class provides methods to work with the Ultralytics color palette, including converting hex color codes to RGB values. Attributes: palette (list of tuple): List of RGB color values. n (int): The number of colors in the palette. pose_palette (np.array): A specific color palette array with dtype np.uint8. """ def __init__(self): """Initialize colors as hex = matplotlib.colors.TABLEAU_COLORS.values().""" hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB', '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7') self.palette = [self.hex2rgb(f'#{c}') for c in hexs] self.n = len(self.palette) self.pose_palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255], [153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255], [255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102], [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255]], dtype=np.uint8) def __call__(self, i, bgr=False): """Converts hex color codes to RGB values.""" c = self.palette[int(i) % self.n] return (c[2], c[1], c[0]) if bgr else c @staticmethod def hex2rgb(h): """Converts hex color codes to RGB values (i.e. default PIL order).""" return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) colors = Colors() # create instance for 'from utils.plots import colors' class Annotator: """ Ultralytics Annotator for train/val mosaics and JPGs and predictions annotations. Attributes: im (Image.Image or numpy array): The image to annotate. pil (bool): Whether to use PIL or cv2 for drawing annotations. font (ImageFont.truetype or ImageFont.load_default): Font used for text annotations. lw (float): Line width for drawing. skeleton (List[List[int]]): Skeleton structure for keypoints. limb_color (List[int]): Color palette for limbs. kpt_color (List[int]): Color palette for keypoints. """ def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'): """Initialize the Annotator class with image and line width along with color palette for keypoints and limbs.""" assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.' self.im = im self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width # Pose self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]] self.limb_color = colors.pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]] self.kpt_color = colors.pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]] def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)): """Add one xyxy box to image with label.""" if isinstance(box, torch.Tensor): box = box.tolist() p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) if label: tf = max(self.lw - 1, 1) # font thickness w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height outside = p1[1] - h >= 3 p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled cv2.putText(self.im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA) def masks(self, masks, colors, im_gpu, alpha=0.5, retina_masks=False): """ Plot masks on image. Args: masks (tensor): Predicted masks on cuda, shape: [n, h, w] colors (List[List[Int]]): Colors for predicted masks, [[r, g, b] * n] im_gpu (tensor): Image is in cuda, shape: [3, h, w], range: [0, 1] alpha (float): Mask transparency: 0.0 fully transparent, 1.0 opaque retina_masks (bool): Whether to use high resolution masks or not. Defaults to False. """ if self.pil: # Convert to numpy first self.im = np.asarray(self.im).copy() if len(masks) == 0: self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 if im_gpu.device != masks.device: im_gpu = im_gpu.to(masks.device) colors = torch.tensor(colors, device=masks.device, dtype=torch.float32) / 255.0 # shape(n,3) colors = colors[:, None, None] # shape(n,1,1,3) masks = masks.unsqueeze(3) # shape(n,h,w,1) masks_color = masks * (colors * alpha) # shape(n,h,w,3) inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) mcs = masks_color.max(dim=0).values # shape(n,h,w,3) im_gpu = im_gpu.flip(dims=[0]) # flip channel im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) im_gpu = im_gpu * inv_alph_masks[-1] + mcs im_mask = (im_gpu * 255) im_mask_np = im_mask.byte().cpu().numpy() self.im[:] = im_mask_np if retina_masks else scale_image(im_mask_np, self.im.shape) if self.pil: # Convert im back to PIL and update draw self.fromarray(self.im) def kpts(self, kpts, shape=(640, 640), radius=5, kpt_line=True): """ Plot keypoints on the image. Args: kpts (tensor): Predicted keypoints with shape [17, 3]. Each keypoint has (x, y, confidence). shape (tuple): Image shape as a tuple (h, w), where h is the height and w is the width. radius (int, optional): Radius of the drawn keypoints. Default is 5. kpt_line (bool, optional): If True, the function will draw lines connecting keypoints for human pose. Default is True. Note: `kpt_line=True` currently only supports human pose plotting. """ if self.pil: # Convert to numpy first self.im = np.asarray(self.im).copy() nkpt, ndim = kpts.shape is_pose = nkpt == 17 and ndim == 3 kpt_line &= is_pose # `kpt_line=True` for now only supports human pose plotting for i, k in enumerate(kpts): color_k = [int(x) for x in self.kpt_color[i]] if is_pose else colors(i) x_coord, y_coord = k[0], k[1] if x_coord % shape[1] != 0 and y_coord % shape[0] != 0: if len(k) == 3: conf = k[2] if conf < 0.5: continue cv2.circle(self.im, (int(x_coord), int(y_coord)), radius, color_k, -1, lineType=cv2.LINE_AA) if kpt_line: ndim = kpts.shape[-1] for i, sk in enumerate(self.skeleton): pos1 = (int(kpts[(sk[0] - 1), 0]), int(kpts[(sk[0] - 1), 1])) pos2 = (int(kpts[(sk[1] - 1), 0]), int(kpts[(sk[1] - 1), 1])) if ndim == 3: conf1 = kpts[(sk[0] - 1), 2] conf2 = kpts[(sk[1] - 1), 2] if conf1 < 0.5 or conf2 < 0.5: continue if pos1[0] % shape[1] == 0 or pos1[1] % shape[0] == 0 or pos1[0] < 0 or pos1[1] < 0: continue if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0: continue cv2.line(self.im, pos1, pos2, [int(x) for x in self.limb_color[i]], thickness=2, lineType=cv2.LINE_AA) if self.pil: # Convert im back to PIL and update draw self.fromarray(self.im) def rectangle(self, xy, fill=None, outline=None, width=1): """Add rectangle to image (PIL-only).""" self.draw.rectangle(xy, fill, outline, width) def text(self, xy, text, txt_color=(255, 255, 255), anchor='top', box_style=False): """Adds text to an image using PIL or cv2.""" if anchor == 'bottom': # start y from font bottom w, h = self.font.getsize(text) # text width, height xy[1] += 1 - h if self.pil: if box_style: w, h = self.font.getsize(text) self.draw.rectangle((xy[0], xy[1], xy[0] + w + 1, xy[1] + h + 1), fill=txt_color) # Using `txt_color` for background and draw fg with white color txt_color = (255, 255, 255) if '\n' in text: lines = text.split('\n') _, h = self.font.getsize(text) for line in lines: self.draw.text(xy, line, fill=txt_color, font=self.font) xy[1] += h else: self.draw.text(xy, text, fill=txt_color, font=self.font) else: if box_style: tf = max(self.lw - 1, 1) # font thickness w, h = cv2.getTextSize(text, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height outside = xy[1] - h >= 3 p2 = xy[0] + w, xy[1] - h - 3 if outside else xy[1] + h + 3 cv2.rectangle(self.im, xy, p2, txt_color, -1, cv2.LINE_AA) # filled # Using `txt_color` for background and draw fg with white color txt_color = (255, 255, 255) tf = max(self.lw - 1, 1) # font thickness cv2.putText(self.im, text, xy, 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA) def fromarray(self, im): """Update self.im from a numpy array.""" self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) self.draw = ImageDraw.Draw(self.im) def result(self): """Return annotated image as array.""" return np.asarray(self.im) def scale_image(masks, im0_shape, ratio_pad=None): """ Takes a mask, and resizes it to the original image size Args: masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3]. im0_shape (tuple): the original image shape ratio_pad (tuple): the ratio of the padding to the original image. Returns: masks (torch.Tensor): The masks that are being returned. """ # Rescale coordinates (xyxy) from im1_shape to im0_shape im1_shape = masks.shape if im1_shape[:2] == im0_shape[:2]: return masks if ratio_pad is None: # calculate from im0_shape gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding else: gain = ratio_pad[0][0] pad = ratio_pad[1] top, left = int(pad[1]), int(pad[0]) # y, x bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0]) if len(masks.shape) < 2: raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') masks = masks[top:bottom, left:right] masks = cv2.resize(masks, (im0_shape[1], im0_shape[0])) if len(masks.shape) == 2: masks = masks[:, :, None] return masks def boxing_img(det, img, line_width=3): annotator = Annotator(img, line_width) for *xyxy, id, conf, cls, _, _ in reversed(det): label = (f'id:{int(id)} '+str(int(cls)) +f' {conf:.2f}') if cls==0: color = colors(int(cls), True) else: color = colors(int(id), True) annotator.box_label(xyxy, label, color=color) # Save results (image and video with tracking) imgx = annotator.result() return imgx