ieemoo-ai-detecttracking/featureVal.py

# -*- coding: utf-8 -*-
"""
Created on Fri May 31 14:50:21 2024

@author: ym
"""
import cv2
import numpy as np
import torch
from scipy.spatial.distance import cdist
from tracking.trackers.reid.config import config as ReIDConfig
from tracking.trackers.reid.reid_interface import ReIDInterface
# Module-level ReID encoder, constructed once at import time from the project
# config. inference_image() and inference() read its transform, device and
# model attributes (inference_image() also batch_size and embedding_size).
ReIDEncoder = ReIDInterface(ReIDConfig)

def read_data_file(datapath):
    """Parse a tracker data file into per-video lists of frame boxes and features.

    The file is scanned line by line:

    * a line containing ``CameraId`` starts a new frame; the integer after the
      colon in its second comma-separated field is read as the frame timestamp;
    * a line containing ``box`` / ``feat`` adds one row to the boxes / features
      of the current frame (the field following the first colon is split on
      commas and the last piece is dropped).

    Whenever two consecutive frame timestamps differ by more than 1e4, the
    frames collected so far are closed off as one video and a new one starts.

    Args:
        datapath: Path of the text file to read.

    Returns:
        A list of ``(FrameBoxes, FrameFeats)`` tuples, one per video. Both
        members are lists of ``float32`` arrays, one array per frame.
    """
    videos = []
    frame_boxes, frame_feats = [], []
    boxes, feats = [], []
    prev_t = None

    with open(datapath, 'r') as file:
        for line in file:
            if 'CameraId' in line:
                t = int(line.split(',')[1].split(':')[1])

                # Close the previous frame only when it collected both boxes
                # and features; otherwise the partial rows carry over.
                if boxes and feats:
                    frame_boxes.append(np.array(boxes, dtype=np.float32))
                    frame_feats.append(np.array(feats, dtype=np.float32))
                    boxes, feats = [], []

                # Compare against None explicitly so that a timestamp of 0 is
                # still treated as a valid previous frame.
                if prev_t is not None and t - prev_t > 1e4:
                    videos.append((frame_boxes, frame_feats))
                    frame_boxes, frame_feats = [], []
                prev_t = t

            if 'box' in line:
                boxes.append(line.split(':')[1].split(',')[:-1])

            if 'feat' in line:
                feats.append(line.split(':')[1].split(',')[:-1])

    # The trailing frame is always flushed (even when empty) so that the
    # result contains at least one video with one frame entry.
    frame_boxes.append(np.array(boxes, dtype=np.float32))
    frame_feats.append(np.array(feats, dtype=np.float32))
    videos.append((frame_boxes, frame_feats))

    return videos

def inference_image(image, detections):
    """Crop every detection out of ``image`` and embed the crops with ReIDEncoder.

    Args:
        image: Image array indexed as ``[row, col, channel]``. The channel
            axis of each crop is reversed before encoding because the model
            expects RGB inputs.
        detections: 2-D array with one detection per row. The first four
            columns are truncated to integers and used as ``x1, y1, x2, y2``,
            clamped to the image bounds.

    Returns:
        ``(imgs, features)``: the list of crops (original channel order) and
        an array of stacked model outputs, one row per detection, starting
        from an empty ``(0, ReIDEncoder.embedding_size)`` array.
    """
    H, W, _ = np.shape(image)
    device = ReIDEncoder.device
    use_half = str(device) != "cpu"    # half precision only off the CPU

    imgs = []
    patches = []
    for d in range(np.size(detections, 0)):
        tlbr = detections[d, :4].astype(np.int_)
        tlbr[0] = max(0, tlbr[0])
        tlbr[1] = max(0, tlbr[1])
        tlbr[2] = min(W - 1, tlbr[2])
        tlbr[3] = min(H - 1, tlbr[3])
        crop = image[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2], :]
        imgs.append(crop)

        # the model expects RGB inputs
        patch = ReIDEncoder.transform(crop[:, :, ::-1].copy())
        patch = patch.to(device=device)
        if use_half:
            patch = patch.half()
        patches.append(patch)

    # Seed with the empty array so that zero detections still yields a
    # (0, embedding_size) result with the same dtype promotion as before.
    feats = [np.zeros((0, ReIDEncoder.embedding_size))]
    batch_size = ReIDEncoder.batch_size

    # Pure inference: no autograd graph is needed for the forward passes.
    with torch.no_grad():
        for start in range(0, len(patches), batch_size):
            batch = torch.stack(patches[start:start + batch_size], dim=0)
            pred = ReIDEncoder.model(batch)
            pred[torch.isinf(pred)] = 1.0    # replace +/-inf outputs
            feats.append(pred.detach().cpu().numpy())

    features = np.vstack(feats)

    return imgs, features

def readimg(imgpath=r"D:\datasets\ym\Img_ResnetData\result\0.png",
            savepath='0_224x224.jpg', size=(224, 224)):
    """Resize one image and write the result to disk.

    Args:
        imgpath: Image file to read. Defaults to the previously hard-coded
            sample path.
        savepath: Output file name. Defaults to ``'0_224x224.jpg'``.
        size: Target size handed to ``cv2.resize`` as ``(width, height)``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``imgpath``.
    """
    image = cv2.imread(imgpath)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read image: {imgpath}")

    cv2.imwrite(savepath, cv2.resize(image, size))


def readdata(datapath):
    """Read one comma-separated feature vector from the first line of a file.

    Only the first line is used; a trailing comma on that line is tolerated.

    Args:
        datapath: Path of the text file to read.

    Returns:
        A float array of shape ``(1, n)`` holding the ``n`` parsed values.

    Raises:
        ValueError: If the first line is missing/blank or a field is not a
            number.
    """
    with open(datapath, 'r') as file:
        first_line = file.readline()

    # Drop surrounding whitespace and a trailing separator before splitting.
    payload = first_line.strip().rstrip(',')
    if not payload:
        raise ValueError(f"no feature values on the first line of {datapath!r}")

    values = [float(field) for field in payload.split(',')]
    return np.array(values).reshape(1, -1)
def readrawimg(datapath, num_imgs=8, size=224):
    """Decode text-dumped RGB images into BGR ``uint8`` arrays.

    The file is read as consecutive groups of ``size`` lines, one group per
    image. Each line holds space-separated pixels written as ``r,g,b``; only
    the first ``size`` pixels of a line are used.

    Args:
        datapath: Path of the text file to read.
        num_imgs: Number of images to decode (default 8).
        size: Rows per image and pixels per row (default 224).

    Returns:
        A list of ``num_imgs`` ``uint8`` arrays of shape ``(rows, size, 3)``
        with channels ordered b, g, r. ``rows`` equals ``size`` unless the
        file runs out of lines, in which case the image is shorter (possibly
        empty).
    """
    with open(datapath, 'r') as file:
        all_lines = file.readlines()

    imgs = []
    for i in range(num_imgs):
        rows = []
        for line in all_lines[i * size:(i + 1) * size]:
            row = []
            for pixel in line.split(' ')[:size]:
                comps = pixel.split(',')    # split each pixel once
                # Stored as r,g,b; emitted as b,g,r.
                row.append((float(comps[2]), float(comps[1]), float(comps[0])))
            rows.append(row)

        # Build the image in one step instead of growing it row by row; the
        # reshape keeps the (0, size, 3) shape when no lines were available.
        img = np.array(rows, dtype=np.float32).reshape(-1, size, 3)
        imgs.append(img.astype(np.uint8))

    return imgs


def inference(image):
    """Embed a single image with ReIDEncoder.

    Args:
        image: Image array indexed as ``[row, col, channel]``. The channel
            axis is reversed before encoding because the model expects RGB
            inputs.

    Returns:
        The model output for a batch containing only this image, as a NumPy
        array, with infinite values replaced by 1.0.
    """
    rgb = image[:, :, ::-1].copy()    # the model expects RGB inputs
    patch = ReIDEncoder.transform(rgb)

    # NOTE(review): inference_image() also casts to half precision on non-CPU
    # devices; this path does not -- confirm which one matches the model.
    patch = patch.to(device=ReIDEncoder.device)
    batch = torch.stack([patch], dim=0)

    # Pure inference: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        pred = ReIDEncoder.model(batch)
        pred[torch.isinf(pred)] = 1.0

    return pred.detach().cpu().numpy()


def test_img_feat():
    """Run a fixed series of feature comparisons on hard-coded sample files.

    Features are computed with inference() for a JPEG, for the image at index
    7 decoded by readrawimg() (also written to ``rawimg.png``), and for a
    resized PNG. A reference vector is loaded with readdata(). Two
    cosine-similarity matrices are computed and kept in locals only; nothing
    is returned.
    """
    # Feature of the JPEG sample.
    jpg_image = cv2.imread(r"D:\datasets\ym\Img_ResnetData\aa\aa.jpg")
    jpg_feat = inference(jpg_image)

    # Reference feature vector stored as text.
    ref_feat = readdata(r"D:\datasets\ym\Img_ResnetData\rawimg\7.txt")

    # Image at index 7 of the raw text dump: save it, then embed it.
    raw_images = readrawimg(r"D:\datasets\ym\Img_ResnetData\rawimg\28950640607_mat_rgb")
    raw_sample = raw_images[7]
    cv2.imwrite("rawimg.png", raw_sample)
    raw_feat = inference(raw_sample)

    raw_dist = cdist(ref_feat, raw_feat, 'cosine')
    cost_matrix = 1 - np.maximum(0.0, raw_dist)

    # Feature of the resized PNG (computed but not compared).
    png_image = cv2.imread(r"D:\datasets\ym\Img_ResnetData\result\0_224x224.png")
    png_feat = inference(png_image)

    # L2-normalise both vectors before the second comparison.
    ref_norm = np.linalg.norm(ref_feat, ord=2, axis=1, keepdims=True)
    jpg_norm = np.linalg.norm(jpg_feat, ord=2, axis=1, keepdims=True)
    ref_unit = ref_feat / ref_norm
    jpg_unit = jpg_feat / jpg_norm

    unit_dist = cdist(ref_unit, jpg_unit, 'cosine')
    cost_matrix = 1 - np.maximum(0.0, unit_dist)

    print("Done!!!")

def main():
    """Compare stored features with freshly computed ones for one sample frame.

    Reads a hard-coded image and tracker data file, takes the boxes and
    features of the first frame of the first video, re-computes features for
    those boxes with inference_image(), builds the cosine-similarity matrix
    between the two feature sets, and writes every crop to the result folder
    as ``<index>.png``.

    Raises:
        FileNotFoundError: If the sample image cannot be loaded.
    """
    import os    # local import: only needed here to build the output paths

    imgpath = r"D:\datasets\ym\Img_ResnetData\20240531-103547_0354b1cb-53fa-48de-86cd-ac3c5b127ada_6921168593576\3568800050000_0.jpeg"
    datapath = r"D:\datasets\ym\Img_ResnetData\20240531-103547_0354b1cb-53fa-48de-86cd-ac3c5b127ada_6921168593576\0_tracker_inout.data"
    savepath = r"D:\datasets\ym\Img_ResnetData\result"

    image = cv2.imread(imgpath)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read image: {imgpath}")

    Videos = read_data_file(datapath)

    # First frame of the first video: its boxes and the stored features.
    bboxes, afeats = Videos[0][0][0], Videos[0][1][0]
    imgs, bfeats = inference_image(image, bboxes)

    aafeats = afeats / np.linalg.norm(afeats, ord=2, axis=1, keepdims=True)
    bbfeats = bfeats / np.linalg.norm(bfeats, ord=2, axis=1, keepdims=True)

    # Similarity between stored and recomputed features; kept in a local for
    # inspection, not returned.
    cost_matrix = 1 - np.maximum(0.0, cdist(aafeats, bbfeats, 'cosine'))

    for i, img in enumerate(imgs):
        # os.path.join replaces the former f"\{i}.png", whose "\{" is an
        # invalid escape sequence.
        cv2.imwrite(os.path.join(savepath, f"{i}.png"), img)

    print("Done!!!!")


if __name__ == '__main__':
    # Entry points are switched by hand: uncomment the one to run.
    # Only test_img_feat() is currently active.
    # main()

    # readimg()

    test_img_feat()