import cv2
import os
import numpy as np
import mediapipe as mp


class hand_pose:
    def __init__(self, min_dc=0.45, min_tc=0.45, max_nh=1):
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=max_nh,
            min_detection_confidence=min_dc,
            min_tracking_confidence=min_tc
        )

    @staticmethod
    def img_show(img):
        cv2.imshow('MediaPipe Hands', img)
        cv2.waitKey(1)

    def draw(self, img):
        '''Detect hands in an RGB image, draw their landmarks on it, and return
        the pixel coordinates of the 21 keypoints for each detected hand.'''
        hand_locals = []
        h_re = self.hands.process(img)
        if h_re.multi_hand_landmarks:
            # Pair each handedness entry with its landmark set so left/right
            # labels and keypoints stay aligned.
            for hand, hand_landmarks in zip(h_re.multi_handedness, h_re.multi_hand_landmarks):
                hand_position = 'Left' if hand.classification[0].label == "Left" else 'Right'
                hand_local = []
                self.mp_drawing.draw_landmarks(img, hand_landmarks, self.mp_hands.HAND_CONNECTIONS)
                imgshow = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                # self.img_show(imgshow)
                # cv2.imwrite(f"./images/{k}.png", imgshow)  # optional dump; requires a frame index k
                # Get the pixel coordinates of the hand keypoints
                for i in range(21):
                    x = hand_landmarks.landmark[i].x * img.shape[1]
                    y = hand_landmarks.landmark[i].y * img.shape[0]
                    hand_local.append((x, y))
                hand_locals.append(hand_local)
        return hand_locals

    def get_hand_local(self, track, image):
        '''track: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
        Crop the tracked box (padded by 50 px), run hand detection on the crop,
        and accumulate the bounding box of all detected hand keypoints in
        full-image coordinates.'''
        H, W, _ = np.shape(image)
        tlbr = track[:4].astype(np.int_)
        x1 = max(0, tlbr[0] - 50)
        y1 = max(0, tlbr[1] - 50)
        x2 = min(W - 1, tlbr[2] + 50)
        y2 = min(H - 1, tlbr[3] + 50)
        img = image[y1:y2, x1:x2, :]
        imgshow = img.copy()
        # hand_local = np.empty((0, 2), dtype=np.int_)
        min_x, max_x = W - 1, 0
        min_y, max_y = H - 1, 0
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h_re = self.hands.process(img)
        if h_re.multi_hand_landmarks:
            for hand_landmarks in h_re.multi_hand_landmarks:
                for i in range(21):
                    x = hand_landmarks.landmark[i].x * img.shape[1]
                    y = hand_landmarks.landmark[i].y * img.shape[0]
                    # hand_local = np.concatenate([hand_local, np.array([[x1 + x, y1 + y]]).astype(np.int_)], axis=0)
                    if min_x > x1 + x: min_x = x1 + x
                    if max_x < x1 + x: max_x = x1 + x
                    if min_y > y1 + y: min_y = y1 + y
                    if max_y < y1 + y: max_y = y1 + y
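

# --- Usage sketch (not part of the original file; added for illustration) ---
# Minimal example of running hand_pose.draw() on a webcam stream. The camera
# index 0, the Esc-to-quit loop, and the green keypoint overlay are assumptions
# chosen for the demo, not requirements of the class.
if __name__ == "__main__":
    hp = hand_pose(min_dc=0.45, min_tc=0.45, max_nh=1)
    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        # draw() expects an RGB image; OpenCV captures frames as BGR.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        hand_locals = hp.draw(rgb)
        # Overlay the returned keypoints on the original BGR frame.
        for hand_local in hand_locals:
            for (x, y) in hand_local:
                cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), -1)
        cv2.imshow('hand_pose demo', frame)
        if cv2.waitKey(1) & 0xFF == 27:  # Esc to quit
            break
    cap.release()
    cv2.destroyAllWindows()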