# -*- coding: utf-8 -*-
"""
Created on Fri Jul  5 13:59:21 2024

func: extract_data()
    Read the data emitted by each module of the pipeline. The interface was
    reworked on top of read_pipeline_data.py (by Ma Xiaohui).

@author: ym
"""
import os
import re

import numpy as np


def str_to_float_arr(s):
    """Convert a comma-separated string of numbers into a list of floats.

    A single trailing comma, if present, is dropped before splitting.

    Raises:
        ValueError: if any field cannot be parsed by ``float`` (this includes
            an empty string).
    """
    if s.endswith(','):
        s = s[:-1]
    return [float(x) for x in s.split(",")]


def find_samebox_in_array(arr, target):
    """Return the index of the first entry of *arr* matching *target*.

    Two entries match when their first four elements compare equal.
    Returns -1 when nothing matches.
    """
    for i, st in enumerate(arr):
        if st[:4] == target[:4]:
            return i
    return -1


def _read_nonblank_lines(datapath):
    """Return the stripped, non-empty lines of the UTF-8 text file *datapath*."""
    with open(datapath, 'r', encoding='utf-8') as fh:
        stripped = (raw.strip() for raw in fh)
        return [line for line in stripped if line]


def _parse_detections(lines):
    """Collect "box:", "feat:" and "output_box:" records, grouped per "CameraId" line.

    Returns ``(bboxes, ffeats, trackerboxes, trackerfeats)``: two lists holding
    one array per group, followed by the stacked (N, 9) output boxes and the
    stacked (N, 256) L2-normalised features of the boxes they matched.
    """
    bboxes, ffeats = [], []
    # Seed with empty arrays so the result keeps its (0, 9) / (0, 256) shape
    # when no "output_box:" line is present.
    box_chunks = [np.empty((0, 9), dtype=np.float64)]
    feat_chunks = [np.empty((0, 256), dtype=np.float64)]
    boxes, feats, tboxes, tfeats = [], [], [], []

    def flush_group():
        # np.array() copies, so the buffers can safely be cleared and reused.
        if boxes:
            bboxes.append(np.array(boxes))
        if feats:
            ffeats.append(np.array(feats))
        if tboxes:
            box_chunks.append(np.array(tboxes))
        if tfeats:
            feat_chunks.append(np.array(tfeats))
        for buf in (boxes, feats, tboxes, tfeats):
            buf.clear()

    for line in lines:
        if "CameraId" in line:
            flush_group()
        # "output_box:" also contains "box:", so it is excluded explicitly.
        if "box:" in line and "output_box:" not in line:
            boxes.append(str_to_float_arr(line.partition("box:")[2].strip()))
        if "feat:" in line:
            feats.append(str_to_float_arr(line.partition("feat:")[2].strip()))
        if "output_box:" in line:
            box = str_to_float_arr(line.partition("output_box:")[2].strip())
            tboxes.append(box)
            index = find_samebox_in_array(boxes, box)
            if index >= 0:
                # Store the L2-normalised feature of the matching input box.
                feat_f = np.asarray(feats[index])
                tfeats.append(feat_f / np.linalg.norm(feat_f))
    flush_group()

    # Concatenate once at the end instead of once per group.
    return bboxes, ffeats, np.concatenate(box_chunks), np.concatenate(feat_chunks)


def _parse_tracking_boxes(lines):
    """Group the numeric rows that follow each line containing "tracking_".

    Once the first "tracking_" line has been seen, every later line that does
    not contain "tracking_" is parsed as a comma-separated float row.
    """
    groups, current = [], []
    in_tracking = False
    for line in lines:
        is_header = "tracking_" in line
        if in_tracking and not is_header:
            current.append(str_to_float_arr(line))
        if is_header:
            in_tracking = True
            if current:
                groups.append(np.array(current))
            current = []
    if current:
        groups.append(np.array(current))
    return groups


def extract_data(datapath):
    """Parse a pipeline data file into detection, tracker and tracking outputs.

    Args:
        datapath: path of the UTF-8 text file to read.

    Returns:
        A 6-tuple ``(bboxes, ffeats, trackerboxes, tracker_feat_dict,
        trackingboxes, tracking_feat_dict)``:

        * bboxes / ffeats: lists with one array per "CameraId" group, built
          from the "box:" and "feat:" lines.
        * trackerboxes: (N, 9) array of all "output_box:" rows.
        * tracker_feat_dict: ``{"frame_<fid>": {"feats": {bid: feature}}}``
          where fid / bid are columns 7 / 8 of each tracker box.
        * trackingboxes: list of arrays, one per "tracking_" section.
        * tracking_feat_dict: ``{"track_<tid>": {"feats": {"<fid>_<bid>":
          feature}}}`` where tid is column 4 of each tracking box.

    Raises:
        AssertionError: if the number of box groups and feature groups differ,
            or the number of tracker boxes and tracker features differ.
        KeyError: if a tracking box refers to a frame / box id that has no
            tracker feature.
        ValueError: if a numeric field cannot be parsed.
    """
    lines = _read_nonblank_lines(datapath)
    bboxes, ffeats, trackerboxes, trackerfeats = _parse_detections(lines)

    # Raised explicitly (rather than via `assert`) so the checks survive
    # `python -O`; the exception type and messages are unchanged.
    if len(bboxes) != len(ffeats):
        raise AssertionError("Error at Yolo output!")
    if len(trackerboxes) != len(trackerfeats):
        raise AssertionError("Error at tracker output!")

    tracker_feat_dict = {}
    for row, feat in zip(trackerboxes, trackerfeats):
        fid, bid = int(row[7]), int(row[8])
        frame = tracker_feat_dict.setdefault(f"frame_{fid}", {"feats": {}})
        frame["feats"][bid] = feat

    trackingboxes = _parse_tracking_boxes(lines)

    tracking_feat_dict = {}
    for group in trackingboxes:
        for box in group:
            tid, fid, bid = int(box[4]), int(box[7]), int(box[8])
            track = tracking_feat_dict.setdefault(f"track_{tid}", {"feats": {}})
            track["feats"][f"{fid}_{bid}"] = tracker_feat_dict[f"frame_{fid}"]["feats"][bid]

    return bboxes, ffeats, trackerboxes, tracker_feat_dict, trackingboxes, tracking_feat_dict


def read_tracking_output(filepath):
    """Read a file of comma-separated rows and split them by row width.

    Rows with 9 values are collected as boxes, rows with 256 values as
    features; rows of any other width are ignored.

    Returns:
        ``(boxes, feats)`` as NumPy arrays.
    """
    boxes = []
    feats = []
    with open(filepath, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            if not line:
                continue
            data = np.array(str_to_float_arr(line))
            if data.size == 9:
                boxes.append(data)
            elif data.size == 256:
                feats.append(data)
    return np.array(boxes), np.array(feats)


def _finalize_record(record, barcodes, similarities):
    """Attach the non-empty barcode / similarity lists to *record* and return it."""
    if barcodes:
        record['barcode'] = barcodes
    if similarities:
        record['similarity'] = similarities
    return record


def read_deletedBarcode_file(filePth):
    """Parse a deleted-barcode report into a list of record dicts.

    Records are separated by blank lines. Inside a record, "SeqDir:<value>"
    and "Deleted:<value>" lines are stored under those keys; after a "List:"
    line every "<label>:<value>" line is appended to the record's 'barcode'
    and 'similarity' lists. Single and double quotes are removed from every
    line before parsing.

    The value is everything after the FIRST colon, so values that contain a
    colon themselves (e.g. a Windows path) are kept whole. Lines without any
    colon are skipped.

    Returns:
        A list of dicts with the keys 'SeqDir', 'Deleted', 'barcode' and
        'similarity' (each present only when the file provided it).
    """
    with open(filePth, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    all_list = []
    record, barcode_list, similarity_list = {}, [], []
    in_list = False

    for raw in lines:
        line = raw.strip().replace("'", '').replace('"', '')
        if not line.strip():
            # A blank line closes the current record.
            record = _finalize_record(record, barcode_list, similarity_list)
            if record:
                all_list.append(record)
            in_list = False
            record, barcode_list, similarity_list = {}, [], []
            continue

        label, sep, value = line.partition(':')
        if not sep:
            # Not a "key:value" line; nothing to extract.
            continue

        if label == 'SeqDir':
            record['SeqDir'] = value
        if label == 'Deleted':
            record['Deleted'] = value
        if label == 'List':
            in_list = True
            continue
        if in_list:
            barcode_list.append(label)
            similarity_list.append(value)

    # Close the last record when the file does not end with a blank line.
    record = _finalize_record(record, barcode_list, similarity_list)
    if record:
        all_list.append(record)

    return all_list


if __name__ == "__main__":
    files_path = 'D:/contrast/dataset/1_to_n/709/20240709-112658_6903148351833/'

    # Walk every entry of the directory and parse each "*track.data" file.
    for filename in os.listdir(files_path):
        file_path = os.path.join(files_path, filename)
        if os.path.isfile(file_path) and filename.find("track.data") > 0:
            extract_data(file_path)

    print("Done")