diff --git a/contrast/.idea/.gitignore b/contrast/.idea/.gitignore new file mode 100644 index 0000000..35410ca --- /dev/null +++ b/contrast/.idea/.gitignore @@ -0,0 +1,8 @@ +# Files ignored by default +/shelf/ +/workspace.xml +# Editor-based HTTP client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/contrast/.idea/contrastInference.iml b/contrast/.idea/contrastInference.iml new file mode 100644 index 0000000..6d6038d --- /dev/null +++ b/contrast/.idea/contrastInference.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/contrast/.idea/deployment.xml b/contrast/.idea/deployment.xml new file mode 100644 index 0000000..b7f9a78 --- /dev/null +++ b/contrast/.idea/deployment.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/contrast/.idea/inspectionProfiles/Project_Default.xml b/contrast/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..920d523 --- /dev/null +++ b/contrast/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,12 @@ + + + + \ No newline at end of file diff --git a/contrast/.idea/inspectionProfiles/profiles_settings.xml b/contrast/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/contrast/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/contrast/.idea/misc.xml b/contrast/.idea/misc.xml new file mode 100644 index 0000000..3afa107 --- /dev/null +++ b/contrast/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/contrast/.idea/modules.xml b/contrast/.idea/modules.xml new file mode 100644 index 0000000..316bf04 --- /dev/null +++ b/contrast/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/contrast/__init__.py b/contrast/__init__.py new file mode 100644 index 0000000..e3e0f3f --- /dev/null +++ b/contrast/__init__.py @@ -0,0 +1 @@ +# from .config import config \ No newline at end of file diff --git a/contrast/__pycache__/__init__.cpython-39.pyc b/contrast/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..f894aad Binary files /dev/null and b/contrast/__pycache__/__init__.cpython-39.pyc differ diff --git a/contrast/__pycache__/config.cpython-38.pyc b/contrast/__pycache__/config.cpython-38.pyc new file mode 100644 index 0000000..c6934bc Binary files /dev/null and b/contrast/__pycache__/config.cpython-38.pyc differ diff --git a/contrast/__pycache__/config.cpython-39.pyc b/contrast/__pycache__/config.cpython-39.pyc new file mode 100644 index 0000000..ba23f1d Binary files /dev/null and b/contrast/__pycache__/config.cpython-39.pyc differ diff --git a/contrast/__pycache__/inference.cpython-39.pyc b/contrast/__pycache__/inference.cpython-39.pyc new file mode 100644 index 0000000..54fce2e Binary files /dev/null and b/contrast/__pycache__/inference.cpython-39.pyc differ
diff --git a/contrast/config.py b/contrast/config.py new file mode 100644 index 0000000..aea7680 --- /dev/null +++ b/contrast/config.py @@ -0,0 +1,84 @@ +import torch +import torchvision.transforms as T + + +class Config: + # network settings + backbone = 'vit' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5] + metric = 'softmax' # [cosface, arcface, softmax] + cbam = True + embedding_size = 256 # 256 + drop_ratio = 0.5 + img_size = 224 + + teacher = 'vit' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5] + student = 'resnet' + # data preprocess + # input_shape = [1, 128, 128] + """transforms.RandomCrop(size), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomHorizontalFlip(), + RandomRotate(15, 0.3), + # RandomGaussianBlur()""" + + train_transform = T.Compose([ + T.ToTensor(), + T.Resize((img_size, img_size)), + # T.RandomCrop(img_size*4//5), + # T.RandomHorizontalFlip(p=0.5), + T.RandomRotation(180), + T.ColorJitter(brightness=0.5), + T.ConvertImageDtype(torch.float32), + T.Normalize(mean=[0.5], std=[0.5]), + ]) + test_transform = T.Compose([ + T.ToTensor(), + T.Resize((img_size, img_size)), + T.ConvertImageDtype(torch.float32), + T.Normalize(mean=[0.5], std=[0.5]), + ]) + + # dataset + train_root = './data/2250_train/train' # dataset after one initial filtering pass + # train_root = './data/0625_train/train' + test_root = "./data/2250_train/val/" + # test_root = "./data/0625_train/val" + + test_list = "./data/2250_train/val_pair.txt" + test_group_json = "./data/2250_train/cross_same.json" + # test_group_json = "./data/0625_train/cross_same.json" + # test_list = "./data/test_data_100/val_pair.txt" + + # training settings + checkpoints = "checkpoints/vit_b_16_0815/" # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3] + restore = True + # restore_model = "checkpoints/renet18_2250_0315/best_resnet18_2250_0315.pth" # best_resnet18_1491_0306.pth + restore_model = "checkpoints/vit_b_16_0730/best.pth" # best_resnet18_1491_0306.pth + + # test_model = "./checkpoints/renet18_1887_0311/best_resnet18_1887_0311.pth" + testbackbone = 'resnet18' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5] + # test_val = "./data/2250_train" + test_val = "./data/0625_train" + test_model = "checkpoints/resnet18_0721/best.pth" + + train_batch_size = 128 # 256 + test_batch_size = 256 # 256 + + + epoch = 300 + optimizer = 'adamw' # ['sgd', 'adam', 'adamw'] + lr = 1e-3 # 1e-2 + lr_step = 10 # 10 + lr_decay = 0.95 # 0.98 + weight_decay = 5e-4 + loss = 'focal_loss' # ['focal_loss', 'cross_entropy'] + device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu') + # device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + + pin_memory = True # if memory is large, set it True to speed up a bit + num_workers = 4 # dataloader + + group_test = True + # group_test = False + +config = Config() \ No newline at end of file
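The `Config` class above is instantiated once at module level (`config = Config()`), and the other files in this diff import that singleton as `conf` (see `from config import config as conf` in contrast_one2one.py and inference.py below). A minimal usage sketch; `sample.jpg` is a placeholder path, not a file from this repository:

```python
# Illustrative only: how the shared config object is consumed elsewhere in this diff.
from PIL import Image

from config import config as conf  # module-level singleton defined above

img = Image.open("sample.jpg").convert("RGB")    # placeholder image path
tensor = conf.test_transform(img)                # ToTensor -> Resize(224, 224) -> float32 -> Normalize
batch = tensor.unsqueeze(0).to(conf.device)      # shape (1, 3, 224, 224) on conf.device
print(batch.shape, conf.embedding_size)          # embedding_size == 256
```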
diff --git a/contrast/contrast_one2one.py b/contrast/contrast_one2one.py new file mode 100644 index 0000000..42b9a2a --- /dev/null +++ b/contrast/contrast_one2one.py @@ -0,0 +1,380 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Aug 30 17:53:03 2024 + +1. Check that, for the same CamerType, the number of CamerID entries in track.data = the number of images = the number of frame IDs = the maximum frame ID + +2. Read boxes and feats from 0/1_tracking_output.data, with len(boxes) = len(feats), plus the frame-ID constraint + +3. Prefer the front camera + +4. Save the image data + +5. Structure of one shopping event + shopEvent: {barcode: + type: getout, input + front_traj:[{imgpath: str, + box: array(1, 9), + feat: array(1, 256) + }] + back_traj: [{imgpath: str, + box: array(1, 9), + feat: array(1, 256) + }] + } + + + +@author: ym + +""" +import numpy as np +import cv2 +import os +import sys + +import pickle +import torch +import time +import json +from config import config as conf +from model import resnet18 +from inference import load_contrast_model +from inference import featurize + + +sys.path.append(r"D:\DetectTracking") +from tracking.utils.read_data import extract_data, read_tracking_output, read_deletedBarcode_file + +IMG_FORMAT = ['.bmp', '.jpg', '.jpeg', '.png'] + +model = load_contrast_model() + +def creat_shopping_event(basepath): + eventList = [] + + '''Part 1: build the list of put-in (input) shopping events''' + k = 0 + for filename in os.listdir(basepath): + # filename = "20240723-155413_6904406215720" + + '''each filename corresponds to one shopping event''' + filepath = os.path.join(basepath, filename) + + '''================ 0. Check that filename and filepath are correct and valid ================''' + nmlist = filename.split('_') + if filename.find('2024')<0 or len(nmlist)!=2 or len(nmlist[0])!=15 or len(nmlist[1])<11: + continue + if not os.path.isdir(filepath): continue + print(f"Event name: {filename}") + + '''================ 1. Build the event description dict, 9 items for now ===============''' + event = {} + event['barcode'] = nmlist[1] + event['type'] = 'input' + event['filepath'] = filepath + event['back_imgpaths'] = [] + event['front_imgpaths'] = [] + event['back_boxes'] = np.empty((0, 9), dtype=np.float64) + event['front_boxes'] = np.empty((0, 9), dtype=np.float64) + event['back_feats'] = np.empty((0, 256), dtype=np.float64) + event['front_feats'] = np.empty((0, 256), dtype=np.float64) + # event['feats_compose'] = np.empty((0, 256), dtype=np.float64) + # event['feats_select'] = np.empty((0, 256), dtype=np.float64) + + + '''================= 2. Read the .data files =============================''' + for dataname in os.listdir(filepath): + # filename = '1_track.data' + datapath = os.path.join(filepath, dataname) + if not os.path.isfile(datapath): continue + + CamerType = dataname.split('_')[0] + ''' 2.1 Read data from 0/1_track.data, not used for now''' + # if dataname.find("_track.data")>0: + # bboxes, ffeats, trackerboxes, tracker_feat_dict, trackingboxes, tracking_feat_dict = extract_data(datapath) + + ''' 2.2 Read data from 0/1_tracking_output.data''' + if dataname.find("_tracking_output.data")>0: + tracking_output_boxes, tracking_output_feats = read_tracking_output(datapath) + if len(tracking_output_boxes) != len(tracking_output_feats): continue + if CamerType == '0': + event['back_boxes'] = tracking_output_boxes + event['back_feats'] = tracking_output_feats + elif CamerType == '1': + event['front_boxes'] = tracking_output_boxes + event['front_feats'] = tracking_output_feats + + # '''Choice of the event-level feature representation''' + # bk_feats = event['back_feats'] + # ft_feats = event['front_feats'] + + # feats_compose = np.empty((0, 256), dtype=np.float64) + # if len(ft_feats): + # feats_compose = np.concatenate((feats_compose, ft_feats), axis=0) + # if len(bk_feats): + # feats_compose = np.concatenate((feats_compose, bk_feats), axis=0) + # event['feats_compose'] = feats_compose + + # '''Build front-camera features''' + # if len(ft_feats): + # event['feats_select'] = ft_feats + + + + '''================ 3. Read the image file paths and sort them by frame ID =============''' + frontImgs, frontFid = [], [] + backImgs, backFid = [], [] + for imgname in os.listdir(filepath): + name, ext = os.path.splitext(imgname) + if ext not in IMG_FORMAT or name.find('frameId')<0: continue + + CamerType = name.split('_')[0] + frameId = int(name.split('_')[3]) + imgpath = os.path.join(filepath, imgname) + if CamerType == '0': + backImgs.append(imgpath) + backFid.append(frameId) + if CamerType == '1': + frontImgs.append(imgpath) + frontFid.append(frameId) + + frontIdx = np.argsort(np.array(frontFid)) + backIdx = np.argsort(np.array(backFid)) + + '''3.1 Build the front/back image path lists sorted by frame ID''' + frontImgs = [frontImgs[i] for i in frontIdx] + backImgs = [backImgs[i] for i in backIdx] + + '''3.2 Add the front/back image paths to the event dict''' + bfid = event['back_boxes'][:, 7].astype(np.int64) + ffid = event['front_boxes'][:, 7].astype(np.int64) + if len(bfid) and max(bfid) <= len(backImgs): + event['back_imgpaths'] = [backImgs[i-1] for i in bfid] + if len(ffid) and max(ffid) <= len(frontImgs): + event['front_imgpaths'] = [frontImgs[i-1] for i in ffid] + + + '''================ 4. Check that the event is valid and append it to the event list ==========''' + condt1 = len(event['back_imgpaths'])==0 or len(event['front_imgpaths'])==0 + condt2 = len(event['front_feats'])==0 and len(event['back_feats'])==0 + + if condt1 or condt2: + print(f" Error, condt1: {condt1}, condt2: {condt2}") + continue + + eventList.append(event) + + # k += 1 + # if k==1: + # continue + + '''Part 2: build the list of take-out (getout) events, not handled yet''' + # delepath = os.path.join(basepath, 'deletedBarcode.txt') + # bcdList = read_deletedBarcode_file(delepath) + # for slist in bcdList: + # getoutFold = slist['SeqDir'].strip() + # getoutPath = os.path.join(basepath, getoutFold) + + # '''skip when the take-out event folder does not exist''' + # if not os.path.exists(getoutPath) and not os.path.isdir(getoutPath): + # continue + + # ''' build the take-out event dict ''' + # event = {} + # event['barcode'] = slist['Deleted'].strip() + # event['type'] = 'getout' + # event['basepath'] = getoutPath + + + return eventList
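For reference, the dict built by `creat_shopping_event` for each event folder (e.g. `20240723-155413_6904406215720`) carries nine keys. A purely illustrative sketch of its expected layout; the paths are made-up placeholders:

```python
# Illustrative only: the layout of one event dict returned in eventList.
import numpy as np

event = {
    'barcode': '6904406215720',
    'type': 'input',                                   # put-in event; 'getout' is reserved for take-out events
    'filepath': r'...\20240723-155413_6904406215720',  # placeholder
    'back_imgpaths': [r'...\0_20240723155413_frameId_1.png'],   # placeholder, back camera (CamerType '0')
    'front_imgpaths': [r'...\1_20240723155413_frameId_1.png'],  # placeholder, front camera (CamerType '1')
    'back_boxes': np.zeros((1, 9)),                    # columns: x1, y1, x2, y2, tid, score, cls, fid, bid
    'front_boxes': np.zeros((1, 9)),
    'back_feats': np.zeros((1, 256)),                  # one 256-d embedding per box
    'front_feats': np.zeros((1, 256)),
}
assert len(event['back_boxes']) == len(event['back_feats'])
```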
+ +def get_std_barcodeDict(bcdpath): + stdBlist = [] + for filename in os.listdir(bcdpath): + filepath = os.path.join(bcdpath, filename) + if not os.path.isdir(filepath) or not filename.isdigit(): continue + + stdBlist.append(filename) + + + bcdpaths = [(barcode, os.path.join(bcdpath, barcode)) for barcode in stdBlist] + + k = 0 + for barcode, bpath in bcdpaths: + stdBarcodeDict = {} + stdBarcodeDict[barcode] = [] + for root, dirs, files in os.walk(bpath): + + imgpaths = [] + if "base" in dirs: + broot = os.path.join(root, "base") + for imgname in os.listdir(broot): + imgpath = os.path.join(broot, imgname) + _, ext = os.path.splitext(imgpath) + if ext not in IMG_FORMAT: continue + imgpaths.append(imgpath) + + stdBarcodeDict[barcode].extend(imgpaths) + break + + else: + for imgname in files: + imgpath = os.path.join(root, imgname) + _, ext = os.path.splitext(imgpath) + if ext not in IMG_FORMAT: continue + imgpaths.append(imgpath) + stdBarcodeDict[barcode].extend(imgpaths) + + jsonpath = os.path.join(r'\\192.168.1.28\share\测试_202406\contrast\barcodes', f"{barcode}.pickle") + with open(jsonpath, 'wb') as f: + pickle.dump(stdBarcodeDict, f) + + print(f"Barcode: {barcode}") + + k += 1 + if k == 10: + break + + + + + return stdBarcodeDict + + +def one2one_test(filepath): + + savepath = r'\\192.168.1.28\share\测试_202406\contrast' + + '''Get the list of standard barcodes''' + bcdpath = r'\\192.168.1.28\share\已标注数据备份\对比数据\barcode\barcode_1771' + stdBarcodeDict = get_std_barcodeDict(bcdpath) + + + eventList = creat_shopping_event(filepath) + print("=========== eventList has been generated! ===========") + barcodeDict = {} + for event in eventList: + '''9 items: barcode, type, filepath, back_imgpaths, front_imgpaths, + back_boxes, front_boxes, back_feats, front_feats + ''' + + barcode = event['barcode'] + if barcode not in stdBarcodeDict.keys(): + continue + + + if len(event.get('feats_select', [])): + event_feats = event['feats_select'] + elif len(event['back_feats']): + event_feats = event['back_feats'] + else: + continue + + std_bcdpath = os.path.join(bcdpath, barcode) + + + + for root, dirs, files in os.walk(std_bcdpath): + if "base" in dirs: + std_bcdpath = os.path.join(root, "base") + break + + + + + + + + + + + + + + '''Save the trajectory sub-images of one shopping event''' + basename = os.path.basename(event['filepath']) + spath = os.path.join(savepath, basename) + if not os.path.exists(spath): + os.makedirs(spath) + cameras = ('front', 'back') + for camera in cameras: + if camera == 'front': + boxes = event['front_boxes'] + imgpaths = event['front_imgpaths'] + else: + boxes = event['back_boxes'] + imgpaths = event['back_imgpaths'] + + for i, box in enumerate(boxes): + x1, y1, x2, y2, tid, score, cls, fid, bid = box + + imgpath = imgpaths[i] + image = cv2.imread(imgpath) + subimg = image[int(y1/2):int(y2/2), int(x1/2):int(x2/2), :] + + camerType, timeTamp, _, frameID = os.path.basename(imgpath).split('.')[0].split('_') + subimgName = f"{camerType}_{tid}_fid({fid}, {frameID}).png" + subimgPath = os.path.join(spath, subimgName) + + cv2.imwrite(subimgPath, subimg) + print(f"Image saved: {basename}") + + + +def batch_inference(imgpaths, batch): + size = len(imgpaths) + groups = [] + for i in range(0, size, batch): + end = min(batch + i, size) + groups.append(imgpaths[i: end]) + + features = [] + for group in groups: + feature = featurize(group, conf.test_transform, model, conf.device) + features.append(feature) + + return features + +def main_infer(): + + + + bpath = r"\\192.168.1.28\share\测试_202406\contrast\barcodes" + for filename in os.listdir(bpath): + filepath = os.path.join(bpath, filename) + + with open(filepath, 'rb') as f: + bpDict = pickle.load(f) + + for barcode, imgpaths in bpDict.items(): + feature = batch_inference(imgpaths, 8) + + print("Done!!!") + + + +def main(): + fplist = [#r'\\192.168.1.28\share\测试_202406\0723\0723_1', + #r'\\192.168.1.28\share\测试_202406\0723\0723_2', + r'\\192.168.1.28\share\测试_202406\0723\0723_3', + #r'\\192.168.1.28\share\测试_202406\0722\0722_01', + #r'\\192.168.1.28\share\测试_202406\0722\0722_02' + ] + + + + for filepath in fplist: + one2one_test(filepath) + + # for filepath in fplist: + # try: + # one2one_test(filepath) + + # except Exception as e: + # print(f'{filepath}, Error: {e}') + +if __name__ == '__main__': + # main() + main_infer() \ No newline at end of file
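Note that `one2one_test` above currently stops after saving the trajectory sub-images; the actual 1:1 comparison between an event's features and the standard barcode features is not implemented in this diff. A hypothetical sketch of what that scoring step could look like, using plain cosine similarity; `one2one_score`, `event_feats` and `std_feats` are names of my own, not from this code:

```python
# Hypothetical sketch, not the author's implementation: score one event against one barcode.
import numpy as np

def one2one_score(event_feats: np.ndarray, std_feats: np.ndarray) -> float:
    """event_feats: (n_event, 256); std_feats: (n_std, 256); returns a similarity in [-1, 1]."""
    a = event_feats / np.linalg.norm(event_feats, axis=1, keepdims=True)
    b = std_feats / np.linalg.norm(std_feats, axis=1, keepdims=True)
    sim = a @ b.T                          # pairwise cosine similarities, shape (n_event, n_std)
    return float(sim.max(axis=1).mean())   # best match per event feature, averaged
```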
diff --git a/contrast/inference.py b/contrast/inference.py new file mode 100644 index 0000000..e7a6dbc --- /dev/null +++ b/contrast/inference.py @@ -0,0 +1,103 @@ +import os +import os.path as osp + +import torch + +import numpy as np +from model import resnet18 +from PIL import Image + +from torch.nn.functional import softmax +from config import config as conf +import time + +embedding_size = conf.embedding_size +img_size = conf.img_size +device = conf.device + +def load_contrast_model(): + model = resnet18().to(conf.device) + model.load_state_dict(torch.load(conf.test_model, map_location=conf.device)) + model.eval() + print('load model {} '.format(conf.testbackbone)) + + return model + + +def group_image(imageDirs, batch) -> list: + """Group image paths by batch size""" + images = [] + with
os.scandir(imageDirs) as entries: + for imgpth in entries: + print(imgpth) + images.append(os.sep.join([imageDirs, imgpth.name])) + print(f"{len(images)} images in {imageDirs}") + size = len(images) + res = [] + for i in range(0, size, batch): + end = min(batch + i, size) + res.append(images[i: end]) + return res + +def test_preprocess(images: list, transform) -> torch.Tensor: + res = [] + for img in images: + # print(img) + im = Image.open(img) + im = transform(im) + res.append(im) + # data = torch.cat(res, dim=0) # shape: (batch, 128, 128) + # data = data[:, None, :, :] # shape: (batch, 1, 128, 128) + data = torch.stack(res) + return data + +def featurize(images: list, transform, net, device) -> dict: + """featurize each image and save into a dictionary + Args: + images: image paths + transform: test transform + net: pretrained model + device: cpu or cuda + Returns: + Dict (key: imagePath, value: feature) + """ + data = test_preprocess(images, transform) + data = data.to(device) + net = net.to(device) + with torch.no_grad(): + features = net(data) + # res = {img: feature for (img, feature) in zip(images, features)} + return features + + + +if __name__ == '__main__': + # Network Setup + if conf.testbackbone == 'resnet18': + model = resnet18().to(device) + else: + raise ValueError('Have not model {}'.format(conf.backbone)) + + print('load model {} '.format(conf.testbackbone)) + # model = nn.DataParallel(model).to(conf.device) + model.load_state_dict(torch.load(conf.test_model, map_location=conf.device)) + model.eval() + + # images = unique_image(conf.test_list) + # images = [osp.join(conf.test_val, img) for img in images] + # print('images', images) + # images = ['./data/2250_train/val/6920616313186/6920616313186_6920616313186_20240220-124502_53d2e103-ae3a-4689-b745-9d8723b770fe_front_returnGood_70f75407b7ae_31_01.jpg'] + + + # groups = group_image(conf.test_val, conf.test_batch_size) ##根据batch_size取图片 + groups = group_image('img_test', 1) ##根据batch_size取图片, 默认batch_size = 8 + + feature_dict = dict() + for group in groups: + s = time.time() + features = featurize(group, conf.test_transform, model, conf.device) + e = time.time() + print('time: {}'.format(e - s)) + # out = softmax(features, dim=1).argmax(dim=1) + # print('d >>> {}'. 
format(out)) + # feature_dict.update(d) diff --git a/contrast/model/__init__.py b/contrast/model/__init__.py new file mode 100644 index 0000000..9eebc77 --- /dev/null +++ b/contrast/model/__init__.py @@ -0,0 +1 @@ +from .resnet_pre import resnet18, resnet34, resnet50, resnet14 diff --git a/contrast/model/__pycache__/__init__.cpython-38.pyc b/contrast/model/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..160bc37 Binary files /dev/null and b/contrast/model/__pycache__/__init__.cpython-38.pyc differ diff --git a/contrast/model/__pycache__/__init__.cpython-39.pyc b/contrast/model/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..539ec67 Binary files /dev/null and b/contrast/model/__pycache__/__init__.cpython-39.pyc differ diff --git a/contrast/model/__pycache__/resnet_pre.cpython-38.pyc b/contrast/model/__pycache__/resnet_pre.cpython-38.pyc new file mode 100644 index 0000000..199085d Binary files /dev/null and b/contrast/model/__pycache__/resnet_pre.cpython-38.pyc differ diff --git a/contrast/model/__pycache__/resnet_pre.cpython-39.pyc b/contrast/model/__pycache__/resnet_pre.cpython-39.pyc new file mode 100644 index 0000000..0301f55 Binary files /dev/null and b/contrast/model/__pycache__/resnet_pre.cpython-39.pyc differ diff --git a/contrast/model/resnet_pre.py b/contrast/model/resnet_pre.py new file mode 100644 index 0000000..5e52ad9 --- /dev/null +++ b/contrast/model/resnet_pre.py @@ -0,0 +1,462 @@ +import torch +import torch.nn as nn +from config import config as conf + +try: + from torch.hub import load_state_dict_from_url +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url +# from .utils import load_state_dict_from_url + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', + 'wide_resnet50_2', 'wide_resnet101_2'] + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', + 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', + 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', + 'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth', + 'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class SpatialAttention(nn.Module): + def __init__(self, kernel_size=7): + super(SpatialAttention, self).__init__() + + assert kernel_size in (3, 7), 'kernel size must be 3 or 7' + padding = 3 if kernel_size == 7 else 1 + + self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = torch.mean(x, dim=1, keepdim=True) + max_out, _ = torch.max(x, dim=1, keepdim=True) + x = torch.cat([avg_out, 
max_out], dim=1) + x = self.conv1(x) + return self.sigmoid(x) + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None, cam=False, bam=False): + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + self.cam = cam + self.bam = bam + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + if self.cam: + if planes == 64: + self.globalAvgPool = nn.AvgPool2d(56, stride=1) + elif planes == 128: + self.globalAvgPool = nn.AvgPool2d(28, stride=1) + elif planes == 256: + self.globalAvgPool = nn.AvgPool2d(14, stride=1) + elif planes == 512: + self.globalAvgPool = nn.AvgPool2d(7, stride=1) + + self.fc1 = nn.Linear(in_features=planes, out_features=round(planes / 16)) + self.fc2 = nn.Linear(in_features=round(planes / 16), out_features=planes) + self.sigmod = nn.Sigmoid() + if self.bam: + self.bam = SpatialAttention() + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + if self.cam: + ori_out = self.globalAvgPool(out) + out = out.view(out.size(0), -1) + out = self.fc1(out) + out = self.relu(out) + out = self.fc2(out) + out = self.sigmod(out) + out = out.view(out.size(0), out.size(-1), 1, 1) + out = out * ori_out + + if self.bam: + out = out*self.bam(out) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 
+ + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None, cam=False, bam=False): + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + self.cam = cam + self.bam = bam + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + if self.cam: + if planes == 64: + self.globalAvgPool = nn.AvgPool2d(56, stride=1) + elif planes == 128: + self.globalAvgPool = nn.AvgPool2d(28, stride=1) + elif planes == 256: + self.globalAvgPool = nn.AvgPool2d(14, stride=1) + elif planes == 512: + self.globalAvgPool = nn.AvgPool2d(7, stride=1) + + self.fc1 = nn.Linear(planes * self.expansion, round(planes / 4)) + self.fc2 = nn.Linear(round(planes / 4), planes * self.expansion) + self.sigmod = nn.Sigmoid() + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + if self.cam: + ori_out = self.globalAvgPool(out) + out = out.view(out.size(0), -1) + out = self.fc1(out) + out = self.relu(out) + out = self.fc2(out) + out = self.sigmod(out) + out = out.view(out.size(0), out.size(-1), 1, 1) + out = out * ori_out + out += identity + out = self.relu(out) + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=conf.embedding_size, zero_init_residual=False, + groups=1, width_per_group=64, replace_stride_with_dilation=None, + norm_layer=None, scale=0.75): + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, int(64*scale), layers[0]) + self.layer2 = self._make_layer(block, int(128*scale), layers[1], stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, int(256*scale), layers[2], stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, int(512*scale), layers[3], stride=2, + dilate=replace_stride_with_dilation[2]) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(int(512 * block.expansion*scale), num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + 
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. + # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, + base_width=self.base_width, dilation=self.dilation, + norm_layer=norm_layer)) + return nn.Sequential(*layers) + + def _forward_impl(self, x): + # See note [TorchScript super()] + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + # print('poolBefore', x.shape) + x = self.avgpool(x) + # print('poolAfter', x.shape) + x = torch.flatten(x, 1) + # print('fcBefore',x.shape) + x = self.fc(x) + + # print('fcAfter',x.shape) + + return x + + def forward(self, x): + return self._forward_impl(x) + + +# def _resnet(arch, block, layers, pretrained, progress, **kwargs): +# model = ResNet(block, layers, **kwargs) +# if pretrained: +# state_dict = load_state_dict_from_url(model_urls[arch], +# progress=progress) +# model.load_state_dict(state_dict, strict=False) +# return model +def _resnet(arch, block, layers, pretrained, progress, **kwargs): + model = ResNet(block, layers, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], + progress=progress) + + src_state_dict = state_dict + target_state_dict = model.state_dict() + skip_keys = [] + # skip mismatch size tensors in case of pretraining + for k in src_state_dict.keys(): + if k not in target_state_dict: + continue + if src_state_dict[k].size() != target_state_dict[k].size(): + skip_keys.append(k) + for k in skip_keys: + del src_state_dict[k] + missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False) + + return model + + +def resnet14(pretrained=True, progress=True, **kwargs): + r"""ResNet-14 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet18', BasicBlock, [2, 1, 1, 2], pretrained, progress, + **kwargs) + + +def resnet18(pretrained=True, progress=True, **kwargs): + r"""ResNet-18 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to 
stderr + """ + return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, + **kwargs) + + +def resnet34(pretrained=False, progress=True, **kwargs): + r"""ResNet-34 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, + **kwargs) + + +def resnet50(pretrained=False, progress=True, **kwargs): + r"""ResNet-50 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, + **kwargs) + + +def resnet101(pretrained=False, progress=True, **kwargs): + r"""ResNet-101 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, + **kwargs) + + +def resnet152(pretrained=False, progress=True, **kwargs): + r"""ResNet-152 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, + **kwargs) + + +def resnext50_32x4d(pretrained=False, progress=True, **kwargs): + r"""ResNeXt-50 32x4d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 4 + return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def resnext101_32x8d(pretrained=False, progress=True, **kwargs): + r"""ResNeXt-101 32x8d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) + + +def wide_resnet50_2(pretrained=False, progress=True, **kwargs): + r"""Wide ResNet-50-2 model from + `"Wide Residual Networks" `_ + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def wide_resnet101_2(pretrained=False, progress=True, **kwargs): + r"""Wide ResNet-101-2 model from + `"Wide Residual Networks" `_ + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) diff --git a/tracking/contrast_one2one.py b/tracking/contrast_one2one.py index bb7cc3f..1625275 100644 --- a/tracking/contrast_one2one.py +++ b/tracking/contrast_one2one.py @@ -34,6 +34,7 @@ import cv2 import os import sys import json +import pickle sys.path.append(r"D:\DetectTracking") from tracking.utils.read_data import extract_data, read_tracking_output, read_deletedBarcode_file @@ -213,9 +214,12 @@ def get_std_barcodeDict(bcdpath): if ext not in IMG_FORMAT: continue imgpaths.append(imgpath) stdBarcodeDict[barcode].extend(imgpaths) - - with open('stdBarcodeDict.json', 'wb') as f: - json.dump(stdBarcodeDict, f) + + jsonpath = os.path.join(r'\\192.168.1.28\share\测试_202406\contrast\barcodes', f"{barcode}.pickle") + with open(jsonpath, 'wb') as f: + pickle.dump(stdBarcodeDict, f) + + print(f"Barcode: {barcode}")
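The final hunk above replaces `json.dump` with `pickle.dump`: the file is opened in `'wb'` mode and `json.dump` writes text, so the old code would fail, and the new code also writes one pickle per barcode instead of a single shared file. A small sketch of the read side, mirroring what `main_infer()` in `contrast/contrast_one2one.py` does with these files:

```python
# Sketch of reading back the per-barcode pickles written by get_std_barcodeDict().
import os
import pickle

barcodes_dir = r"\\192.168.1.28\share\测试_202406\contrast\barcodes"

for name in os.listdir(barcodes_dir):
    if not name.endswith(".pickle"):
        continue
    with open(os.path.join(barcodes_dir, name), "rb") as f:
        bpDict = pickle.load(f)              # {barcode: [imgpath, ...]}
    for barcode, imgpaths in bpDict.items():
        print(barcode, len(imgpaths))
```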