37 Commits
V1.0...dev

Author SHA1 Message Date
9400ae904a llm_agent 2025-04-15 09:26:24 +08:00
ad850221c5 Fix the CPU/GPU switching error in the tracking algorithm 2025-01-17 10:53:24 +08:00
31db54fa37 Add a check for when the standard image is None 2025-01-15 15:26:59 +08:00
38d962cd59 Add Qwen agent 2025-01-14 17:24:07 +08:00
f797aea5d0 Add Qwen agent 2025-01-14 17:20:52 +08:00
a3b65be6b1 add Qwen interface 2025-01-14 17:18:39 +08:00
6ede9580cb add Qwen interface 2025-01-14 17:15:24 +08:00
2feedd622d {commit_message} 2025-01-14 15:03:49 +08:00
544eac9cec add network crop 2025-01-14 15:03:49 +08:00
415a804c9b add network image cropping 2025-01-14 15:03:49 +08:00
a64dcb5a10 {commit_message} 2025-01-14 15:03:49 +08:00
47a384131e {commit_message} 2025-01-14 14:56:05 +08:00
c7859a7792 add network image crop pipeline 2025-01-14 14:56:05 +08:00
a16235a593 add network image crop pipeline 2025-01-14 13:38:17 +08:00
744fb7b7b2 cpu to device select 2025-01-13 18:11:56 +08:00
a50f777839 modify pipeline.py 2025-01-13 17:35:15 +08:00
3d13b0d9c5 20250103 2025-01-03 15:04:25 +08:00
661489120b 20240102 2025-01-02 18:23:16 +08:00
7e13e0f5b4 last update in 2024 2024-12-31 16:45:04 +08:00
dac3b3f2b6 20241218 2024-12-18 17:35:24 +08:00
39f94c7bd4 20241217 2024-12-17 17:32:09 +08:00
afd033b965 modify at output data format 2024-12-10 19:01:54 +08:00
1e6c5deee4 modify the 1:1 comparison method 2024-12-05 10:23:03 +08:00
8bbee310ba bakeup 2024-11-25 18:05:08 +08:00
c47894ddc0 abc 2024-11-08 08:52:56 +08:00
5ecc1285d4 update 2024-11-04 18:06:52 +08:00
dfb2272a15 pepeline_extract_subimg.py: extract subimg 2024-10-07 17:27:00 +08:00
390c5d2d94 guoqing bakeup 2024-10-04 12:12:44 +08:00
09e92d63b3 modify the 1:1 contrast code 2024-09-11 17:37:32 +08:00
7309dec166 contrast performance evaluatation have done! 2024-09-05 19:01:49 +08:00
f978d4174f update one2one contrast module 2024-09-03 18:45:07 +08:00
e00fb46847 update 20240902 2024-09-02 18:39:12 +08:00
0cc36ba920 bakeup 2024-09-02 11:50:08 +08:00
5109400a57 update for bakeup 2024-08-06 20:00:29 +08:00
27d57b21d4 for bakeup 2024-07-29 18:56:35 +08:00
16543107f3 update for bakeup 2024-07-22 20:16:45 +08:00
e986ec060b modified for site test 2024-07-18 17:52:12 +08:00
248 changed files with 72211 additions and 613 deletions

11
.gitignore vendored

@@ -1,14 +1,11 @@
# Repo-specific GitIgnore ----------------------------------------------------------------------------------------------
*.jpg
*.jpeg
*.png
*.bmp
*.tif
*.tiff
*.heic
*.JPG
*.JPEG
*.PNG
*.BMP
*.TIF
*.TIFF
@@ -23,9 +20,10 @@
*.rar
*.pkl
*.pickle
*.npy
*.csv
*.pyc
# for tracking ---------------------------------------------------------------
@@ -38,6 +36,10 @@ tracking/data/boxes_imgs/*
tracking/data/trackfeats/*
tracking/data/tracks/*
tracking/data/handlocal/*
contrast/feat_extract/model/__pycache__/*
std_img*
.gitignore
*/__pycache__/*
ckpts/*
doc
@@ -53,7 +55,6 @@ VOC/
# Neural Network weights -----------------------------------------------------------------------------------------------
*.weights
*.pt
*.pth
*.pb
*.onnx

176
Qwen_agent.py Normal file

@@ -0,0 +1,176 @@
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from accelerate import init_empty_weights, load_checkpoint_in_model
from stream_pipeline import stream_pipeline
from PIL import Image
from io import BytesIO
import torch
import ast
import requests
import random
# default: Load the model on the available device(s)
model = Qwen2VLForConditionalGeneration.from_pretrained(
"Qwen/Qwen2-VL-7B-Instruct",
torch_dtype=torch.bfloat16,
attn_implementation="flash_attention_2",
device_map="auto"
)
# default processor
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", attn_implementation="flash_attention_2")
def qwen_prompt(img_list, messages):
# Preparation for inference
text = processor.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
inputs = processor(
text=[text],
images=img_list,
padding=True,
return_tensors="pt",
)
inputs = inputs.to("cuda")
# Inference: Generation of the output
with torch.no_grad():
generated_ids = model.generate(**inputs, max_new_tokens=256)
generated_ids_trimmed = [
out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text = processor.batch_decode(
generated_ids_trimmed, add_special_tokens=False, skip_special_tokens=True, clean_up_tokenization_spaces=False
)
del inputs
del generated_ids
del generated_ids_trimmed
torch.cuda.empty_cache()
return output_text[0]
def get_best_image(track_imgs):
if len(track_imgs) >= 5:
track_imgs = random.sample(track_imgs, 5)
img_frames = []
for i in range(len(track_imgs)):
content = {}
content['type'] = 'image'
content['min_pixels'] = 224 * 224
content['max_pixels'] = 800 * 800
img_frames.append(content)
messages = [
{
"role": "system",
"content": "You are a chatbot working in a supermarket, and you need to help the customer find the best-quality image of a product. A good product image must satisfy the following conditions: \
1. The text on it is clear and coherent. \
2. The product pattern is clear and recognizable. \
3. The product provides rich, extractable description information. \
Based on these conditions, select the best image from the given images, then output its index as a dict whose key is 'index'."
},
{
"role": "system",
"content": img_frames,
},
]
output_text = qwen_prompt(track_imgs, messages)
output_dict = ast.literal_eval(output_text.strip('```python\n'))
if output_dict['index'] > len(track_imgs):
output_dict['index'] = len(track_imgs)
best_img = track_imgs[output_dict['index'] - 1]
return best_img
def get_product_description(std_img, track_imgs):
messages = [
{
"role": "system",
"content": "You are a chatbot working in a supermarket. You need to extract the product information from the images and output it in the python dict format below; if \
a piece of information is too unclear to read, output 'Unknown' for it: \
{ \
'item1': {\
'Text': the text extracted from the product in the first image, \
'Color': the color of the product in the first image, \
'Shape': the shape of the product in the first image, \
'Material': the material of the product in the first image, \
'Category': the category of the product in the first image, \
} \
'item2': {\
'Text': the text extracted from the product in the second image, \
'Color': the color of the product in the second image, \
'Shape': the shape of the product in the second image, \
'Material': the material of the product in the second image, \
'Category': the category of the product in the second image, \
} \
'is_Same': first check whether 'Color' matches; if it does not, return False; if it does, output True when at least 3 of the keys ['Text', 'Shape', 'Material', 'Category'] \
agree between the two dicts, and False otherwise. \
} \
"
},
{
"role": "user",
"content": [
{
"type": "image",
"min_pixels": 224 * 224,
"max_pixels": 800 * 800,
},
{
"type": "image",
"min_pixels": 224 * 224,
"max_pixels": 800 * 800,
},
],
},
# {
# "role": "user",
# "content": "Output the comparison info of the second image as a python dict."
# "content": "Output a list whose content is the dict info extracted from the two images."
# }
]
best_img = get_best_image(track_imgs)
if std_img is not None:
img_list = [std_img, best_img]
else:
img_list = [best_img, best_img]
output_text = qwen_prompt(img_list, messages)
contrast_pair = ast.literal_eval(output_text.strip('```python\n'))
return contrast_pair
def item_analysis(stream_dict):
track_imgs = stream_pipeline(stream_dict)
if len(track_imgs) == 0:
return {}
std_img = None
if stream_dict['goodsPic'] is not None:
# response = requests.get(stream_dict['goodsPic'])
# std_img = Image.open(BytesIO(response.content))
std_img = Image.open(stream_dict['goodsPic']).convert("RGB")
description_dict = get_product_description(std_img, track_imgs)
return description_dict
def main():
# sample input dict
stream_dict = {
"goodsName" : "优诺优丝黄桃果粒风味发酵乳",
"measureProperty" : 0,
"qty" : 1,
"price" : 25.9,
"weight": 560, # in grams
"barcode": "6931806801024",
"video" : "https://ieemoo-ai.obs.cn-east-3.myhuaweicloud.com/videos/20231009/04/04_20231009-082149_21f2ca35-f2c2-4386-8497-3e7a3b407f03_4901872831197.mp4",
"goodsPic" : "https://ieemoo-storage.obs.cn-east-3.myhuaweicloud.com/lhpic/6931806801024.jpg",
"measureUnit" : "",
"goodsSpec" : "405g"
}
result = item_analysis(stream_dict)
print(result)
if __name__ == "__main__":
main()
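Given the format requested by the system prompt above, a successful get_product_description call is expected to return a dict shaped like the following (a hypothetical illustration; all values are invented):
{
'item1': {'Text': '优诺黄桃果粒 405g', 'Color': 'yellow', 'Shape': 'cup', 'Material': 'plastic', 'Category': 'yogurt'},
'item2': {'Text': '优诺黄桃果粒 405g', 'Color': 'yellow', 'Shape': 'cup', 'Material': 'plastic', 'Category': 'yogurt'},
'is_Same': True
}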

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

50
bclass.py Normal file

@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 15 16:23:03 2024
@author: ym
"""
class CamEvent:
def __init__(self, datapath):
self.data_path = datapath
self.bboxes = None
self.bfeats = None
self.tboxes = None
self.tfeats = None
class ShopEvent:
def __init__(self, eventpath, stdpath):
self.barcode = ""
self.event_path = eventpath
self.event_type = self.get_event_type(eventpath)
self.FrontEvent = ""
self.BackEvent = ""
self.fusion_boxes = None
self.fusion_feats = None
self.stdfeats = self.get_stdfeats(stdpath)
self.weight = None
self.imu = None
def get_event_type(self, eventpath):
pass
def get_stdfeats(self, stdpath):
pass

7
contrast/__init__.py Normal file

@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 26 08:53:58 2024
@author: ym
"""

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

352
contrast/event_test.py Normal file

@@ -0,0 +1,352 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 16 18:56:18 2024
@author: ym
"""
import os
import cv2
import json
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.font_manager import FontProperties
from scipy.spatial.distance import cdist
from utils.event import ShoppingEvent, save_data
rcParams['font.sans-serif'] = ['SimHei'] # use SimHei so Chinese labels render
rcParams['axes.unicode_minus'] = False # render minus signs correctly
'''*********** USearch ***********'''
def read_usearch():
stdFeaturePath = r"D:\contrast\stdlib\v11_test.json"
stdBarcode = []
stdlib = {}
with open(stdFeaturePath, 'r', encoding='utf-8') as f:
data = json.load(f)
for dic in data['total']:
barcode = dic['key']
feature = np.array(dic['value'])
stdBarcode.append(barcode)
stdlib[barcode] = feature
return stdlib
def get_eventlist():
'''
Read the misclassified events from one test round
'''
evtpaths = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\images"
text1 = "one2n_Error.txt"
text2 = "one2SN_Error.txt"
events = []
text = (text1, text2)
for txt in text:
txtfile = os.path.join(evtpaths, txt)
with open(txtfile, "r") as f:
lines = f.readlines()
for i, line in enumerate(lines):
line = line.strip()
if line:
fpath=os.path.join(evtpaths, line)
events.append(fpath)
events = list(set(events))
return events
def single_event():
events = get_eventlist()
'''Define the storage path for the current events and create the corresponding folders'''
resultPath = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\result\single_event"
for evtpath in events:
event = ShoppingEvent(evtpath)
save_data(event, resultPath)
print(event.evtname)
def get_topk_percent(data, k):
"""
Get the largest k% of the elements in data
"""
# convert the data to a NumPy array
if isinstance(data, list):
data = np.array(data)
percentile = np.percentile(data, 100-k)
top_k_percent = data[data >= percentile]
return top_k_percent
def cluster(data, thresh=0.15):
# data = np.array([0.1, 0.13, 0.7, 0.2, 0.8, 0.52, 0.3, 0.7, 0.85, 0.58])
# data = np.array([0.1, 0.13, 0.2, 0.3])
# data = np.array([0.1])
if isinstance(data, list):
data = np.array(data)
data1 = np.sort(data)
cluter, Cluters = [data1[0]], []
for i in range(1, len(data1)):
if data1[i] - data1[i-1]< thresh:
cluter.append(data1[i])
else:
Cluters.append(cluter)
cluter = [data1[i]]
Cluters.append(cluter)
clt_center = []
for clt in Cluters:
## Should a minimum number of track samples per cluster be enforced here? That factor belongs in the track analysis instead
# if len(clt)>=3:
# clt_center.append(np.mean(clt))
clt_center.append(np.mean(clt))
# print(clt_center)
return clt_center
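# Illustrative example (with the default thresh=0.15):
# cluster([0.1, 0.13, 0.2, 0.52, 0.58, 0.7, 0.8, 0.85]) sorts the values,
# splits at the single gap >= 0.15 (0.2 -> 0.52), and returns the two
# cluster means [0.143, 0.69].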
def calc_simil(event, stdfeat):
def calsiml(feat1, feat2):
'''Selection strategy for the similarity between track samples and the standard feature set'''
matrix = 1 - cdist(feat1, feat2, 'cosine')
simi_max = []
for i in range(len(matrix)):
sim = np.mean(get_topk_percent(matrix[i, :], 75))
simi_max.append(sim)
cltc_max = cluster(simi_max)
Simi = max(cltc_max)
## An empty cltc_max indicates an oversight in the code and should be tracked down and fixed
# if len(cltc_max):
# Simi = max(cltc_max)
# else:
# Simi = 0 #不应该走到该处
return Simi
front_boxes = np.empty((0, 9), dtype=np.float64) ## compatible with class doTracks
front_feats = np.empty((0, 256), dtype=np.float64) ## compatible with class doTracks
for i in range(len(event.front_boxes)):
front_boxes = np.concatenate((front_boxes, event.front_boxes[i]), axis=0)
front_feats = np.concatenate((front_feats, event.front_feats[i]), axis=0)
back_boxes = np.empty((0, 9), dtype=np.float64) ## compatible with class doTracks
back_feats = np.empty((0, 256), dtype=np.float64) ## compatible with class doTracks
for i in range(len(event.back_boxes)):
back_boxes = np.concatenate((back_boxes, event.back_boxes[i]), axis=0)
back_feats = np.concatenate((back_feats, event.back_feats[i]), axis=0)
if len(front_feats):
front_simi = calsiml(front_feats, stdfeat)
if len(back_feats):
back_simi = calsiml(back_feats, stdfeat)
'''Fusion strategy for the front/back camera similarities'''
if len(front_feats) and len(back_feats):
diff_simi = abs(front_simi - back_simi)
if diff_simi>0.15:
Similar = max([front_simi, back_simi])
else:
Similar = (front_simi+back_simi)/2
elif len(front_feats) and len(back_feats)==0:
Similar = front_simi
elif len(front_feats)==0 and len(back_feats):
Similar = back_simi
else:
Similar = None # when event.front_feats and event.back_feats are both empty
return Similar
def simi_matrix():
resultPath = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\result\single_event"
stdlib = read_usearch()
events = get_eventlist()
for evtpath in events:
evtname = os.path.basename(evtpath)
_, barcode = evtname.split("_")
# build the event and its corresponding standard feature set
event = ShoppingEvent(evtpath)
stdfeat = stdlib[barcode]
Similar = calc_simil(event, stdfeat)
# build the storage path for the boxes sub-images
subimgpath = os.path.join(resultPath, f"{event.evtname}", "subimg")
if not os.path.exists(subimgpath):
os.makedirs(subimgpath)
histpath = os.path.join(resultPath, "simi_hist")
if not os.path.exists(histpath):
os.makedirs(histpath)
mean_values, max_values = [], []
cameras = ('front', 'back')
fig, ax = plt.subplots(2, 3, figsize=(16, 9), dpi=100)
kpercent = 25
for camera in cameras:
boxes = np.empty((0, 9), dtype=np.float64) ## compatible with class doTracks
evtfeat = np.empty((0, 256), dtype=np.float64) ## compatible with class doTracks
if camera == 'front':
for i in range(len(event.front_boxes)):
boxes = np.concatenate((boxes, event.front_boxes[i]), axis=0)
evtfeat = np.concatenate((evtfeat, event.front_feats[i]), axis=0)
imgpaths = event.front_imgpaths
else:
for i in range(len(event.back_boxes)):
boxes = np.concatenate((boxes, event.back_boxes[i]), axis=0)
evtfeat = np.concatenate((evtfeat, event.back_feats[i]), axis=0)
imgpaths = event.back_imgpaths
assert len(boxes)==len(evtfeat), f"Please check the Event: {evtname}"
if len(boxes)==0: continue
print(evtname)
matrix = 1 - cdist(evtfeat, stdfeat, 'cosine')
simi_1d = matrix.flatten()
simi_mean = np.mean(matrix, axis=1)
# simi_max = np.max(matrix, axis=1)
'''average the largest k% of similarities in each row of the similarity matrix'''
simi_max = []
for i in range(len(matrix)):
sim = np.mean(get_topk_percent(matrix[i, :], kpercent))
simi_max.append(sim)
mean_values.append(np.mean(matrix))
max_values.append(np.mean(simi_max))
diff_max_mean = np.mean(simi_max) - np.mean(matrix)
'''plots of the similarity statistics'''
k = 0
if camera == 'front': k = 1
'''********************* all similarity values *********************'''
ax[k, 0].hist(simi_1d, bins=60, range=(-0.2, 1), edgecolor='black')
ax[k, 0].set_xlim([-0.2, 1])
ax[k, 0].set_title(camera)
_, y_max = ax[k, 0].get_ylim() # get the y-axis range
'''similarity spread'''
ax[k, 0].text(-0.1, 0.15*y_max, f"rng:{max(simi_1d)-min(simi_1d):.3f}", fontsize=18, color='b')
'''********************* mean ********************************'''
ax[k, 1].hist(simi_mean, bins=24, range=(-0.2, 1), edgecolor='black')
ax[k, 1].set_xlim([-0.2, 1])
ax[k, 1].set_title("mean")
_, y_max = ax[k, 1].get_ylim() # get the y-axis range
'''similarity spread'''
ax[k, 1].text(-0.1, 0.15*y_max, f"rng:{max(simi_mean)-min(simi_mean):.3f}", fontsize=18, color='b')
'''********************* max ******************************'''
ax[k, 2].hist(simi_max, bins=24, range=(-0.2, 1), edgecolor='black')
ax[k, 2].set_xlim([-0.2, 1])
ax[k, 2].set_title("max")
_, y_max = ax[k, 2].get_ylim() # get the y-axis range
'''similarity spread'''
ax[k, 2].text(-0.1, 0.15*y_max, f"rng:{max(simi_max)-min(simi_max):.3f}", fontsize=18, color='b')
'''plot the cluster centers'''
cltc_mean = cluster(simi_mean)
for value in cltc_mean:
ax[k, 1].axvline(x=value, color='m', linestyle='--', linewidth=3)
cltc_max = cluster(simi_max)
for value in cltc_max:
ax[k, 2].axvline(x=value, color='m', linestyle='--', linewidth=3)
'''plot the overall mean and the mean of the per-row max similarities'''
ax[k, 1].axvline(x=np.mean(matrix), color='r', linestyle='-', linewidth=3)
ax[k, 2].axvline(x=np.mean(simi_max), color='g', linestyle='-', linewidth=3)
'''annotate (mean of max similarities) - (overall mean)'''
_, y_max = ax[k, 2].get_ylim() # get the y-axis range
ax[k, 2].text(-0.1, 0.05*y_max, f"g-r={diff_max_mean:.3f}", fontsize=18, color='m')
plt.show()
# for i, box in enumerate(boxes):
# x1, y1, x2, y2, tid, score, cls, fid, bid = box
# imgpath = imgpaths[int(fid-1)]
# image = cv2.imread(imgpath)
# subimg = image[int(y1/2):int(y2/2), int(x1/2):int(x2/2), :]
# camerType, timeTamp, _, frameID = os.path.basename(imgpath).split('.')[0].split('_')
# subimgName = f"cam{camerType}_{i}_tid{int(tid)}_fid({int(fid)}, {frameID})_{simi_mean[i]:.3f}.png"
# imgpairs.append((subimgName, subimg))
# spath = os.path.join(subimgpath, subimgName)
# cv2.imwrite(spath, subimg)
# oldname = f"cam{camerType}_{i}_tid{int(tid)}_fid({int(fid)}, {frameID}).png"
# oldpath = os.path.join(subimgpath, oldname)
# if os.path.exists(oldpath):
# os.remove(oldpath)
if len(mean_values)==2:
mean_diff = abs(mean_values[1]-mean_values[0])
ax[0, 1].set_title(f"mean diff: {mean_diff:.3f}")
if len(max_values)==2:
max_diff = abs(max_values[1]-max_values[0])
ax[0, 2].set_title(f"max diff: {max_diff:.3f}")
try:
fig.suptitle(f"Similar: {Similar:.3f}", fontsize=16)
except Exception as e:
print(e)
print(f"Similar: {Similar}")
pltpath = os.path.join(subimgpath, f"hist_max_{kpercent}%_.png")
plt.savefig(pltpath)
pltpath1 = os.path.join(histpath, f"{evtname}_.png")
plt.savefig(pltpath1)
plt.close()
def main():
simi_matrix()
if __name__ == "__main__":
main()
# cluster()


@@ -0,0 +1,87 @@
import torch
import torchvision.transforms as T
class Config:
# network settings
backbone = 'resnet18' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5]
metric = 'arcface' # [cosface, arcface]
cbam = True
embedding_size = 256
drop_ratio = 0.5
img_size = 224
batch_size = 8
# data preprocess
# input_shape = [1, 128, 128]
"""transforms.RandomCrop(size),
transforms.RandomVerticalFlip(p=0.5),
transforms.RandomHorizontalFlip(),
RandomRotate(15, 0.3),
# RandomGaussianBlur()"""
train_transform = T.Compose([
T.ToTensor(),
T.Resize((img_size, img_size)),
# T.RandomCrop(img_size),
# T.RandomHorizontalFlip(p=0.5),
T.RandomRotation(180),
T.ColorJitter(brightness=0.5),
T.ConvertImageDtype(torch.float32),
T.Normalize(mean=[0.5], std=[0.5]),
])
test_transform = T.Compose([
T.ToTensor(),
T.Resize((img_size, img_size)),
T.ConvertImageDtype(torch.float32),
T.Normalize(mean=[0.5], std=[0.5]),
])
# dataset
train_root = './data/2250_train/train' # dataset after an initial screening pass
# train_root = './data/0612_train/train'
test_root = "./data/2250_train/val/"
# test_root = "./data/0612_train/val"
test_list = "./data/2250_train/val_pair.txt"
test_group_json = "./2250_train/cross_same_0508.json"
# test_list = "./data/test_data_100/val_pair.txt"
# training settings
checkpoints = "checkpoints/resnet18_0613/" # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
restore = False
# restore_model = "checkpoints/renet18_2250_0315/best_resnet18_2250_0315.pth" # best_resnet18_1491_0306.pth
restore_model = "checkpoints/resnet18_0515/best.pth" # best_resnet18_1491_0306.pth
# test_model = "checkpoints/renet18_2250_0314/best_resnet18_2250_0314.pth"
testbackbone = 'resnet18' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5]
test_val = "D:/比对/cl"
# test_val = "./data/test_data_100"
test_model = "checkpoints/zhanting_res_801.pth"
# test_model = "checkpoints/zhanting_res_801.pth"
train_batch_size = 512 # 256
test_batch_size = 256 # 256
epoch = 300
optimizer = 'sgd' # ['sgd', 'adam']
lr = 1.5e-2 # 1e-2
lr_step = 5 # 10
lr_decay = 0.95 # 0.98
weight_decay = 5e-4
loss = 'cross_entropy' # ['focal_loss', 'cross_entropy']
# device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
pin_memory = True # if memory is large, set it True to speed up a bit
num_workers = 4 # dataloader
group_test = True
config = Config()
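A minimal usage sketch of this config object (assuming the file lives at contrast/config.py, as the commented-out import "from contrast.config import config as conf" in the following file suggests; "sample.jpg" is a stand-in path):
from PIL import Image
from contrast.config import config as conf
img = Image.open("sample.jpg").convert("RGB") # stand-in input image
tensor = conf.test_transform(img) # float tensor of shape (3, 224, 224)
print(conf.testbackbone, conf.device, tensor.shape)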


@@ -0,0 +1,593 @@
# -*- coding: utf-8 -*-
"""
@author: LiChen
"""
# import pdb
# import shutil
import torch.nn as nn
# import statistics
import os
import numpy as np
from scipy.spatial.distance import cdist
import torch
import os.path as osp
from PIL import Image
import json
import matplotlib.pyplot as plt
from pathlib import Path
# import sys
# sys.path.append(r"D:\DetectTracking")
# from contrast.config import config as conf
# from contrast.model import resnet18
from .config import config as conf
from .model import resnet18
# from model import (mobilevit_s, resnet14, resnet18, resnet34, resnet50, mobilenet_v2,
# MobileNetV3_Small, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5)
curpath = Path(__file__).resolve().parents[0]
class FeatsInterface:
def __init__(self, conf):
self.device = conf.device
# if conf.backbone == 'resnet18':
# model = resnet18().to(conf.device)
model = resnet18().to(conf.device)
self.transform = conf.test_transform
self.batch_size = conf.batch_size
self.embedding_size = conf.embedding_size
if conf.test_model.find("zhanting") == -1:
model = nn.DataParallel(model).to(conf.device)
self.model = model
modpath = os.path.join(curpath, conf.test_model)
self.model.load_state_dict(torch.load(modpath, map_location=conf.device))
self.model.eval()
print('load model {} '.format(conf.testbackbone))
def inference(self, images, detections=None):
'''
Inputs in BGR order must be converted to RGB first
'''
if isinstance(images, np.ndarray):
imgs, features = self.inference_image(images, detections)
return imgs, features
batch_patches = []
patches = []
for i, img in enumerate(images):
img = img.copy()
patch = self.transform(img)
patch = patch.to(device=self.device) # keep float32; the model weights are never converted to half precision
patches.append(patch)
if (i + 1) % self.batch_size == 0:
patches = torch.stack(patches, dim=0)
batch_patches.append(patches)
patches = []
if len(patches):
patches = torch.stack(patches, dim=0)
batch_patches.append(patches)
features = np.zeros((0, self.embedding_size))
for patches in batch_patches:
pred=self.model(patches)
pred[torch.isinf(pred)] = 1.0
feat = pred.cpu().data.numpy()
features = np.vstack((features, feat))
return features
def inference_image(self, image, detections):
H, W, _ = np.shape(image)
batch_patches = []
patches = []
imgs = []
for d in range(np.size(detections, 0)):
tlbr = detections[d, :4].astype(np.int_)
tlbr[0] = max(0, tlbr[0])
tlbr[1] = max(0, tlbr[1])
tlbr[2] = min(W - 1, tlbr[2])
tlbr[3] = min(H - 1, tlbr[3])
img = image[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2], :]
imgs.append(img)
img1 = img[:, :, ::-1].copy() # the model expects RGB inputs
patch = self.transform(img1)
# patch = patch.to(device=self.device).half()
patch = patch.to(device=self.device)
patches.append(patch)
if (d + 1) % self.batch_size == 0:
patches = torch.stack(patches, dim=0)
batch_patches.append(patches)
patches = []
if len(patches):
patches = torch.stack(patches, dim=0)
batch_patches.append(patches)
features = np.zeros((0, self.embedding_size))
for patches in batch_patches:
pred = self.model(patches)
pred[torch.isinf(pred)] = 1.0
feat = pred.cpu().data.numpy()
features = np.vstack((features, feat))
return imgs, features
def unique_image(pair_list) -> set:
"""Return the unique image paths in pair_list.txt"""
with open(pair_list, 'r') as fd:
pairs = fd.readlines()
unique = set()
for pair in pairs:
id1, id2, _ = pair.split()
unique.add(id1)
unique.add(id2)
return unique
def group_image(images: set, batch) -> list:
"""Group image paths by batch size"""
images = list(images)
size = len(images)
res = []
for i in range(0, size, batch):
end = min(batch + i, size)
res.append(images[i: end])
return res
def _preprocess(images: list, transform) -> torch.Tensor:
res = []
for img in images:
im = Image.open(img)
im = transform(im)
res.append(im)
# data = torch.cat(res, dim=0) # shape: (batch, 128, 128)
# data = data[:, None, :, :] # shape: (batch, 1, 128, 128)
data = torch.stack(res)
return data
def test_preprocess(images: list, transform) -> torch.Tensor:
res = []
for img in images:
im = Image.open(img)
im = transform(im)
res.append(im)
# data = torch.cat(res, dim=0) # shape: (batch, 128, 128)
# data = data[:, None, :, :] # shape: (batch, 1, 128, 128)
data = torch.stack(res)
return data
def featurize(images: list, transform, net, device, train=False) -> dict:
"""featurize each image and save into a dictionary
Args:
images: image paths
transform: test transform
net: pretrained model
device: cpu or cuda
Returns:
Dict (key: imagePath, value: feature)
"""
if train:
data = _preprocess(images, transform)
data = data.to(device)
net = net.to(device)
with torch.no_grad():
features = net(data)
res = {img: feature for (img, feature) in zip(images, features)}
else:
data = test_preprocess(images, transform)
data = data.to(device)
net = net.to(device)
with torch.no_grad():
features = net(data)
res = {img: feature for (img, feature) in zip(images, features)}
return res
# def inference_image(images: list, transform, net, device, bs=16, embedding_size=256) -> dict:
# batch_patches = []
# patches = []
# for d, img in enumerate(images):
# img = Image.open(img)
# patch = transform(img)
# if str(device) != "cpu":
# patch = patch.to(device).half()
# else:
# patch = patch.to(device)
# patches.append(patch)
# if (d + 1) % bs == 0:
# patches = torch.stack(patches, dim=0)
# batch_patches.append(patches)
# patches = []
# if len(patches):
# patches = torch.stack(patches, dim=0)
# batch_patches.append(patches)
# features = np.zeros((0, embedding_size), dtype=np.float32)
# for patches in batch_patches:
# pred = net(patches)
# pred[torch.isinf(pred)] = 1.0
# feat = pred.cpu().data.numpy()
# features = np.vstack((features, feat))
# return features
def featurize_1(images: list, transform, net, device, train=False) -> dict:
"""featurize each image and save into a dictionary
Args:
images: image paths
transform: test transform
net: pretrained model
device: cpu or cuda
Returns:
Dict (key: imagePath, value: feature)
"""
data = test_preprocess(images, transform)
data = data.to(device)
net = net.to(device)
with torch.no_grad():
features = net(data).data.numpy()
return features
def cosin_metric(x1, x2):
return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
def threshold_search(y_score, y_true):
y_score = np.asarray(y_score)
y_true = np.asarray(y_true)
best_acc = 0
best_th = 0
for i in range(len(y_score)):
th = y_score[i]
y_test = (y_score >= th)
acc = np.mean((y_test == y_true).astype(int))
if acc > best_acc:
best_acc = acc
best_th = th
return best_acc, best_th
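# Illustrative example: threshold_search([0.9, 0.8, 0.3, 0.2], [1, 1, 0, 0])
# tries each score as a candidate threshold and returns (1.0, 0.8), since
# th=0.8 classifies all four pairs correctly under y_test = (y_score >= th).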
def showgrid(recall, recall_TN, PrecisePos, PreciseNeg):
x = np.linspace(start=-1.0, stop=1.0, num=50, endpoint=True).tolist()
plt.figure(figsize=(10, 6))
plt.plot(x, recall, color='red', label='recall')
plt.plot(x, recall_TN, color='black', label='recall_TN')
plt.plot(x, PrecisePos, color='blue', label='PrecisePos')
plt.plot(x, PreciseNeg, color='green', label='PreciseNeg')
plt.legend()
plt.xlabel('threshold')
# plt.ylabel('Similarity')
plt.grid(True, linestyle='--', alpha=0.5)
plt.savefig('accuracy_recall_grid.png')
plt.show()
plt.close()
def compute_accuracy_recall(score, labels):
th = 0.1
squence = np.linspace(-1, 1, num=50)
# squence = [0.4]
recall, PrecisePos, PreciseNeg, recall_TN = [], [], [], []
for th in squence:
t_score = (score > th)
t_labels = (labels == 1)
# print(t_score)
# print(t_labels)
TP = np.sum(np.logical_and(t_score, t_labels))
FN = np.sum(np.logical_and(np.logical_not(t_score), t_labels))
f_score = (score < th)
f_labels = (labels == 0)
TN = np.sum(np.logical_and(f_score, f_labels))
FP = np.sum(np.logical_and(np.logical_not(f_score), f_labels))
print("Threshold:{} TP:{},FP:{},TN:{},FN:{}".format(th, TP, FP, TN, FN))
PrecisePos.append(0 if (TP + FP) == 0 else TP / (TP + FP))
PreciseNeg.append(0 if TN == 0 else TN / (TN + FN))
recall.append(0 if TP == 0 else TP / (TP + FN))
recall_TN.append(0 if TN == 0 else TN / (TN + FP))
showgrid(recall, recall_TN, PrecisePos, PreciseNeg)
def compute_accuracy(feature_dict, pair_list, test_root):
with open(pair_list, 'r') as f:
pairs = f.readlines()
similarities = []
labels = []
for pair in pairs:
img1, img2, label = pair.split()
img1 = osp.join(test_root, img1)
img2 = osp.join(test_root, img2)
feature1 = feature_dict[img1].cpu().numpy()
feature2 = feature_dict[img2].cpu().numpy()
label = int(label)
similarity = cosin_metric(feature1, feature2)
similarities.append(similarity)
labels.append(label)
accuracy, threshold = threshold_search(similarities, labels)
# print('similarities >> {}'.format(similarities))
# print('labels >> {}'.format(labels))
compute_accuracy_recall(np.array(similarities), np.array(labels))
return accuracy, threshold
def deal_group_pair(pairList1, pairList2):
allsimilarity = []
for pair1 in pairList1:
one_similarity = [] # reset per sample so the max is taken over pairList2 only
for pair2 in pairList2:
similarity = cosin_metric(pair1.cpu().numpy(), pair2.cpu().numpy())
one_similarity.append(similarity)
allsimilarity.append(max(one_similarity)) # max
# allsimilarity.append(sum(one_similarity)/len(one_similarity)) # mean
# allsimilarity.append(statistics.median(one_similarity)) # median
# print(allsimilarity)
# print(labels)
return allsimilarity
def compute_group_accuracy(content_list_read):
allSimilarity, allLabel= [], []
for data_loaded in content_list_read:
one_group_list = []
for i in range(2):
images = [osp.join(conf.test_val, img) for img in data_loaded[i]]
group = group_image(images, conf.test_batch_size)
d = featurize(group[0], conf.test_transform, model, conf.device)
one_group_list.append(d.values())
similarity = deal_group_pair(one_group_list[0], one_group_list[1])
allLabel.append(data_loaded[-1])
allSimilarity.extend(similarity)
# print(allSimilarity)
# print(allLabel)
return allSimilarity, allLabel
def compute_contrast_accuracy(content_list_read):
npairs = 50
same_folder_pairs = content_list_read['same_folder_pairs']
cross_folder_pairs = content_list_read['cross_folder_pairs']
npairs = min((len(same_folder_pairs), len(cross_folder_pairs)))
Encoder = FeatsInterface(conf)
same_pairs = same_folder_pairs[:npairs]
cross_pairs = cross_folder_pairs[:npairs]
same_pairs_similarity = []
for i in range(len(same_pairs)):
images_a = [osp.join(conf.test_val, img) for img in same_pairs[i][0]]
images_b = [osp.join(conf.test_val, img) for img in same_pairs[i][1]]
feats_a = Encoder.inference(images_a)
feats_b = Encoder.inference(images_b)
# matrix = 1- np.maximum(0.0, cdist(feats_a, feats_b, 'cosine'))
matrix = 1 - cdist(feats_a, feats_b, 'cosine')
feats_am = np.mean(feats_a, axis=0, keepdims=True)
feats_bm = np.mean(feats_b, axis=0, keepdims=True)
matrixm = 1- np.maximum(0.0, cdist(feats_am, feats_bm, 'cosine'))
same_pairs_similarity.append(np.mean(matrix))
'''save image pairs with the same barcode'''
# foldi = os.path.join('./result/same', f'{i}')
# if os.path.exists(foldi):
# shutil.rmtree(foldi)
# os.makedirs(foldi)
# else:
# os.makedirs(foldi)
# for ipt in range(len(images_a)):
# source_path = images_a[ipt]
# destination_path = os.path.join(foldi, f'a_{ipt}.png')
# shutil.copy2(source_path, destination_path)
# for ipt in range(len(images_b)):
# source_path = images_b[ipt]
# destination_path = os.path.join(foldi, f'b_{ipt}.png')
# shutil.copy2(source_path, destination_path)
cross_pairs_similarity = []
for i in range(len(cross_pairs)):
images_a = [osp.join(conf.test_val, img) for img in cross_pairs[i][0]]
images_b = [osp.join(conf.test_val, img) for img in cross_pairs[i][1]]
feats_a = Encoder.inference(images_a)
feats_b = Encoder.inference(images_b)
# matrix = 1- np.maximum(0.0, cdist(feats_a, feats_b, 'cosine'))
matrix = 1 - cdist(feats_a, feats_b, 'cosine')
feats_am = np.mean(feats_a, axis=0, keepdims=True)
feats_bm = np.mean(feats_b, axis=0, keepdims=True)
matrixm = 1- np.maximum(0.0, cdist(feats_am, feats_bm, 'cosine'))
cross_pairs_similarity.append(np.mean(matrix))
'''save image pairs with different barcodes'''
# foldi = os.path.join('./result/cross', f'{i}')
# if os.path.exists(foldi):
# shutil.rmtree(foldi)
# os.makedirs(foldi)
# else:
# os.makedirs(foldi)
# for ipt in range(len(images_a)):
# source_path = images_a[ipt]
# destination_path = os.path.join(foldi, f'a_{ipt}.png')
# shutil.copy2(source_path, destination_path)
# for ipt in range(len(images_b)):
# source_path = images_b[ipt]
# destination_path = os.path.join(foldi, f'b_{ipt}.png')
# shutil.copy2(source_path, destination_path)
Thresh = np.linspace(-0.2, 1, 100)
Same = np.array(same_pairs_similarity)
Cross = np.array(cross_pairs_similarity)
fig, axs = plt.subplots(2, 1)
axs[0].hist(Same, bins=60, edgecolor='black')
axs[0].set_xlim([-0.2, 1])
axs[0].set_title('Same Barcode')
axs[1].hist(Cross, bins=60, edgecolor='black')
axs[1].set_xlim([-0.2, 1])
axs[1].set_title('Cross Barcode')
TPFN = len(Same)
TNFP = len(Cross)
Recall_Pos, Recall_Neg = [], []
Precision_Pos, Precision_Neg = [], []
Correct = []
for th in Thresh:
TP = np.sum(Same > th)
FN = TPFN - TP
TN = np.sum(Cross < th)
FP = TNFP - TN
Recall_Pos.append(TP/TPFN)
Recall_Neg.append(TN/TNFP)
Precision_Pos.append(TP/(TP+FP))
Precision_Neg.append(TN/(TN+FN))
Correct.append((TN+TP)/(TPFN+TNFP))
fig, ax = plt.subplots()
ax.plot(Thresh, Correct, 'r', label='Correct: (TN+TP)/(TPFN+TNFP)')
ax.plot(Thresh, Recall_Pos, 'b', label='Recall_Pos: TP/TPFN')
ax.plot(Thresh, Recall_Neg, 'g', label='Recall_Neg: TN/TNFP')
ax.plot(Thresh, Precision_Pos, 'c', label='Precision_Pos: TP/(TP+FP)')
ax.plot(Thresh, Precision_Neg, 'm', label='Precision_Neg: TN/(TN+FN)')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid(True)
ax.set_title('PrecisePos & PreciseNeg')
ax.legend()
plt.show()
print("Haved done!!!")
if __name__ == '__main__':
# Network Setup
if conf.testbackbone == 'resnet18':
# model = ResIRSE(conf.img_size, conf.embedding_size, conf.drop_ratio).to(conf.device)
model = resnet18().to(conf.device)
# elif conf.testbackbone == 'resnet34':
# model = resnet34().to(conf.device)
# elif conf.testbackbone == 'resnet50':
# model = resnet50().to(conf.device)
# elif conf.testbackbone == 'mobilevit_s':
# model = mobilevit_s().to(conf.device)
# elif conf.testbackbone == 'mobilenetv3':
# model = MobileNetV3_Small().to(conf.device)
# elif conf.testbackbone == 'mobilenet_v1':
# model = mobilenet_v1().to(conf.device)
# elif conf.testbackbone == 'PPLCNET_x1_0':
# model = PPLCNET_x1_0().to(conf.device)
# elif conf.testbackbone == 'PPLCNET_x0_5':
# model = PPLCNET_x0_5().to(conf.device)
# elif conf.backbone == 'PPLCNET_x2_5':
# model = PPLCNET_x2_5().to(conf.device)
# elif conf.testbackbone == 'mobilenet_v2':
# model = mobilenet_v2().to(conf.device)
# elif conf.testbackbone == 'resnet14':
# model = resnet14().to(conf.device)
else:
raise ValueError('Unsupported model {}'.format(conf.testbackbone))
print('load model {} '.format(conf.testbackbone))
# model = nn.DataParallel(model).to(conf.device)
model.load_state_dict(torch.load(conf.test_model, map_location=conf.device))
model.eval()
if not conf.group_test:
images = unique_image(conf.test_list)
images = [osp.join(conf.test_val, img) for img in images]
groups = group_image(images, conf.test_batch_size) ## group image paths by batch_size
feature_dict = dict()
for group in groups:
d = featurize(group, conf.test_transform, model, conf.device)
feature_dict.update(d)
# print('feature_dict', feature_dict)
accuracy, threshold = compute_accuracy(feature_dict, conf.test_list, conf.test_val)
print(
f"Test Model: {conf.test_model}\n"
f"Accuracy: {accuracy:.3f}\n"
f"Threshold: {threshold:.3f}\n"
)
elif conf.group_test:
"""
conf.test_val: path to the test dataset
conf.test_group_json: grouping config file for the test data
"""
filename = conf.test_group_json
filename = "../cl/images_1.json" # hard-coded override of conf.test_group_json
with open(filename, 'r', encoding='utf-8') as file:
content_list_read = json.load(file)
compute_contrast_accuracy(content_list_read)
# =============================================================================
# Similarity, Label = compute_group_accuracy(content_list_read)
# print('allSimilarity >> {}'.format(Similarity))
# print('allLabel >> {}'.format(Label))
# compute_accuracy_recall(np.array(Similarity), np.array(Label))
# # compute_group_accuracy(data_loaded)
#
# =============================================================================


@@ -0,0 +1,88 @@
import torch.nn as nn
import torchvision
from torch.nn import init
class Flatten(nn.Module):
def forward(self, x):
return x.view(x.shape[0], -1)
class ChannelAttention(nn.Module):
def __init__(self, channel, reduction=16, num_layers=3):
super(ChannelAttention, self).__init__()
self.avgpool = nn.AdaptiveAvgPool2d(1)
gate_channels = [channel]
gate_channels += [channel // reduction] * num_layers
gate_channels += [channel]
self.ca = nn.Sequential()
self.ca.add_module('flatten', Flatten())
for i in range(len(gate_channels) - 2):
# unique child names are required; add_module('') would overwrite the previous layer
self.ca.add_module('fc%d' % i, nn.Linear(gate_channels[i], gate_channels[i + 1]))
self.ca.add_module('bn%d' % i, nn.BatchNorm1d(gate_channels[i + 1]))
self.ca.add_module('relu%d' % i, nn.ReLU())
self.ca.add_module('fc_out', nn.Linear(gate_channels[-2], gate_channels[-1]))
def forward(self, x):
res = self.avgpool(x)
res = self.ca(res)
res = res.unsqueeze(-1).unsqueeze(-1).expand_as(x)
return res
class SpatialAttention(nn.Module):
def __init__(self, channel, reduction=16, num_lay=3, dilation=2):
super(SpatialAttention, self).__init__()
self.sa = nn.Sequential()
self.sa.add_module('conv_reduce', nn.Conv2d(kernel_size=1, in_channels=channel, out_channels=channel // reduction))
self.sa.add_module('bn_reduce', nn.BatchNorm2d(num_features=channel // reduction))
self.sa.add_module('relu_reduce', nn.ReLU())
for i in range(num_lay):
self.sa.add_module('conv_%d' % i, nn.Conv2d(kernel_size=3,
in_channels=channel // reduction,
out_channels=channel // reduction,
padding=dilation, # padding must equal dilation to preserve the spatial size
dilation=dilation))
self.sa.add_module('bn_%d' % i, nn.BatchNorm2d(channel // reduction))
self.sa.add_module('relu_%d' % i, nn.ReLU())
self.sa.add_module('conv_out', nn.Conv2d(channel // reduction, 1, kernel_size=1))
def forward(self, x):
res = self.sa(x)
res = res.expand_as(x)
return res
class BAMblock(nn.Module):
def __init__(self, channel=512, reduction=16, dia_val=2):
super(BAMblock, self).__init__()
self.ca = ChannelAttention(channel, reduction)
self.sa = SpatialAttention(channel, reduction, dilation=dia_val)
self.sigmoid = nn.Sigmoid()
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
b, c, _, _ = x.size()
sa_out = self.sa(x)
ca_out = self.ca(x)
weight = self.sigmoid(sa_out + ca_out)
out = (1 + weight) * x
return out
if __name__ == "__main__":
print(512 // 14)


@@ -0,0 +1,70 @@
import torch
import torch.nn as nn
import torch.nn.init as init
class channelAttention(nn.Module):
def __init__(self, channel, reduction=16):
super(channelAttention, self).__init__()
self.Maxpooling = nn.AdaptiveMaxPool2d(1)
self.Avepooling = nn.AdaptiveAvgPool2d(1)
self.ca = nn.Sequential()
self.ca.add_module('conv1',nn.Conv2d(channel, channel//reduction, 1, bias=False))
self.ca.add_module('Relu', nn.ReLU())
self.ca.add_module('conv2',nn.Conv2d(channel//reduction, channel, 1, bias=False))
self.sigmoid = nn.Sigmoid()
def forward(self, x):
M_out = self.Maxpooling(x)
A_out = self.Avepooling(x)
M_out = self.ca(M_out)
A_out = self.ca(A_out)
out = self.sigmoid(M_out+A_out)
return out
class SpatialAttention(nn.Module):
def __init__(self, kernel_size=7):
super().__init__()
self.conv = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=kernel_size, padding=kernel_size // 2)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
max_result, _ = torch.max(x, dim=1, keepdim=True)
avg_result = torch.mean(x, dim=1, keepdim=True)
result = torch.cat([max_result, avg_result], dim=1)
output = self.conv(result)
output = self.sigmoid(output)
return output
class CBAM(nn.Module):
def __init__(self, channel, reduction=16, kernel_size=7):
super().__init__()
self.ca = channelAttention(channel, reduction)
self.sa = SpatialAttention(kernel_size)
def init_weights(self):
for m in self.modules(): # weight initialization
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
# b,c_,_ = x.size()
# residual = x
out = x*self.ca(x)
out = out*self.sa(out)
return out
if __name__ == '__main__':
input=torch.randn(50,512,7,7)
kernel_size=input.shape[2]
cbam = CBAM(channel=512,reduction=16,kernel_size=kernel_size)
output=cbam(input)
print(output.shape)


@@ -0,0 +1,33 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class GeM(nn.Module):
def __init__(self, p=3, eps=1e-6):
super(GeM, self).__init__()
self.p = nn.Parameter(torch.ones(1) * p)
self.eps = eps
def forward(self, x):
return self.gem(x, p=self.p, eps=self.eps, stride=2)
def gem(self, x, p=3, eps=1e-6, stride=2):
return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1)), stride=stride).pow(1. / p)
def __repr__(self):
return self.__class__.__name__ + \
'(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
', ' + 'eps=' + str(self.eps) + ')'
class TripletLoss(nn.Module):
def __init__(self, margin):
super(TripletLoss, self).__init__()
self.margin = margin
def forward(self, anchor, positive, negative, size_average = True):
distance_positive = (anchor-positive).pow(2).sum(1)
distance_negative = (anchor-negative).pow(2).sum(1)
losses = F.relu(distance_negative-distance_positive+self.margin)
return losses.mean() if size_average else losses.sum()
if __name__ == '__main__':
print('')
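In formula form, the two modules above compute generalized-mean pooling and the standard margin-based triplet objective (Ω is the set of spatial positions; the exponent p is learnable, initialized to 3; a, pos, neg are anchor, positive, and negative embeddings):
GeM_p(x) = ((1/|Ω|) · Σ_{i∈Ω} max(x_i, eps)^p)^(1/p)
L(a, pos, neg) = mean(max(0, ||a − pos||² − ||a − neg||² + margin))
As p → ∞, GeM approaches max pooling; p = 1 reduces to average pooling.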


@@ -0,0 +1,11 @@
from .fmobilenet import FaceMobileNet
from .resnet_face import ResIRSE
from .mobilevit import mobilevit_s
from .metric import ArcFace, CosFace
from .loss import FocalLoss
from .resbam import resnet
from .resnet_pre import resnet18, resnet34, resnet50, resnet14
from .mobilenet_v2 import mobilenet_v2
from .mobilenet_v3 import MobileNetV3_Small, MobileNetV3_Large
# from .mobilenet_v1 import mobilenet_v1
from .lcnet import PPLCNET_x0_25, PPLCNET_x0_35, PPLCNET_x0_5, PPLCNET_x0_75, PPLCNET_x1_0, PPLCNET_x1_5, PPLCNET_x2_0, PPLCNET_x2_5


@@ -0,0 +1,124 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class Flatten(nn.Module):
def forward(self, x):
return x.view(x.shape[0], -1)
class ConvBn(nn.Module):
def __init__(self, in_c, out_c, kernel=(1, 1), stride=1, padding=0, groups=1):
super().__init__()
self.net = nn.Sequential(
nn.Conv2d(in_c, out_c, kernel, stride, padding, groups=groups, bias=False),
nn.BatchNorm2d(out_c)
)
def forward(self, x):
return self.net(x)
class ConvBnPrelu(nn.Module):
def __init__(self, in_c, out_c, kernel=(1, 1), stride=1, padding=0, groups=1):
super().__init__()
self.net = nn.Sequential(
ConvBn(in_c, out_c, kernel, stride, padding, groups),
nn.PReLU(out_c)
)
def forward(self, x):
return self.net(x)
class DepthWise(nn.Module):
def __init__(self, in_c, out_c, kernel=(3, 3), stride=2, padding=1, groups=1):
super().__init__()
self.net = nn.Sequential(
ConvBnPrelu(in_c, groups, kernel=(1, 1), stride=1, padding=0),
ConvBnPrelu(groups, groups, kernel=kernel, stride=stride, padding=padding, groups=groups),
ConvBn(groups, out_c, kernel=(1, 1), stride=1, padding=0),
)
def forward(self, x):
return self.net(x)
class DepthWiseRes(nn.Module):
"""DepthWise with Residual"""
def __init__(self, in_c, out_c, kernel=(3, 3), stride=2, padding=1, groups=1):
super().__init__()
self.net = DepthWise(in_c, out_c, kernel, stride, padding, groups)
def forward(self, x):
return self.net(x) + x
class MultiDepthWiseRes(nn.Module):
def __init__(self, num_block, channels, kernel=(3, 3), stride=1, padding=1, groups=1):
super().__init__()
self.net = nn.Sequential(*[
DepthWiseRes(channels, channels, kernel, stride, padding, groups)
for _ in range(num_block)
])
def forward(self, x):
return self.net(x)
class FaceMobileNet(nn.Module):
def __init__(self, embedding_size):
super().__init__()
self.conv1 = ConvBnPrelu(1, 64, kernel=(3, 3), stride=2, padding=1)
self.conv2 = ConvBn(64, 64, kernel=(3, 3), stride=1, padding=1, groups=64)
self.conv3 = DepthWise(64, 64, kernel=(3, 3), stride=2, padding=1, groups=128)
self.conv4 = MultiDepthWiseRes(num_block=4, channels=64, kernel=3, stride=1, padding=1, groups=128)
self.conv5 = DepthWise(64, 128, kernel=(3, 3), stride=2, padding=1, groups=256)
self.conv6 = MultiDepthWiseRes(num_block=6, channels=128, kernel=(3, 3), stride=1, padding=1, groups=256)
self.conv7 = DepthWise(128, 128, kernel=(3, 3), stride=2, padding=1, groups=512)
self.conv8 = MultiDepthWiseRes(num_block=2, channels=128, kernel=(3, 3), stride=1, padding=1, groups=256)
self.conv9 = ConvBnPrelu(128, 512, kernel=(1, 1))
self.conv10 = ConvBn(512, 512, groups=512, kernel=(7, 7))
self.flatten = Flatten()
self.linear = nn.Linear(2048, embedding_size, bias=False)
self.bn = nn.BatchNorm1d(embedding_size)
def forward(self, x):
#print('x',x.shape)
out = self.conv1(x)
out = self.conv2(out)
out = self.conv3(out)
out = self.conv4(out)
out = self.conv5(out)
out = self.conv6(out)
out = self.conv7(out)
out = self.conv8(out)
out = self.conv9(out)
out = self.conv10(out)
out = self.flatten(out)
out = self.linear(out)
out = self.bn(out)
return out
if __name__ == "__main__":
from PIL import Image
import numpy as np
x = Image.open("../samples/009.jpg").convert('L')
x = x.resize((128, 128))
x = np.asarray(x, dtype=np.float32)
x = x[None, None, ...]
x = torch.from_numpy(x)
net = FaceMobileNet(512)
net.eval()
with torch.no_grad():
out = net(x)
print(out.shape)


@@ -0,0 +1,233 @@
import os
import torch
import torch.nn as nn
import thop
# try:
# import softpool_cuda
# from SoftPool import soft_pool2d, SoftPool2d
# except ImportError:
# print('Please install SoftPool first: https://github.com/alexandrosstergiou/SoftPool')
# exit(0)
NET_CONFIG = {
# k, in_c, out_c, s, use_se
"blocks2": [[3, 16, 32, 1, False]],
"blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
"blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
"blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False]],
"blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
}
def autopad(k, p=None):
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
return p
def make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
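# Illustrative examples with divisor=8: make_divisible(7) -> 8 and
# make_divisible(12) -> 16 (nearest multiple of 8, ties round up), while
# make_divisible(10) -> 16 because rounding down to 8 would shrink the
# channel count by more than 10%.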
class HardSwish(nn.Module):
def __init__(self, inplace=True):
super(HardSwish, self).__init__()
self.relu6 = nn.ReLU6(inplace=inplace)
def forward(self, x):
return x * self.relu6(x+3) / 6
class HardSigmoid(nn.Module):
def __init__(self, inplace=True):
super(HardSigmoid, self).__init__()
self.relu6 = nn.ReLU6(inplace=inplace)
def forward(self, x):
return (self.relu6(x+3)) / 6
class SELayer(nn.Module):
def __init__(self, channel, reduction=16):
super(SELayer, self).__init__()
self.avgpool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channel, channel // reduction, bias=False),
nn.ReLU(inplace=True),
nn.Linear(channel // reduction, channel, bias=False),
HardSigmoid()
)
def forward(self, x):
b, c, h, w = x.size()
y = self.avgpool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
return x * y.expand_as(x)
class DepthwiseSeparable(nn.Module):
def __init__(self, inp, oup, dw_size, stride, use_se=False):
super(DepthwiseSeparable, self).__init__()
self.use_se = use_se
self.stride = stride
self.inp = inp
self.oup = oup
self.dw_size = dw_size
self.dw_sp = nn.Sequential(
nn.Conv2d(self.inp, self.inp, kernel_size=self.dw_size, stride=self.stride,
padding=autopad(self.dw_size, None), groups=self.inp, bias=False),
nn.BatchNorm2d(self.inp),
HardSwish(),
nn.Conv2d(self.inp, self.oup, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(self.oup),
HardSwish(),
)
self.se = SELayer(self.oup)
def forward(self, x):
x = self.dw_sp(x)
if self.use_se:
x = self.se(x)
return x
class PP_LCNet(nn.Module):
def __init__(self, scale=1.0, class_num=10, class_expand=1280, dropout_prob=0.2):
super(PP_LCNet, self).__init__()
self.scale = scale
self.conv1 = nn.Conv2d(3, out_channels=make_divisible(16 * self.scale),
kernel_size=3, stride=2, padding=1, bias=False)
# k, in_c, out_c, s, use_se inp, oup, dw_size, stride, use_se=False
self.blocks2 = nn.Sequential(*[
DepthwiseSeparable(inp=make_divisible(in_c * self.scale),
oup=make_divisible(out_c * self.scale),
dw_size=k, stride=s, use_se=use_se)
for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks2"])
])
self.blocks3 = nn.Sequential(*[
DepthwiseSeparable(inp=make_divisible(in_c * self.scale),
oup=make_divisible(out_c * self.scale),
dw_size=k, stride=s, use_se=use_se)
for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks3"])
])
self.blocks4 = nn.Sequential(*[
DepthwiseSeparable(inp=make_divisible(in_c * self.scale),
oup=make_divisible(out_c * self.scale),
dw_size=k, stride=s, use_se=use_se)
for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks4"])
])
# k, in_c, out_c, s, use_se inp, oup, dw_size, stride, use_se=False
self.blocks5 = nn.Sequential(*[
DepthwiseSeparable(inp=make_divisible(in_c * self.scale),
oup=make_divisible(out_c * self.scale),
dw_size=k, stride=s, use_se=use_se)
for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks5"])
])
self.blocks6 = nn.Sequential(*[
DepthwiseSeparable(inp=make_divisible(in_c * self.scale),
oup=make_divisible(out_c * self.scale),
dw_size=k, stride=s, use_se=use_se)
for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks6"])
])
self.GAP = nn.AdaptiveAvgPool2d(1)
self.last_conv = nn.Conv2d(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale),
out_channels=class_expand,
kernel_size=1, stride=1, padding=0, bias=False)
self.hardswish = HardSwish()
self.dropout = nn.Dropout(p=dropout_prob)
self.fc = nn.Linear(class_expand, class_num)
def forward(self, x):
x = self.conv1(x)
print(x.shape)
x = self.blocks2(x)
print(x.shape)
x = self.blocks3(x)
print(x.shape)
x = self.blocks4(x)
print(x.shape)
x = self.blocks5(x)
print(x.shape)
x = self.blocks6(x)
print(x.shape)
x = self.GAP(x)
x = self.last_conv(x)
x = self.hardswish(x)
x = self.dropout(x)
x = torch.flatten(x, start_dim=1, end_dim=-1)
x = self.fc(x)
return x
def PPLCNET_x0_25(**kwargs):
model = PP_LCNet(scale=0.25, **kwargs)
return model
def PPLCNET_x0_35(**kwargs):
model = PP_LCNet(scale=0.35, **kwargs)
return model
def PPLCNET_x0_5(**kwargs):
model = PP_LCNet(scale=0.5, **kwargs)
return model
def PPLCNET_x0_75(**kwargs):
model = PP_LCNet(scale=0.75, **kwargs)
return model
def PPLCNET_x1_0(**kwargs):
model = PP_LCNet(scale=1.0, **kwargs)
return model
def PPLCNET_x1_5(**kwargs):
model = PP_LCNet(scale=1.5, **kwargs)
return model
def PPLCNET_x2_0(**kwargs):
model = PP_LCNet(scale=2.0, **kwargs)
return model
def PPLCNET_x2_5(**kwargs):
model = PP_LCNet(scale=2.5, **kwargs)
return model
if __name__ == '__main__':
# input = torch.randn(1, 3, 640, 640)
# model = PPLCNET_x2_5()
# flops, params = thop.profile(model, inputs=(input,))
# print('flops:', flops / 1000000000)
# print('params:', params / 1000000)
model = PPLCNET_x1_0()
# model_1 = PW_Conv(3, 16)
input = torch.randn(2, 3, 256, 256)
print(input.shape)
output = model(input)
print(output.shape) # [1, num_class]


@@ -0,0 +1,18 @@
import torch
import torch.nn as nn
class FocalLoss(nn.Module):
def __init__(self, gamma=2):
super().__init__()
self.gamma = gamma
self.ce = torch.nn.CrossEntropyLoss()
def forward(self, input, target):
#print(f'theta {input.shape, input[0]}, target {target.shape, target}')
logp = self.ce(input, target)
p = torch.exp(-logp)
loss = (1 - p) ** self.gamma * logp
return loss.mean()
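# In effect: FL = (1 - p)**gamma * CE, where p = exp(-CE) is recovered from the
# batch-averaged cross entropy; with gamma=2, a confidently correct batch
# (p = 0.9) keeps only 1% of its cross-entropy loss.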


@@ -0,0 +1,83 @@
# Definition of ArcFace loss and CosFace loss
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
class ArcFace(nn.Module):
def __init__(self, embedding_size, class_num, s=30.0, m=0.50):
"""ArcFace formula:
cos(m + theta) = cos(m)cos(theta) - sin(m)sin(theta)
Note that:
0 <= m + theta <= Pi
So if (m + theta) >= Pi, then theta >= Pi - m. In [0, Pi]
we have:
cos(theta) < cos(Pi - m)
So we can use cos(Pi - m) as threshold to check whether
(m + theta) goes out of [0, Pi]
Args:
embedding_size: usually 128, 256, 512 ...
class_num: num of people when training
s: scale, see normface https://arxiv.org/abs/1704.06369
m: margin, see SphereFace, CosFace, and ArcFace paper
"""
super().__init__()
self.in_features = embedding_size
self.out_features = class_num
self.s = s
self.m = m
self.weight = nn.Parameter(torch.FloatTensor(class_num, embedding_size))
nn.init.xavier_uniform_(self.weight)
self.cos_m = math.cos(m)
self.sin_m = math.sin(m)
self.th = math.cos(math.pi - m)
self.mm = math.sin(math.pi - m) * m
def forward(self, input, label):
#print(f"embding {self.in_features}, class_num {self.out_features}, input {len(input)}, label {len(label)}")
cosine = F.linear(F.normalize(input), F.normalize(self.weight))
# print('F.normalize(input)',input.shape)
# print('F.normalize(self.weight)',F.normalize(self.weight).shape)
sine = ((1.0 - cosine.pow(2)).clamp(0, 1)).sqrt()
phi = cosine * self.cos_m - sine * self.sin_m
phi = torch.where(cosine > self.th, phi, cosine - self.mm) # drop to CosFace
#print(f'consine {cosine.shape, cosine}, sine {sine.shape, sine}, phi {phi.shape, phi}')
# update y_i by phi in cosine
output = cosine * 1.0 # make backward works
batch_size = len(output)
output[range(batch_size), label] = phi[range(batch_size), label]
# print(f'output {(output * self.s).shape}')
# print(f'phi[range(batch_size), label] {phi[range(batch_size), label]}')
return output * self.s
class CosFace(nn.Module):
def __init__(self, in_features, out_features, s=30.0, m=0.40):
"""
Args:
embedding_size: usually 128, 256, 512 ...
class_num: num of people when training
s: scale, see normface https://arxiv.org/abs/1704.06369
m: margin, see SphereFace, CosFace, and ArcFace paper
"""
super().__init__()
self.in_features = in_features
self.out_features = out_features
self.s = s
self.m = m
self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
nn.init.xavier_uniform_(self.weight)
def forward(self, input, label):
cosine = F.linear(F.normalize(input), F.normalize(self.weight))
phi = cosine - self.m
output = cosine * 1.0 # make backward works
batch_size = len(output)
output[range(batch_size), label] = phi[range(batch_size), label]
return output * self.s
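A minimal training-step sketch for these margin heads (hypothetical sizes; the random embeddings stand in for backbone outputs):
import torch
import torch.nn.functional as F
head = ArcFace(embedding_size=256, class_num=100)
embeddings = torch.randn(8, 256) # batch of 8 feature vectors
labels = torch.randint(0, 100, (8,)) # ground-truth class ids
logits = head(embeddings, labels) # (8, 100); margin applied at the label positions
loss = F.cross_entropy(logits, labels)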


@@ -0,0 +1,148 @@
# Copyright 2022 Dakewe Biotech Corporation. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from typing import Callable, Any, Optional
import torch
from torch import Tensor
from torch import nn
from torchvision.ops.misc import Conv2dNormActivation
from config import config as conf
__all__ = [
"MobileNetV1",
"DepthWiseSeparableConv2d",
"mobilenet_v1",
]
class MobileNetV1(nn.Module):
def __init__(
self,
num_classes: int = conf.embedding_size,
) -> None:
super(MobileNetV1, self).__init__()
self.features = nn.Sequential(
Conv2dNormActivation(3,
32,
kernel_size=3,
stride=2,
padding=1,
norm_layer=nn.BatchNorm2d,
activation_layer=nn.ReLU,
inplace=True,
bias=False,
),
DepthWiseSeparableConv2d(32, 64, 1),
DepthWiseSeparableConv2d(64, 128, 2),
DepthWiseSeparableConv2d(128, 128, 1),
DepthWiseSeparableConv2d(128, 256, 2),
DepthWiseSeparableConv2d(256, 256, 1),
DepthWiseSeparableConv2d(256, 512, 2),
DepthWiseSeparableConv2d(512, 512, 1),
DepthWiseSeparableConv2d(512, 512, 1),
DepthWiseSeparableConv2d(512, 512, 1),
DepthWiseSeparableConv2d(512, 512, 1),
DepthWiseSeparableConv2d(512, 512, 1),
DepthWiseSeparableConv2d(512, 1024, 2),
DepthWiseSeparableConv2d(1024, 1024, 1),
)
self.avgpool = nn.AvgPool2d((7, 7))
self.classifier = nn.Linear(1024, num_classes)
# Initialize neural network weights
self._initialize_weights()
def forward(self, x: Tensor) -> Tensor:
out = self._forward_impl(x)
return out
# Support torch.script function
def _forward_impl(self, x: Tensor) -> Tensor:
out = self.features(x)
out = self.avgpool(out)
out = torch.flatten(out, 1)
out = self.classifier(out)
return out
def _initialize_weights(self) -> None:
for module in self.modules():
if isinstance(module, nn.Conv2d):
nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
if module.bias is not None:
nn.init.zeros_(module.bias)
elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.ones_(module.weight)
nn.init.zeros_(module.bias)
elif isinstance(module, nn.Linear):
nn.init.normal_(module.weight, 0, 0.01)
nn.init.zeros_(module.bias)
class DepthWiseSeparableConv2d(nn.Module):
def __init__(
self,
in_channels: int,
out_channels: int,
stride: int,
norm_layer: Optional[Callable[..., nn.Module]] = None
) -> None:
super(DepthWiseSeparableConv2d, self).__init__()
self.stride = stride
if stride not in [1, 2]:
raise ValueError(f"stride should be 1 or 2 instead of {stride}")
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self.conv = nn.Sequential(
Conv2dNormActivation(in_channels,
in_channels,
kernel_size=3,
stride=stride,
padding=1,
groups=in_channels,
norm_layer=norm_layer,
activation_layer=nn.ReLU,
inplace=True,
bias=False,
),
Conv2dNormActivation(in_channels,
out_channels,
kernel_size=1,
stride=1,
padding=0,
norm_layer=norm_layer,
activation_layer=nn.ReLU,
inplace=True,
bias=False,
),
)
def forward(self, x: Tensor) -> Tensor:
out = self.conv(x)
return out
def mobilenet_v1(**kwargs: Any) -> MobileNetV1:
model = MobileNetV1(**kwargs)
return model

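A quick smoke test for the embedder above (a sketch; note that the fixed 7x7 average pool means the forward pass assumes 224x224 inputs, and num_classes is really the embedding size):

import torch

net = mobilenet_v1(num_classes=256)  # embedding size, despite the parameter name
x = torch.randn(2, 3, 224, 224)      # 224 -> 7 after the five stride-2 stages
print(net(x).shape)                  # torch.Size([2, 256])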
View File

@ -0,0 +1,200 @@
from torch import nn
from .utils import load_state_dict_from_url
from ..config import config as conf
__all__ = ['MobileNetV2', 'mobilenet_v2']
model_urls = {
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
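# Worked examples (illustrative): _make_divisible(32 * 0.75, 8) == 24 (already a
# multiple of 8), _make_divisible(33, 8) == 32 (nearest multiple, within 10%),
# _make_divisible(11, 8) == 16 (rounding down to 8 would lose more than 10%).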
class ConvBNReLU(nn.Sequential):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=None):
padding = (kernel_size - 1) // 2
if norm_layer is None:
norm_layer = nn.BatchNorm2d
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
norm_layer(out_planes),
nn.ReLU6(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio, norm_layer=None):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
if norm_layer is None:
norm_layer = nn.BatchNorm2d
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
norm_layer(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self,
num_classes=conf.embedding_size,
width_mult=1.0,
inverted_residual_setting=None,
round_nearest=8,
block=None,
norm_layer=None):
"""
MobileNet V2 main class
Args:
num_classes (int): Number of classes
width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
inverted_residual_setting: Network structure
round_nearest (int): Round the number of channels in each layer to be a multiple of this number
Set to 1 to turn off rounding
block: Module specifying inverted residual building block for mobilenet
norm_layer: Module specifying the normalization layer to use
"""
super(MobileNetV2, self).__init__()
if block is None:
block = InvertedResidual
if norm_layer is None:
norm_layer = nn.BatchNorm2d
input_channel = 32
last_channel = 1280
if inverted_residual_setting is None:
inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# only check the first element, assuming user knows t,c,n,s are required
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
raise ValueError("inverted_residual_setting should be non-empty "
"or a 4-element list, got {}".format(inverted_residual_setting))
# building first layer
input_channel = _make_divisible(input_channel * width_mult, round_nearest)
self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)]
# building inverted residual blocks
for t, c, n, s in inverted_residual_setting:
output_channel = _make_divisible(c * width_mult, round_nearest)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))
input_channel = output_channel
# building last several layers
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer))
# make it nn.Sequential
self.features = nn.Sequential(*features)
# building classifier
self.classifier = nn.Sequential(
nn.Dropout(0.2),
nn.Linear(self.last_channel, num_classes),
)
# weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.zeros_(m.bias)
def _forward_impl(self, x):
# This exists since TorchScript doesn't support inheritance, so the superclass method
# (this one) needs to have a name other than `forward` that can be accessed in a subclass
x = self.features(x)
# Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1)
x = self.classifier(x)
return x
def forward(self, x):
return self._forward_impl(x)
def mobilenet_v2(pretrained=True, progress=True, **kwargs):
"""
Constructs a MobileNetV2 architecture from
`"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
model = MobileNetV2(**kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'],
progress=progress)
src_state_dict = state_dict
target_state_dict = model.state_dict()
skip_keys = []
# skip mismatch size tensors in case of pretraining
for k in src_state_dict.keys():
if k not in target_state_dict:
continue
if src_state_dict[k].size() != target_state_dict[k].size():
skip_keys.append(k)
for k in skip_keys:
del src_state_dict[k]
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
return model

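The skip-keys logic above lets the ImageNet checkpoint load even though the classifier head is re-sized to the embedding dimension; a hedged usage sketch:

import torch

net = mobilenet_v2(pretrained=True)  # classifier.1 (1280 -> embedding_size) is skipped during loading and stays randomly initialized
net.eval()
emb = net(torch.randn(1, 3, 224, 224))
print(emb.shape)  # torch.Size([1, conf.embedding_size])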
View File

@ -0,0 +1,200 @@
'''MobileNetV3 in PyTorch.
See the paper "Searching for MobileNetV3"
(https://arxiv.org/abs/1905.02244) for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init
from ..config import config as conf
class hswish(nn.Module):
def forward(self, x):
out = x * F.relu6(x + 3, inplace=True) / 6
return out
class hsigmoid(nn.Module):
def forward(self, x):
out = F.relu6(x + 3, inplace=True) / 6
return out
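# hswish and hsigmoid are MobileNetV3's piecewise-linear "hard" activations:
# h-sigmoid(x) = ReLU6(x + 3) / 6 and h-swish(x) = x * h-sigmoid(x), cheap
# approximations of sigmoid and swish that avoid computing exp() on mobile hardware.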
class SeModule(nn.Module):
def __init__(self, in_size, reduction=4):
super(SeModule, self).__init__()
self.se = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_size, in_size // reduction, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(in_size // reduction),
nn.ReLU(inplace=True),
nn.Conv2d(in_size // reduction, in_size, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(in_size),
hsigmoid()
)
def forward(self, x):
return x * self.se(x)
class Block(nn.Module):
'''expand + depthwise + pointwise'''
def __init__(self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride):
super(Block, self).__init__()
self.stride = stride
self.se = semodule
self.conv1 = nn.Conv2d(in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(expand_size)
self.nolinear1 = nolinear
self.conv2 = nn.Conv2d(expand_size, expand_size, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=expand_size, bias=False)
self.bn2 = nn.BatchNorm2d(expand_size)
self.nolinear2 = nolinear
self.conv3 = nn.Conv2d(expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False)
self.bn3 = nn.BatchNorm2d(out_size)
self.shortcut = nn.Sequential()
if stride == 1 and in_size != out_size:
self.shortcut = nn.Sequential(
nn.Conv2d(in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(out_size),
)
def forward(self, x):
out = self.nolinear1(self.bn1(self.conv1(x)))
out = self.nolinear2(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
if self.se is not None:
out = self.se(out)
out = out + self.shortcut(x) if self.stride==1 else out
return out
class MobileNetV3_Large(nn.Module):
def __init__(self, num_classes=conf.embedding_size):
super(MobileNetV3_Large, self).__init__()
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.hs1 = hswish()
self.bneck = nn.Sequential(
Block(3, 16, 16, 16, nn.ReLU(inplace=True), None, 1),
Block(3, 16, 64, 24, nn.ReLU(inplace=True), None, 2),
Block(3, 24, 72, 24, nn.ReLU(inplace=True), None, 1),
Block(5, 24, 72, 40, nn.ReLU(inplace=True), SeModule(40), 2),
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
Block(3, 40, 240, 80, hswish(), None, 2),
Block(3, 80, 200, 80, hswish(), None, 1),
Block(3, 80, 184, 80, hswish(), None, 1),
Block(3, 80, 184, 80, hswish(), None, 1),
Block(3, 80, 480, 112, hswish(), SeModule(112), 1),
Block(3, 112, 672, 112, hswish(), SeModule(112), 1),
Block(5, 112, 672, 160, hswish(), SeModule(160), 1),
Block(5, 160, 672, 160, hswish(), SeModule(160), 2),
Block(5, 160, 960, 160, hswish(), SeModule(160), 1),
)
self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(960)
self.hs2 = hswish()
self.linear3 = nn.Linear(960, 1280)
self.bn3 = nn.BatchNorm1d(1280)
self.hs3 = hswish()
self.linear4 = nn.Linear(1280, num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
out = self.hs1(self.bn1(self.conv1(x)))
out = self.bneck(out)
out = self.hs2(self.bn2(self.conv2(out)))
out = F.avg_pool2d(out, conf.img_size // 32)
out = out.view(out.size(0), -1)
out = self.hs3(self.bn3(self.linear3(out)))
out = self.linear4(out)
return out
class MobileNetV3_Small(nn.Module):
def __init__(self, num_classes=conf.embedding_size):
super(MobileNetV3_Small, self).__init__()
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.hs1 = hswish()
self.bneck = nn.Sequential(
Block(3, 16, 16, 16, nn.ReLU(inplace=True), SeModule(16), 2),
Block(3, 16, 72, 24, nn.ReLU(inplace=True), None, 2),
Block(3, 24, 88, 24, nn.ReLU(inplace=True), None, 1),
Block(5, 24, 96, 40, hswish(), SeModule(40), 2),
Block(5, 40, 240, 40, hswish(), SeModule(40), 1),
Block(5, 40, 240, 40, hswish(), SeModule(40), 1),
Block(5, 40, 120, 48, hswish(), SeModule(48), 1),
Block(5, 48, 144, 48, hswish(), SeModule(48), 1),
Block(5, 48, 288, 96, hswish(), SeModule(96), 2),
Block(5, 96, 576, 96, hswish(), SeModule(96), 1),
Block(5, 96, 576, 96, hswish(), SeModule(96), 1),
)
self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(576)
self.hs2 = hswish()
self.linear3 = nn.Linear(576, 1280)
self.bn3 = nn.BatchNorm1d(1280)
self.hs3 = hswish()
self.linear4 = nn.Linear(1280, num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
out = self.hs1(self.bn1(self.conv1(x)))
out = self.bneck(out)
out = self.hs2(self.bn2(self.conv2(out)))
out = F.avg_pool2d(out, conf.img_size // 32)
out = out.view(out.size(0), -1)
out = self.hs3(self.bn3(self.linear3(out)))
out = self.linear4(out)
return out
def test():
net = MobileNetV3_Small()
x = torch.randn(2,3,224,224)
y = net(x)
print(y.size())
# test()

View File

@ -0,0 +1,268 @@
import torch
import torch.nn as nn
from einops import rearrange
# import sys
# sys.path.append(r"D:\DetectTracking")
from ..config import config as conf
def conv_1x1_bn(inp, oup):
return nn.Sequential(
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
nn.SiLU()
)
def conv_nxn_bn(inp, oup, kernal_size=3, stride=1):
return nn.Sequential(
nn.Conv2d(inp, oup, kernal_size, stride, 1, bias=False),
nn.BatchNorm2d(oup),
nn.SiLU()
)
class PreNorm(nn.Module):
def __init__(self, dim, fn):
super().__init__()
self.norm = nn.LayerNorm(dim)
self.fn = fn
def forward(self, x, **kwargs):
return self.fn(self.norm(x), **kwargs)
class FeedForward(nn.Module):
def __init__(self, dim, hidden_dim, dropout=0.):
super().__init__()
self.net = nn.Sequential(
nn.Linear(dim, hidden_dim),
nn.SiLU(),
nn.Dropout(dropout),
nn.Linear(hidden_dim, dim),
nn.Dropout(dropout)
)
def forward(self, x):
return self.net(x)
class Attention(nn.Module):
def __init__(self, dim, heads=8, dim_head=64, dropout=0.):
super().__init__()
inner_dim = dim_head * heads
project_out = not (heads == 1 and dim_head == dim)
self.heads = heads
self.scale = dim_head ** -0.5
self.attend = nn.Softmax(dim=-1)
self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
self.to_out = nn.Sequential(
nn.Linear(inner_dim, dim),
nn.Dropout(dropout)
) if project_out else nn.Identity()
def forward(self, x):
qkv = self.to_qkv(x).chunk(3, dim=-1)
q, k, v = map(lambda t: rearrange(t, 'b p n (h d) -> b p h n d', h=self.heads), qkv)
dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
attn = self.attend(dots)
out = torch.matmul(attn, v)
out = rearrange(out, 'b p h n d -> b p n (h d)')
return self.to_out(out)
class Transformer(nn.Module):
def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
super().__init__()
self.layers = nn.ModuleList([])
for _ in range(depth):
self.layers.append(nn.ModuleList([
PreNorm(dim, Attention(dim, heads, dim_head, dropout)),
PreNorm(dim, FeedForward(dim, mlp_dim, dropout))
]))
def forward(self, x):
for attn, ff in self.layers:
x = attn(x) + x
x = ff(x) + x
return x
class MV2Block(nn.Module):
def __init__(self, inp, oup, stride=1, expansion=4):
super().__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(inp * expansion)
self.use_res_connect = self.stride == 1 and inp == oup
if expansion == 1:
self.conv = nn.Sequential(
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.SiLU(),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
else:
self.conv = nn.Sequential(
# pw
nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.SiLU(),
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.SiLU(),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileViTBlock(nn.Module):
def __init__(self, dim, depth, channel, kernel_size, patch_size, mlp_dim, dropout=0.):
super().__init__()
self.ph, self.pw = patch_size
self.conv1 = conv_nxn_bn(channel, channel, kernel_size)
self.conv2 = conv_1x1_bn(channel, dim)
self.transformer = Transformer(dim, depth, 4, 8, mlp_dim, dropout)
self.conv3 = conv_1x1_bn(dim, channel)
self.conv4 = conv_nxn_bn(2 * channel, channel, kernel_size)
def forward(self, x):
y = x.clone()
# Local representations
x = self.conv1(x)
x = self.conv2(x)
# Global representations
_, _, h, w = x.shape
x = rearrange(x, 'b d (h ph) (w pw) -> b (ph pw) (h w) d', ph=self.ph, pw=self.pw)
x = self.transformer(x)
x = rearrange(x, 'b (ph pw) (h w) d -> b d (h ph) (w pw)', h=h // self.ph, w=w // self.pw, ph=self.ph,
pw=self.pw)
# Fusion
x = self.conv3(x)
x = torch.cat((x, y), 1)
x = self.conv4(x)
return x
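# Shape walk-through (illustrative, ph = pw = 2): for conv2 output of shape
# (B, dim, 32, 32), the first rearrange yields (B, 4, 256, dim), so attention
# runs over the 16x16 grid of patch positions; the second rearrange folds the
# tokens back into a (B, dim, 32, 32) feature map before fusion with y.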
class MobileViT(nn.Module):
def __init__(self, image_size, dims, channels, num_classes, expansion=4, kernel_size=3, patch_size=(2, 2)):
super().__init__()
ih, iw = image_size
ph, pw = patch_size
assert ih % ph == 0 and iw % pw == 0
L = [2, 4, 3]
self.conv1 = conv_nxn_bn(3, channels[0], stride=2)
self.mv2 = nn.ModuleList([])
self.mv2.append(MV2Block(channels[0], channels[1], 1, expansion))
self.mv2.append(MV2Block(channels[1], channels[2], 2, expansion))
self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion))
self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion)) # Repeat
self.mv2.append(MV2Block(channels[3], channels[4], 2, expansion))
self.mv2.append(MV2Block(channels[5], channels[6], 2, expansion))
self.mv2.append(MV2Block(channels[7], channels[8], 2, expansion))
self.mvit = nn.ModuleList([])
self.mvit.append(MobileViTBlock(dims[0], L[0], channels[5], kernel_size, patch_size, int(dims[0] * 2)))
self.mvit.append(MobileViTBlock(dims[1], L[1], channels[7], kernel_size, patch_size, int(dims[1] * 4)))
self.mvit.append(MobileViTBlock(dims[2], L[2], channels[9], kernel_size, patch_size, int(dims[2] * 4)))
self.conv2 = conv_1x1_bn(channels[-2], channels[-1])
self.pool = nn.AvgPool2d(ih // 32, 1)
self.fc = nn.Linear(channels[-1], num_classes, bias=False)
def forward(self, x):
#print('x',x.shape)
x = self.conv1(x)
x = self.mv2[0](x)
x = self.mv2[1](x)
x = self.mv2[2](x)
x = self.mv2[3](x) # Repeat
x = self.mv2[4](x)
x = self.mvit[0](x)
x = self.mv2[5](x)
x = self.mvit[1](x)
x = self.mv2[6](x)
x = self.mvit[2](x)
x = self.conv2(x)
#print('pool_before',x.shape)
x = self.pool(x).view(-1, x.shape[1])
#print('self_pool',self.pool)
#print('pool_after',x.shape)
x = self.fc(x)
return x
def mobilevit_xxs():
dims = [64, 80, 96]
channels = [16, 16, 24, 24, 48, 48, 64, 64, 80, 80, 320]
return MobileViT((256, 256), dims, channels, num_classes=1000, expansion=2)
def mobilevit_xs():
dims = [96, 120, 144]
channels = [16, 32, 48, 48, 64, 64, 80, 80, 96, 96, 384]
return MobileViT((256, 256), dims, channels, num_classes=1000)
def mobilevit_s():
dims = [144, 192, 240]
channels = [16, 32, 64, 64, 96, 96, 128, 128, 160, 160, 640]
return MobileViT((conf.img_size, conf.img_size), dims, channels, num_classes=conf.embedding_size)
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
if __name__ == '__main__':
img = torch.randn(5, 3, 256, 256)
vit = mobilevit_xxs()
out = vit(img)
print(out.shape)
print(count_parameters(vit))
vit = mobilevit_xs()
out = vit(img)
print(out.shape)
print(count_parameters(vit))
vit = mobilevit_s()
out = vit(img)
print(out.shape)
print(count_parameters(vit))

View File

@ -0,0 +1,145 @@
import torch
import torch.nn as nn
from .CBAM import CBAM
from .Tool import GeM as gem
# from model.CBAM import CBAM
# from model.Tool import GeM as gem
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inchannel, outchannel, stride=1, downsample=None):
super().__init__()
self.conv1 = nn.Conv2d(in_channels=inchannel, out_channels=outchannel, kernel_size=1, stride=1, bias=False)
self.bn1 = nn.BatchNorm2d(outchannel)
self.conv2 = nn.Conv2d(in_channels=outchannel, out_channels=outchannel, kernel_size=3, bias=False,
stride=stride, padding=1)
self.bn2 = nn.BatchNorm2d(outchannel)
self.conv3 = nn.Conv2d(in_channels=outchannel, out_channels=outchannel * self.expansion, stride=1, bias=False,
kernel_size=1)
self.bn3 = nn.BatchNorm2d(outchannel * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
def forward(self, x):
identity = x
if self.downsample is not None:
identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out = out + identity
out = self.relu(out)
return out
class resnet(nn.Module):
def __init__(self, block=Bottleneck, block_num=[3, 4, 6, 3], num_class=1000):
super().__init__()
self.in_channel = 64
self.conv1 = nn.Conv2d(in_channels=3,
out_channels=self.in_channel,
stride=2,
kernel_size=7,
padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(self.in_channel)
self.relu = nn.ReLU(inplace=True)
self.cbam = CBAM(self.in_channel)
self.cbam1 = CBAM(2048)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, block_num[0], stride=1)
self.layer2 = self._make_layer(block, 128, block_num[1], stride=2)
self.layer3 = self._make_layer(block, 256, block_num[2], stride=2)
self.layer4 = self._make_layer(block, 512, block_num[3], stride=2)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.gem = gem()
self.fc = nn.Linear(512 * block.expansion, num_class)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out',
nonlinearity='relu')
if isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1.0)
nn.init.constant_(m.bias, 0.0)  # standard BN init: weight 1, bias 0
def _make_layer(self, block, channel, block_num, stride=1):
downsample = None
if stride != 1 or self.in_channel != channel * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(channel * block.expansion))
layer = []
layer.append(block(self.in_channel, channel, stride, downsample))
self.in_channel = channel * block.expansion
for _ in range(1, block_num):
layer.append(block(self.in_channel, channel))
return nn.Sequential(*layer)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.cbam(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.cbam1(x)
# x = self.avgpool(x)
x = self.gem(x)
x = torch.flatten(x, 1)
x = self.fc(x)
return x
class TripletNet(nn.Module):
def __init__(self, num_class, flag=True):
super(TripletNet, self).__init__()
self.initnet = rescbam(num_class)
self.flag = flag
def forward(self, x1, x2=None, x3=None):
if self.flag:
output1 = self.initnet(x1)
output2 = self.initnet(x2)
output3 = self.initnet(x3)
return output1, output2, output3
else:
output = self.initnet(x1)
return output
def rescbam(num_class):
return resnet(block=Bottleneck, block_num=[3, 4, 6, 3], num_class=num_class)
if __name__ == '__main__':
input1 = torch.randn(4, 3, 640, 640)
input2 = torch.randn(4, 3, 640, 640)
input3 = torch.randn(4, 3, 640, 640)
# rescbam test
# Resnet50 = rescbam(512)
# output = Resnet50.forward(input1)
# print(Resnet50)
# trnet test
trnet = TripletNet(512)
output = trnet(input1, input2, input3)
print(output)

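A hedged sketch of driving TripletNet with PyTorch's built-in triplet margin loss (the batch tensors are hypothetical, and the 224x224 input size assumes the backbone's GeM pooling is global, as in the file's own 640x640 demo):

import torch
import torch.nn as nn

net = TripletNet(512)  # 512-d embeddings
criterion = nn.TripletMarginLoss(margin=1.0)
anchor = torch.randn(2, 3, 224, 224)
positive = torch.randn(2, 3, 224, 224)
negative = torch.randn(2, 3, 224, 224)
a, p, n = net(anchor, positive, negative)
loss = criterion(a, p, n)
loss.backward()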
View File

@ -0,0 +1,189 @@
"""resnet in pytorch
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
Deep Residual Learning for Image Recognition
https://arxiv.org/abs/1512.03385v1
"""
import torch
import torch.nn as nn
from config import config as conf
from CBAM import CBAM
class BasicBlock(nn.Module):
"""Basic Block for resnet 18 and resnet 34
"""
#BasicBlock and BottleNeck blocks
#have different output sizes;
#we use the class attribute expansion
#to distinguish them
expansion = 1
def __init__(self, in_channels, out_channels, stride=1):
super().__init__()
#residual function
self.residual_function = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
)
#shortcut
self.shortcut = nn.Sequential()
#the shortcut output dimension is not the same with residual function
#use 1*1 convolution to match the dimension
if stride != 1 or in_channels != BasicBlock.expansion * out_channels:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
)
def forward(self, x):
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
class BottleNeck(nn.Module):
"""Residual block for resnet over 50 layers
"""
expansion = 4
def __init__(self, in_channels, out_channels, stride=1):
super().__init__()
self.residual_function = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels, stride=stride, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels * BottleNeck.expansion),
)
self.shortcut = nn.Sequential()
if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * BottleNeck.expansion, stride=stride, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels * BottleNeck.expansion)
)
def forward(self, x):
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
class ResNet(nn.Module):
def __init__(self, block, num_block, cbam=False, num_classes=conf.embedding_size):
super().__init__()
self.in_channels = 64
self.use_cbam = cbam
# self.conv1 = nn.Sequential(
# nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False),
# nn.BatchNorm2d(64),
# nn.ReLU(inplace=True))
self.conv1 = nn.Sequential(
nn.Conv2d(3, 64,stride=2,kernel_size=7,padding=3,bias=False),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
self.cbam = CBAM(self.in_channels)
#we use a different inputsize than the original paper
#so conv2_x's stride is 1
self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
self.conv5_x = self._make_layer(block, 512, num_block[3], 2)
self.cbam1 = CBAM(self.in_channels)
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out',
nonlinearity='relu')
if isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1.0)
nn.init.constant_(m.bias, 0.0)  # standard BN init: weight 1, bias 0
def _make_layer(self, block, out_channels, num_blocks, stride):
"""make resnet layers(by layer i didnt mean this 'layer' was the
same as a neuron netowork layer, ex. conv layer), one layer may
contain more than one residual block
Args:
block: block type, basic block or bottle neck block
out_channels: output depth channel number of this layer
num_blocks: how many blocks per layer
stride: the stride of the first block of this layer
Return:
return a resnet layer
"""
# we have num_blocks blocks per layer; the first block's stride
# can be 1 or 2, all the remaining blocks use stride 1
strides = [stride] + [1] * (num_blocks - 1)
layers = []
for stride in strides:
layers.append(block(self.in_channels, out_channels, stride))
self.in_channels = out_channels * block.expansion
return nn.Sequential(*layers)
def forward(self, x):
output = self.conv1(x)
if self.use_cbam:
output = self.cbam(output)
output = self.conv2_x(output)
output = self.conv3_x(output)
output = self.conv4_x(output)
output = self.conv5_x(output)
if self.use_cbam:
output = self.cbam1(output)
output = self.avg_pool(output)
output = output.view(output.size(0), -1)
output = self.fc(output)
return output
def resnet18(cbam=False):
""" return a ResNet 18 object
"""
return ResNet(BasicBlock, [2, 2, 2, 2], cbam)
def resnet34():
""" return a ResNet 34 object
"""
return ResNet(BasicBlock, [3, 4, 6, 3])
def resnet50():
""" return a ResNet 50 object
"""
return ResNet(BottleNeck, [3, 4, 6, 3])
def resnet101():
""" return a ResNet 101 object
"""
return ResNet(BottleNeck, [3, 4, 23, 3])
def resnet152():
""" return a ResNet 152 object
"""
return ResNet(BottleNeck, [3, 8, 36, 3])

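A minimal smoke test for the CBAM-enabled variant (a sketch; the embedding size comes from conf.embedding_size):

import torch

net = resnet18(cbam=True)
x = torch.randn(2, 3, 224, 224)
print(net(x).shape)  # torch.Size([2, conf.embedding_size])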
View File

@ -0,0 +1,121 @@
""" Resnet_IR_SE in ArcFace """
import torch
import torch.nn as nn
import torch.nn.functional as F
class Flatten(nn.Module):
def forward(self, x):
return x.reshape(x.shape[0], -1)
class SEConv(nn.Module):
"""Use Convolution instead of FullyConnection in SE"""
def __init__(self, channels, reduction):
super().__init__()
self.net = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(channels, channels // reduction, kernel_size=1, bias=False),
nn.ReLU(inplace=True),
nn.Conv2d(channels // reduction, channels, kernel_size=1, bias=False),
nn.Sigmoid(),
)
def forward(self, x):
return self.net(x) * x
class SE(nn.Module):
def __init__(self, channels, reduction):
super().__init__()
self.net = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Linear(channels, channels // reduction),
nn.ReLU(inplace=True),
nn.Linear(channels // reduction, channels),
nn.Sigmoid(),
)
def forward(self, x):
return self.net(x) * x
class IRSE(nn.Module):
def __init__(self, channels, depth, stride):
super().__init__()
if channels == depth:
self.shortcut = nn.MaxPool2d(kernel_size=1, stride=stride)
else:
self.shortcut = nn.Sequential(
nn.Conv2d(channels, depth, (1, 1), stride, bias=False),
nn.BatchNorm2d(depth),
)
self.residual = nn.Sequential(
nn.BatchNorm2d(channels),
nn.Conv2d(channels, depth, (3, 3), 1, 1, bias=False),
nn.PReLU(depth),
nn.Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
nn.BatchNorm2d(depth),
SEConv(depth, 16),
)
def forward(self, x):
return self.shortcut(x) + self.residual(x)
class ResIRSE(nn.Module):
"""Resnet50-IRSE backbone"""
def __init__(self, ih, embedding_size, drop_ratio):
super().__init__()
ih_last = ih // 16
self.input_layer = nn.Sequential(
nn.Conv2d(3, 64, (3, 3), 1, 1, bias=False),
nn.BatchNorm2d(64),
nn.PReLU(64),
)
self.output_layer = nn.Sequential(
nn.BatchNorm2d(512),
nn.Dropout(drop_ratio),
Flatten(),
nn.Linear(512 * ih_last * ih_last, embedding_size),
nn.BatchNorm1d(embedding_size),
)
# ["channels", "depth", "stride"],
self.res50_arch = [
[64, 64, 2], [64, 64, 1], [64, 64, 1],
[64, 128, 2], [128, 128, 1], [128, 128, 1], [128, 128, 1],
[128, 256, 2], [256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1],
[256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1],
[256, 256, 1], [256, 256, 1], [256, 256, 1], [256, 256, 1],
[256, 512, 2], [512, 512, 1], [512, 512, 1],
]
self.body = nn.Sequential(*[IRSE(a, b, c) for (a, b, c) in self.res50_arch])
def forward(self, x):
x = self.input_layer(x)
x = self.body(x)
x = self.output_layer(x)
return x
if __name__ == "__main__":
from PIL import Image
import numpy as np
x = Image.open("../samples/009.jpg").convert('L')
x = x.resize((128, 128))
x = np.asarray(x, dtype=np.float32)
x = x[None, None, ...]
x = torch.from_numpy(x)
net = ResIRSE(512, 0.6)
net.eval()
with torch.no_grad():
out = net(x)
print(out.shape)

View File

@ -0,0 +1,462 @@
import torch
import torch.nn as nn
from ..config import config as conf
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url
# from .utils import load_state_dict_from_url
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
'wide_resnet50_2', 'wide_resnet101_2']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class SpatialAttention(nn.Module):
def __init__(self, kernel_size=7):
super(SpatialAttention, self).__init__()
assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
padding = 3 if kernel_size == 7 else 1
self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
x = torch.cat([avg_out, max_out], dim=1)
x = self.conv1(x)
return self.sigmoid(x)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None, cam=False, bam=False):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
self.cam = cam
self.bam = bam
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
if self.cam:
if planes == 64:
self.globalAvgPool = nn.AvgPool2d(56, stride=1)
elif planes == 128:
self.globalAvgPool = nn.AvgPool2d(28, stride=1)
elif planes == 256:
self.globalAvgPool = nn.AvgPool2d(14, stride=1)
elif planes == 512:
self.globalAvgPool = nn.AvgPool2d(7, stride=1)
self.fc1 = nn.Linear(in_features=planes, out_features=round(planes / 16))
self.fc2 = nn.Linear(in_features=round(planes / 16), out_features=planes)
self.sigmod = nn.Sigmoid()
if self.bam:
self.bam = SpatialAttention()
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
if self.cam:
ori_out = self.globalAvgPool(out)
out = out.view(out.size(0), -1)
out = self.fc1(out)
out = self.relu(out)
out = self.fc2(out)
out = self.sigmod(out)
out = out.view(out.size(0), out.size(-1), 1, 1)
out = out * ori_out
if self.bam:
out = out*self.bam(out)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
# Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
# while original implementation places the stride at the first 1x1 convolution(self.conv1)
# according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
# This variant is also known as ResNet V1.5 and improves accuracy according to
# https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None, cam=False, bam=False):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
self.cam = cam
self.bam = bam
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
if self.cam:
if planes == 64:
self.globalAvgPool = nn.AvgPool2d(56, stride=1)
elif planes == 128:
self.globalAvgPool = nn.AvgPool2d(28, stride=1)
elif planes == 256:
self.globalAvgPool = nn.AvgPool2d(14, stride=1)
elif planes == 512:
self.globalAvgPool = nn.AvgPool2d(7, stride=1)
self.fc1 = nn.Linear(planes * self.expansion, round(planes / 4))
self.fc2 = nn.Linear(round(planes / 4), planes * self.expansion)
self.sigmod = nn.Sigmoid()
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
if self.cam:
ori_out = self.globalAvgPool(out)
out = out.view(out.size(0), -1)
out = self.fc1(out)
out = self.relu(out)
out = self.fc2(out)
out = self.sigmod(out)
out = out.view(out.size(0), out.size(-1), 1, 1)
out = out * ori_out
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=conf.embedding_size, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None, scale=0.75):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, int(64*scale), layers[0])
self.layer2 = self._make_layer(block, int(128*scale), layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, int(256*scale), layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, int(512*scale), layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(int(512 * block.expansion*scale), num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def _forward_impl(self, x):
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
# print('poolBefore', x.shape)
x = self.avgpool(x)
# print('poolAfter', x.shape)
x = torch.flatten(x, 1)
# print('fcBefore',x.shape)
x = self.fc(x)
# print('fcAfter',x.shape)
return x
def forward(self, x):
return self._forward_impl(x)
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
model = ResNet(block, layers, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls[arch],
progress=progress)
src_state_dict = state_dict
target_state_dict = model.state_dict()
skip_keys = []
# skip mismatch size tensors in case of pretraining
for k in src_state_dict.keys():
if k not in target_state_dict:
continue
if src_state_dict[k].size() != target_state_dict[k].size():
skip_keys.append(k)
for k in skip_keys:
del src_state_dict[k]
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
return model
def resnet14(pretrained=True, progress=True, **kwargs):
r"""ResNet-14 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 1, 1, 2], pretrained, progress,
**kwargs)
def resnet18(pretrained=True, progress=True, **kwargs):
r"""ResNet-18 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
**kwargs)
def resnet34(pretrained=False, progress=True, **kwargs):
r"""ResNet-34 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet50(pretrained=False, progress=True, **kwargs):
r"""ResNet-50 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet101(pretrained=False, progress=True, **kwargs):
r"""ResNet-101 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
**kwargs)
def resnet152(pretrained=False, progress=True, **kwargs):
r"""ResNet-152 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
**kwargs)
def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-50 32x4d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 4
return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-101 32x8d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 8
return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)
def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-50-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-101-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)

View File

@ -0,0 +1,4 @@
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url

View File

@ -0,0 +1,137 @@
import torch
from vit_pytorch.mobile_vit import MobileViT
from vit_pytorch import vit
from vit_pytorch import SimpleViT
from torch import nn
from einops import rearrange, repeat
from einops.layers.torch import Rearrange
# helpers
def pair(t):
return t if isinstance(t, tuple) else (t, t)
# classes
class FeedForward(nn.Module):
def __init__(self, dim, hidden_dim, dropout=0.):
super().__init__()
self.net = nn.Sequential(
nn.LayerNorm(dim),
nn.Linear(dim, hidden_dim),
nn.GELU(),
nn.Dropout(dropout),
nn.Linear(hidden_dim, dim),
nn.Dropout(dropout)
)
def forward(self, x):
return self.net(x)
class Attention(nn.Module):
def __init__(self, dim, heads=8, dim_head=64, dropout=0.):
super().__init__()
inner_dim = dim_head * heads
project_out = not (heads == 1 and dim_head == dim)
self.heads = heads
self.scale = dim_head ** -0.5
self.norm = nn.LayerNorm(dim)
self.attend = nn.Softmax(dim=-1)
self.dropout = nn.Dropout(dropout)
self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
self.to_out = nn.Sequential(
nn.Linear(inner_dim, dim),
nn.Dropout(dropout)
) if project_out else nn.Identity()
def forward(self, x):
x = self.norm(x)
qkv = self.to_qkv(x).chunk(3, dim=-1)
q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=self.heads), qkv)
dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
attn = self.attend(dots)
attn = self.dropout(attn)
out = torch.matmul(attn, v)
out = rearrange(out, 'b h n d -> b n (h d)')
return self.to_out(out)
class Transformer(nn.Module):
def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
super().__init__()
self.norm = nn.LayerNorm(dim)
self.layers = nn.ModuleList([])
for _ in range(depth):
self.layers.append(nn.ModuleList([
Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout),
FeedForward(dim, mlp_dim, dropout=dropout)
]))
def forward(self, x):
for attn, ff in self.layers:
x = attn(x) + x
x = ff(x) + x
return self.norm(x)
class ViT(nn.Module):
def __init__(self, *, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, pool='cls', channels=3,
dim_head=64, dropout=0., emb_dropout=0.):
super().__init__()
image_height, image_width = pair(image_size)
patch_height, patch_width = pair(patch_size)
assert image_height % patch_height == 0 and image_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'
num_patches = (image_height // patch_height) * (image_width // patch_width)
patch_dim = channels * patch_height * patch_width
assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'
self.to_patch_embedding = nn.Sequential(
Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_height, p2=patch_width),
nn.LayerNorm(patch_dim),
nn.Linear(patch_dim, dim),
nn.LayerNorm(dim),
)
self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
self.dropout = nn.Dropout(emb_dropout)
self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)
self.pool = pool
self.to_latent = nn.Identity()
self.mlp_head = nn.Linear(dim, num_classes)
def forward(self, img):
x = self.to_patch_embedding(img)
b, n, _ = x.shape
cls_tokens = repeat(self.cls_token, '1 1 d -> b 1 d', b=b)
x = torch.cat((cls_tokens, x), dim=1)
x += self.pos_embedding[:, :(n + 1)]
x = self.dropout(x)
x = self.transformer(x)
x = x.mean(dim=1) if self.pool == 'mean' else x[:, 0]
x = self.to_latent(x)
return self.mlp_head(x)

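A hedged instantiation of the ViT class above with deliberately small dimensions; with image_size=224 and patch_size=16 the encoder sees 196 patch tokens plus one cls token:

import torch

vit_model = ViT(image_size=224, patch_size=16, num_classes=256,
dim=192, depth=4, heads=3, mlp_dim=384)
x = torch.randn(2, 3, 224, 224)
print(vit_model(x).shape)  # torch.Size([2, 256])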
8
contrast/feat_extract/resnet_vit/.idea/.gitignore generated vendored Normal file
View File

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View File

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.8 (my_env)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module>

View File

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="lc@192.168.1.142:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
</component>
</project>

View File

@ -0,0 +1,12 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N803" />
</list>
</option>
</inspection_tool>
</profile>
</component>

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.8 (my_env)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (my_env)" project-jdk-type="Python SDK" />
</project>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/contrastInference.iml" filepath="$PROJECT_DIR$/.idea/contrastInference.iml" />
</modules>
</component>
</project>

View File

@ -0,0 +1 @@
# from .config import config

View File

@ -0,0 +1,84 @@
import torch
import torchvision.transforms as T
class Config:
# network settings
backbone = 'vit' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5]
metric = 'softmax' # [cosface, arcface, softmax]
cbam = True
embedding_size = 256 # 256
drop_ratio = 0.5
img_size = 224
teacher = 'vit' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5]
student = 'resnet'
# data preprocess
# input_shape = [1, 128, 128]
"""transforms.RandomCrop(size),
transforms.RandomVerticalFlip(p=0.5),
transforms.RandomHorizontalFlip(),
RandomRotate(15, 0.3),
# RandomGaussianBlur()"""
train_transform = T.Compose([
T.ToTensor(),
T.Resize((img_size, img_size)),
# T.RandomCrop(img_size*4//5),
# T.RandomHorizontalFlip(p=0.5),
T.RandomRotation(180),
T.ColorJitter(brightness=0.5),
T.ConvertImageDtype(torch.float32),
T.Normalize(mean=[0.5], std=[0.5]),
])
test_transform = T.Compose([
T.ToTensor(),
T.Resize((img_size, img_size)),
T.ConvertImageDtype(torch.float32),
T.Normalize(mean=[0.5], std=[0.5]),
])
# dataset
train_root = './data/2250_train/train' # dataset after one initial screening pass
# train_root = './data/0625_train/train'
test_root = "./data/2250_train/val/"
# test_root = "./data/0625_train/val"
test_list = "./data/2250_train/val_pair.txt"
test_group_json = "./data/2250_train/cross_same.json"
# test_group_json = "./data/0625_train/cross_same.json"
# test_list = "./data/test_data_100/val_pair.txt"
# training settings
checkpoints = "checkpoints/vit_b_16_0815/" # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
restore = True
# restore_model = "checkpoints/renet18_2250_0315/best_resnet18_2250_0315.pth" # best_resnet18_1491_0306.pth
restore_model = "checkpoints/vit_b_16_0730/best.pth" # best_resnet18_1491_0306.pth
# test_model = "./checkpoints/renet18_1887_0311/best_resnet18_1887_0311.pth"
testbackbone = 'resnet18' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5]
# test_val = "./data/2250_train"
test_val = "./data/0625_train"
test_model = "checkpoints/resnet18_0721/best.pth"
train_batch_size = 128 # 256
test_batch_size = 256 # 256
epoch = 300
optimizer = 'adamw' # ['sgd', 'adam' 'adamw']
lr = 1e-3 # 1e-2
lr_step = 10 # 10
lr_decay = 0.95 # 0.98
weight_decay = 5e-4
loss = 'focal_loss' # ['focal_loss', 'cross_entropy']
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
pin_memory = True # if memory is large, set it True to speed up a bit
num_workers = 4 # dataloader
group_test = True
# group_test = False
config = Config()

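A hedged example of consuming this config from a script (the image path is hypothetical); note that the single-element mean/std in Normalize broadcasts over all three channels:

from PIL import Image
from config import config as conf

img = Image.open('sample.jpg')      # hypothetical path
x = conf.test_transform(img)        # tensor of shape (3, 224, 224)
x = x.unsqueeze(0).to(conf.device)  # add the batch dimension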
View File

@ -0,0 +1,103 @@
import os
import os.path as osp
import torch
import numpy as np
from model import resnet18
from PIL import Image
from torch.nn.functional import softmax
from config import config as conf
import time
embedding_size = conf.embedding_size
img_size = conf.img_size
device = conf.device
def load_contrast_model():
model = resnet18().to(conf.device)
model.load_state_dict(torch.load(conf.test_model, map_location=conf.device))
model.eval()
print('load model {} '.format(conf.testbackbone))
return model
def group_image(imageDirs, batch) -> list:
"""Group the image paths under a directory into batches of size `batch`"""
images = []
with os.scandir(imageDirs) as entries:
for imgpth in entries:
images.append(os.sep.join([imageDirs, imgpth.name]))
print(f"{len(images)} images in {imageDirs}")
size = len(images)
res = []
for i in range(0, size, batch):
end = min(batch + i, size)
res.append(images[i: end])
return res
def test_preprocess(images: list, transform) -> torch.Tensor:
res = []
for img in images:
# print(img)
im = Image.open(img)
im = transform(im)
res.append(im)
# data = torch.cat(res, dim=0) # shape: (batch, 128, 128)
# data = data[:, None, :, :] # shape: (batch, 1, 128, 128)
data = torch.stack(res)
return data
def featurize(images: list, transform, net, device) -> dict:
"""featurize each image and save into a dictionary
Args:
images: image paths
transform: test transform
net: pretrained model
device: cpu or cuda
Returns:
Dict (key: imagePath, value: feature)
"""
data = test_preprocess(images, transform)
data = data.to(device)
net = net.to(device)
with torch.no_grad():
features = net(data)
# res = {img: feature for (img, feature) in zip(images, features)}
return features
if __name__ == '__main__':
# Network Setup
if conf.testbackbone == 'resnet18':
model = resnet18().to(device)
else:
raise ValueError('Unsupported model {}'.format(conf.testbackbone))
print('load model {} '.format(conf.testbackbone))
# model = nn.DataParallel(model).to(conf.device)
model.load_state_dict(torch.load(conf.test_model, map_location=conf.device))
model.eval()
# images = unique_image(conf.test_list)
# images = [osp.join(conf.test_val, img) for img in images]
# print('images', images)
# images = ['./data/2250_train/val/6920616313186/6920616313186_6920616313186_20240220-124502_53d2e103-ae3a-4689-b745-9d8723b770fe_front_returnGood_70f75407b7ae_31_01.jpg']
# groups = group_image(conf.test_val, conf.test_batch_size) ## group images by batch_size
groups = group_image('img_test', 1) ## group images by batch_size (default batch_size = 8)
feature_dict = dict()
for group in groups:
s = time.time()
features = featurize(group, conf.test_transform, model, conf.device)
e = time.time()
print('time: {}'.format(e - s))
# out = softmax(features, dim=1).argmax(dim=1)
# print('d >>> {}'. format(out))
# feature_dict.update(d)

View File

@ -0,0 +1 @@
from .resnet_pre import resnet18, resnet34, resnet50, resnet14

View File

@ -0,0 +1,462 @@
import torch
import torch.nn as nn
from config import config as conf
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url
# from .utils import load_state_dict_from_url
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
'wide_resnet50_2', 'wide_resnet101_2']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class SpatialAttention(nn.Module):
def __init__(self, kernel_size=7):
super(SpatialAttention, self).__init__()
assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
padding = 3 if kernel_size == 7 else 1
self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
x = torch.cat([avg_out, max_out], dim=1)
x = self.conv1(x)
return self.sigmoid(x)
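# Shape sketch (illustrative only): SpatialAttention maps (B, C, H, W) to a
# (B, 1, H, W) attention map built from the channel-wise mean and max, e.g.
# sa = SpatialAttention(kernel_size=7)
# att = sa(torch.randn(2, 64, 56, 56)) # att.shape == (2, 1, 56, 56)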
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None, cam=False, bam=False):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
self.cam = cam
self.bam = bam
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
if self.cam:
if planes == 64:
self.globalAvgPool = nn.AvgPool2d(56, stride=1)
elif planes == 128:
self.globalAvgPool = nn.AvgPool2d(28, stride=1)
elif planes == 256:
self.globalAvgPool = nn.AvgPool2d(14, stride=1)
elif planes == 512:
self.globalAvgPool = nn.AvgPool2d(7, stride=1)
self.fc1 = nn.Linear(in_features=planes, out_features=round(planes / 16))
self.fc2 = nn.Linear(in_features=round(planes / 16), out_features=planes)
self.sigmod = nn.Sigmoid()
if self.bam:
self.bam = SpatialAttention()
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
if self.cam:
ori_out = self.globalAvgPool(out)
out = out.view(out.size(0), -1)
out = self.fc1(out)
out = self.relu(out)
out = self.fc2(out)
out = self.sigmod(out)
out = out.view(out.size(0), out.size(-1), 1, 1)
out = out * ori_out
if self.bam:
out = out*self.bam(out)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
# Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
# while original implementation places the stride at the first 1x1 convolution(self.conv1)
# according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
# This variant is also known as ResNet V1.5 and improves accuracy according to
# https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None, cam=False, bam=False):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
self.cam = cam
self.bam = bam
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
if self.cam:
if planes == 64:
self.globalAvgPool = nn.AvgPool2d(56, stride=1)
elif planes == 128:
self.globalAvgPool = nn.AvgPool2d(28, stride=1)
elif planes == 256:
self.globalAvgPool = nn.AvgPool2d(14, stride=1)
elif planes == 512:
self.globalAvgPool = nn.AvgPool2d(7, stride=1)
self.fc1 = nn.Linear(planes * self.expansion, round(planes / 4))
self.fc2 = nn.Linear(round(planes / 4), planes * self.expansion)
self.sigmod = nn.Sigmoid()
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
if self.cam:
ori_out = self.globalAvgPool(out)
out = out.view(out.size(0), -1)
out = self.fc1(out)
out = self.relu(out)
out = self.fc2(out)
out = self.sigmod(out)
out = out.view(out.size(0), out.size(-1), 1, 1)
out = out * ori_out
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=conf.embedding_size, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None, scale=0.75):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, int(64*scale), layers[0])
self.layer2 = self._make_layer(block, int(128*scale), layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, int(256*scale), layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, int(512*scale), layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(int(512 * block.expansion*scale), num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def _forward_impl(self, x):
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
# print('poolBefore', x.shape)
x = self.avgpool(x)
# print('poolAfter', x.shape)
x = torch.flatten(x, 1)
# print('fcBefore',x.shape)
x = self.fc(x)
# print('fcAfter',x.shape)
return x
def forward(self, x):
return self._forward_impl(x)
# def _resnet(arch, block, layers, pretrained, progress, **kwargs):
# model = ResNet(block, layers, **kwargs)
# if pretrained:
# state_dict = load_state_dict_from_url(model_urls[arch],
# progress=progress)
# model.load_state_dict(state_dict, strict=False)
# return model
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
model = ResNet(block, layers, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls[arch],
progress=progress)
src_state_dict = state_dict
target_state_dict = model.state_dict()
skip_keys = []
# skip mismatch size tensors in case of pretraining
for k in src_state_dict.keys():
if k not in target_state_dict:
continue
if src_state_dict[k].size() != target_state_dict[k].size():
skip_keys.append(k)
for k in skip_keys:
del src_state_dict[k]
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
return model
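# Sketch of the effect (hypothetical call): because this ResNet scales channel
# widths by scale=0.75 by default, most tensors of the torchvision checkpoint
# land in skip_keys and are dropped; only size-matched tensors (e.g. the stem
# conv1/bn1) are copied, and strict=False tolerates the remainder.
# model = resnet18(pretrained=True) # loads what fits, silently skips the rest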
def resnet14(pretrained=True, progress=True, **kwargs):
r"""ResNet-14 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 1, 1, 2], pretrained, progress,
**kwargs)
def resnet18(pretrained=True, progress=True, **kwargs):
r"""ResNet-18 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
**kwargs)
def resnet34(pretrained=False, progress=True, **kwargs):
r"""ResNet-34 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet50(pretrained=False, progress=True, **kwargs):
r"""ResNet-50 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet101(pretrained=False, progress=True, **kwargs):
r"""ResNet-101 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
**kwargs)
def resnet152(pretrained=False, progress=True, **kwargs):
r"""ResNet-152 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
**kwargs)
def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-50 32x4d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 4
return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-101 32x8d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 8
return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)
def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-50-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-101-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)

215
contrast/genfeats.py Normal file
View File

@ -0,0 +1,215 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 3 12:05:19 2024
@author: ym
"""
import os
import time
# import torch
import pickle
# import json
import numpy as np
from PIL import Image
from feat_extract.config import config as conf
# from model import resnet18 as resnet18
from feat_extract.inference import FeatsInterface #, inference_image
IMG_FORMAT = ['.bmp', '.jpg', '.jpeg', '.png']
# def model_init(conf, mpath=None):
# '''======= 0. Configure the feature-extraction model path ======='''
# if mpath is None:
# model_path = conf.test_model
# else:
# model_path = mpath
# ##============ load resnet model
# model = resnet18().to(conf.device)
# # model = nn.DataParallel(model).to(conf.device)
# model.load_state_dict(torch.load(model_path, map_location=conf.device))
# model.eval()
# print('load model {} '.format(conf.testbackbone))
# return model
def get_std_barcodeDict(bcdpath, savepath, bcdSet):
'''
inputs:
bcdpath: cleaned barcode sample images; if a barcode folder contains a 'base'
subfolder, only the images under that subfolder are used
(default = r'\\192.168.1.28\share\已标注数据备份\对比数据\barcode\barcode_1771')
savepath: where the dicts are stored; filename format: barcode.pickle
Function:
build and save single-key dicts {barcode: [imgpath1, imgpath2, ...]}
'''
# savepath = r'\\192.168.1.28\share\测试_202406\contrast\std_barcodes'
'''Read the list of barcodes in the dataset'''
stdBarcodeList = []
for filename in os.listdir(bcdpath):
filepath = os.path.join(bcdpath, filename)
if not os.path.isdir(filepath) or not filename.isdigit() or len(filename)<8:
continue
if bcdSet is None:
stdBarcodeList.append(filename)
elif filename in bcdSet:
stdBarcodeList.append(filename)
bcdPaths = [(barcode, os.path.join(bcdpath, barcode)) for barcode in stdBarcodeList]
'''Iterate over the dataset; for each barcode, build and save the dict {barcode: [imgpath1, imgpath2, ...]}'''
k = 0
errbarcodes = []
for barcode, bpath in bcdPaths:
pickpath = os.path.join(savepath, f"{barcode}.pickle")
if os.path.isfile(pickpath):
continue
stdBarcodeDict = {}
stdBarcodeDict[barcode] = []
for root, dirs, files in os.walk(bpath):
imgpaths = []
if "base" in dirs:
broot = os.path.join(root, "base")
for imgname in os.listdir(broot):
imgpath = os.path.join(broot, imgname)
file, ext = os.path.splitext(imgpath)
if ext not in IMG_FORMAT:
continue
imgpaths.append(imgpath)
stdBarcodeDict[barcode].extend(imgpaths)
break
else:
for imgname in files:
imgpath = os.path.join(root, imgname)
_, ext = os.path.splitext(imgpath)
if ext not in IMG_FORMAT: continue
imgpaths.append(imgpath)
stdBarcodeDict[barcode].extend(imgpaths)
pickpath = os.path.join(savepath, f"{barcode}.pickle")
with open(pickpath, 'wb') as f:
pickle.dump(stdBarcodeDict, f)
print(f"Barcode: {barcode}")
# k += 1
# if k == 10:
# break
print(f"Len of errbarcodes: {len(errbarcodes)}")
return
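# Sketch: each saved pickle holds a single-key dict, e.g. (hypothetical barcode):
# with open(os.path.join(savepath, "6902265587712.pickle"), 'rb') as f:
# d = pickle.load(f) # {"6902265587712": [imgpath1, imgpath2, ...]}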
def stdfeat_infer(imgPath, featPath, bcdSet=None):
'''
inputs:
imgPath: folder of pickle files, each of the form {barcode: [imgpath1, imgpath2, ...]}
featPath: where the features of the images referenced in imgPath are stored
Function:
extract features for the images in imgPath and build single-key dicts
{barcode: features}, features.shape=(nsample, 256); save them under featPath
'''
# imgPath = r"\\192.168.1.28\share\测试_202406\contrast\std_barcodes"
# featPath = r"\\192.168.1.28\share\测试_202406\contrast\std_features"
stdBarcodeDict = {}
stdBarcodeDict_ft16 = {}
Encoder = FeatsInterface(conf)
'''The same name appears in 4 places: (1) the original barcode image folder; (2) the
.pickle filename under imgPath; (3) the dict key inside that pickle file; (4) a key of the feature-vector dict'''
k = 0
for filename in os.listdir(imgPath):
bcd, ext = os.path.splitext(filename)
filepath = os.path.join(imgPath, filename)
if ext != ".pickle": continue
if bcdSet is not None and bcd not in bcdSet:
continue
featpath = os.path.join(featPath, f"{bcd}.pickle")
if os.path.isfile(featpath):
continue
stdbDict = {}
t1 = time.time()
try:
with open(filepath, 'rb') as f:
bpDict = pickle.load(f)
for barcode, imgpaths in bpDict.items():
# feature = batch_inference(imgpaths, 8) #from vit distilled model of LiChen
# feature = inference_image(imgpaths, conf.test_transform, model, conf.device)
imgs = []
for d, imgpath in enumerate(imgpaths):
img = Image.open(imgpath)
imgs.append(img)
feature = Encoder.inference(imgs)
feature /= np.linalg.norm(feature, axis=1)[:, None]
# float16
feature_ft16 = feature.astype(np.float16)
feature_ft16 /= np.linalg.norm(feature_ft16, axis=1)[:, None]
# int8: two options, 1) small precision loss, 2) low computational cost
# feature_uint8, _ = ft16_to_uint8(feature_ft16)
feature_uint8 = (feature_ft16*128).astype(np.int8)
except Exception as e:
print(f"Error accured at: {filename}, with Exception is: {e}")
'''================ Save the features of a single barcode ================'''
##================== float32
stdbDict["barcode"] = barcode
stdbDict["imgpaths"] = imgpaths
stdbDict["feats_ft32"] = feature
stdbDict["feats_ft16"] = feature_ft16
stdbDict["feats_uint8"] = feature_uint8
with open(featpath, 'wb') as f:
pickle.dump(stdbDict, f)
stdBarcodeDict[barcode] = feature
stdBarcodeDict_ft16[barcode] = feature_ft16
t2 = time.time()
print(f"Barcode: {barcode}, need time: {t2-t1:.1f} secs")
# k += 1
# if k == 10:
# break
return
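# Sketch: a saved feature pickle can be read back as (hypothetical barcode):
# with open(os.path.join(featPath, "6902265587712.pickle"), 'rb') as f:
# stdb = pickle.load(f)
# stdb.keys() # barcode, imgpaths, feats_ft32, feats_ft16, feats_uint8
# stdb["feats_ft32"].shape # (nsample, 256), rows L2-normalized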
def gen_bcd_features(imgpath, bcdpath, featpath, bcdSet=None):
''' Build the standard feature set '''
'''1. Collect the sample paths in imgpath into dicts {barcode: [imgpath1, imgpath2, ...]}
and store them under bcdpath as barcode.pickle files'''
get_std_barcodeDict(imgpath, bcdpath, bcdSet)
'''2. Extract features and save them under featpath, also restricted by the bcdSet intersection'''
stdfeat_infer(bcdpath, featpath, bcdSet)
def main():
imgpath = r"\\192.168.1.28\share\数据\已完成数据\展厅数据\v1.0\比对数据\整理\zhantingBase"
bcdpath = r"D:\exhibition\dataset\bcdpath"
featpath = r"D:\exhibition\dataset\feats"
gen_bcd_features(imgpath, bcdpath, featpath)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,433 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Jul 27 14:07:25 2024
Precision and recall analysis of field-test data under different feature combinations
@author: ym
"""
import os.path
import numpy as np
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
import sys
sys.path.append(r"D:\DetectTracking")
from tracking.utils.read_data import extract_data, read_deletedBarcode_file, read_tracking_output
# from tracking.dotrack.dotracks import Track
from one2n_contrast import compute_recall_precision, show_recall_prec
from one2n_contrast import performance_evaluate, one2n_return, one2n_deleted
def compute_similar(feat1, feat2):
if len(feat1)==0 or len(feat2)==0:
return 0
similar = 1 - np.maximum(0.0, cdist(feat1, feat2, metric = 'cosine'))
smean = np.mean(similar)
return smean
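# Worked sketch: with unit-length rows, compute_similar returns the mean
# pairwise cosine similarity (clipped at 0) between the two sets, e.g.
# f1 = np.array([[1.0, 0.0], [0.0, 1.0]])
# f2 = np.array([[1.0, 0.0]])
# compute_similar(f1, f2) # mean of [1.0, 0.0] -> 0.5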
def update_event(datapath):
'''A single shopping event, containing the keys:
back_sole_boxes: back-camera boxes
front_sole_boxes: front-camera boxes
back_sole_feats: back-camera features
front_sole_feats: front-camera features
feats_compose: front- and back-camera features merged
feats_select: selected features, preferring the front camera
'''
event = {}
# event['front_tracking_boxes'] = []
# event['front_tracking_feats'] = {}
# event['back_tracking_boxes'] = []
# event['back_tracking_feats'] = {}
event['back_sole_boxes'] = np.empty((0, 9), dtype=np.float64)
event['front_sole_boxes'] = np.empty((0, 9), dtype=np.float64)
event['back_sole_feats'] = np.empty((0, 256), dtype=np.float64)
event['front_sole_feats'] = np.empty((0, 256), dtype=np.float64)
event['feats_compose'] = np.empty((0, 256), dtype=np.float64)
event['feats_select'] = np.empty((0, 256), dtype=np.float64)
# '''Read data from the track.data files'''
# fpath_0_track = os.path.join(datapath, '0_track.data')
# fpath_1_track = os.path.join(datapath, '1_track.data')
# if os.path.exists(fpath_0_track) and os.path.isfile(fpath_0_track):
# _, _, _, _, tracking_boxes, tracking_feat_dict = extract_data(fpath_0_track)
# event['back_tracking_boxes'] = tracking_boxes
# event['back_tracking_feats'] = tracking_feat_dict
# if os.path.exists(fpath_1_track) and os.path.isfile(fpath_1_track):
# _, _, _, _, tracking_boxes, tracking_feat_dict = extract_data(fpath_1_track)
# event['front_tracking_boxes'] = tracking_boxes
# event['front_tracking_feats'] = tracking_feat_dict
# =============================================================================
# '''================ 1. Targeted reads, fast ======================================'''
# '''Read data from the tracking_output.data files'''
# fpath_0_tracking = os.path.join(datapath, '0_tracking_output.data')
# fpath_1_tracking = os.path.join(datapath, '1_tracking_output.data')
# if os.path.exists(fpath_0_tracking) and os.path.isfile(fpath_0_tracking):
# tracking_output_boxes, tracking_output_feats = read_tracking_output(fpath_0_tracking)
# event['back_sole_boxes'] = tracking_output_boxes
# event['back_sole_feats'] = tracking_output_feats
#
# if os.path.exists(fpath_1_tracking) and os.path.isfile(fpath_1_tracking):
# tracking_output_boxes, tracking_output_feats = read_tracking_output(fpath_1_tracking)
# event['front_sole_boxes'] = tracking_output_boxes
# event['front_sole_feats'] = tracking_output_feats
# =============================================================================
'''================ 2. Read by directory traversal (choose either this or option 1 above) ======================================'''
'''Read the data files of the current event'''
for filename in os.listdir(datapath):
# filename = '1_track.data'
fpath = os.path.join(datapath, filename)
CamerType = filename.split('_')[0]
# if os.path.isfile(fpath) and filename.find("track.data")>0:
# bboxes, ffeats, trackerboxes, tracker_feat_dict, tracking_boxes, tracking_feat_dict = extract_data(fpath)
# if CamerType == '0':
# event['back_tracking_boxes'] = tracking_boxes
# event['back_tracking_feats'] = tracking_feat_dict
# elif CamerType == '1':
# event['front_tracking_boxes'] = tracking_boxes
# event['front_tracking_feats'] = tracking_feat_dict
if os.path.isfile(fpath) and filename.find("tracking_output.data")>0:
tracking_output_boxes, tracking_output_feats = read_tracking_output(fpath)
if CamerType == '0':
event['back_sole_boxes'] = tracking_output_boxes
event['back_sole_feats'] = tracking_output_feats
elif CamerType == '1':
event['front_sole_boxes'] = tracking_output_boxes
event['front_sole_feats'] = tracking_output_feats
'''Choose how the event's features are represented'''
fs_feats = event['front_sole_feats']
bs_feats = event['back_sole_feats']
'''1. If neither camera produced selected-track output, return'''
condt1 = len(fs_feats) + len(bs_feats) == 0
if condt1:
return event
'''2. Build the composed features'''
feats_compose = np.empty((0, 256), dtype=np.float64)
if len(fs_feats):
feats_compose = np.concatenate((feats_compose, fs_feats), axis=0)
if len(bs_feats):
feats_compose = np.concatenate((feats_compose, bs_feats), axis=0)
event['feats_compose'] = feats_compose
'''3. Prefer the front-camera features as the selected features'''
if len(fs_feats):
event['feats_select'] = fs_feats
return event
'''4. Select the features of a specific track from the front-camera output (disabled below)'''
# =============================================================================
# ftrboxes = event['front_tracking_boxes']
# ftrfeats = event['front_tracking_feats']
#
# condt2 = len(ftrboxes) + len(ftrfeats) == 0
# condt3 = len(ftrfeats) != len(ftrboxes)
# if condt2 or condt3:
# return event
#
# bprops = []
# for boxes in ftrboxes:
# track = Track(boxes)
# bprops.append(max(track.trajdist))
#
# index = bprops.index(max(bprops))
# box_select = ftrboxes[index]
# tid = int(box_select[0, 4])
#
# feat_select = ftrfeats[f"track_{tid}"]
# feats_select = np.empty((0, 256), dtype=np.float64)
# for fid_bid, feat in feat_select['feats'].items():
# feats_select = np.concatenate((feats_select, feat[None, :]), axis=0)
# event['feats_select'] = feats_select
# =============================================================================
return event
def create_deletedBarcode_front(filepath):
'''
Generate deletedBarcodeTest.txt
'''
# filepath = r'\\192.168.1.28\share\测试_202406\0723\0723_1\deletedBarcode.txt'
basepath, _ = os.path.split(filepath)
bcdlist = read_deletedBarcode_file(filepath)
MatchList = []
k = 0
for s_list in bcdlist:
getout_fold = s_list['SeqDir'].strip()
getout_path = os.path.join(basepath, getout_fold)
'''Skip if the take-out event folder does not exist'''
if not os.path.isdir(getout_path):
continue
day, hms = getout_fold.strip('_').split('-')
''' Build the take-out event dict '''
getout_event = {}
getout_event['barcode'] = s_list['Deleted'].strip()
getout_event['path'] = getout_path
getout_event['feats_compose'] = np.empty((0, 256), dtype=np.float64)
getout_event['feats_select'] = np.empty((0, 256), dtype=np.float64)
InputList = []
barcodes = [s.strip() for s in s_list['barcode']]
similarity = [float(s.strip()) for s in s_list['similarity']]
for i, barcode in enumerate(barcodes):
''' Build the put-in event dict '''
input_event = {}
input_folds, times = [], []
for pathname in os.listdir(basepath):
if pathname.endswith('_'): continue
if os.path.isfile(os.path.join(basepath, pathname)):continue
infold = pathname.split('_')
if len(infold)!=2: continue
if len(infold[0])<=14 or len(infold[1])<=10: continue
day1, hms1 = infold[0].split('-')
if day1==day and infold[1]==barcode and int(hms1)<int(hms):
input_folds.append(pathname)
times.append(int(hms1))
if len(input_folds)==0: continue
''' Sort by time and pick the folder closest in time to the take-out action as the folder of its corresponding put-in action '''
input_fold = input_folds[times.index(max(times))]
input_path = os.path.join(basepath, input_fold)
if not os.path.isdir(input_path):
continue
input_event['barcode'] = barcode
input_event['path'] = input_path
input_event['similarity'] = float(similarity[i])
input_event['feats_compose'] = np.empty((0, 256), dtype=np.float64)
input_event['feats_select'] = np.empty((0, 256), dtype=np.float64)
InputList.append(input_event)
MatchList.append((getout_event, InputList))
# k += 1
# if k==2:
# break
print('Step 1: Event init Done!')
for getout_event, InputList in MatchList:
getout_path = getout_event['path']
if os.path.exists(getout_path) and os.path.isdir(getout_path):
event = update_event(getout_path)
getout_event.update(event)
'''====== Put-in events are analyzed only when the take-out event exists ======'''
for input_event in InputList:
input_path = input_event['path']
if os.path.exists(input_path) and os.path.isdir(input_path):
event = update_event(input_path)
input_event.update(event)
print('Step 2: Event update Done!')
results = []
for getout_event, InputList in MatchList:
getout_barcode = getout_event['barcode']
getout_feats_compose = getout_event['feats_compose']
getout_feats_select = getout_event['feats_select']
if len(getout_feats_select):
outfeats_select = getout_feats_select.copy()
else:
outfeats_select = getout_feats_compose.copy()
result = {}
result['SeqDir'] = os.path.split(getout_event['path'])[1]
result['Deleted'] = getout_barcode
result['List'] = {}
for input_event in InputList:
input_barcode = input_event['barcode']
input_feats_compose = input_event['feats_compose']
input_feats_select = input_event['feats_select']
if len(input_feats_select):
infeats_select = input_feats_select.copy()
else:
infeats_select = input_feats_compose.copy()
similar_comp = compute_similar(getout_feats_compose, input_feats_compose)
similar_selt = compute_similar(outfeats_select, infeats_select)
'''On-site similarity, composed-feature similarity, front-camera-selected-feature similarity'''
result['List'][f'{input_barcode}'] = (input_event['similarity'], similar_comp, similar_selt)
# result[f'{input_barcode}'] = (input_event['similarity'], similar_comp, similar_selt)
results.append(result)
print('Step 3: Similarity computation Done!')
wpath = os.path.split(filepath)[0]
wfile = os.path.join(wpath, 'deletedBarcodeTest.txt')
with open(wfile, 'w', encoding='utf-8') as file:
for result in results:
SeqDir = result['SeqDir']
Deleted = result['Deleted']
file.write('\n')
file.write(f'SeqDir: {SeqDir}\n')
file.write(f'Deleted: {Deleted}\n')
file.write('List:\n')
for key, value in result['List'].items():
file.write(f'{key}: ')
file.write(f'{value[0]}, {value[1]:.3f}, {value[2]:.3f}\n')
print('Step 4: File writing Done!')
def precision_compare(filepath, savepath):
'''
1. The similarity in deletedBarcode.txt is computed by the on-site algorithm with
front- and back-camera track features merged
2. The 3 similarities in deletedBarcodeTest.txt are computed, in order, by:
(1) the on-site algorithm, front/back-camera track features merged;
(2) the local algorithm, front/back-camera track features merged;
(3) the local algorithm, preferring the front camera
'''
fpath = os.path.split(filepath)[0]
_, basefile = os.path.split(fpath)
'''1. Comparison performance with front/back-camera features composed'''
fpath1 = os.path.join(fpath, 'deletedBarcode.txt')
blist1 = read_deletedBarcode_file(fpath1)
errpairs, corrpairs, err_similarity, correct_similarity = performance_evaluate(blist1)
recall, prec, ths = compute_recall_precision(err_similarity, correct_similarity)
plt1 = show_recall_prec(recall, prec, ths)
# plt1.show()
plt1.xlabel(f'threshold, Num: {len(blist1)}')
plt1.title(basefile + ', compose')
plt1.savefig(os.path.join(savepath, basefile+'_pr.png'))
plt1.close()
'''2. Comparison performance preferring front-camera features'''
fpath2 = os.path.join(fpath, 'deletedBarcodeTest.txt')
blist2 = read_deletedBarcode_file(fpath2)
front_errpairs, front_corrpairs, front_err_similarity, front_correct_similarity = performance_evaluate(blist2)
front_recall, front_prec, front_ths = compute_recall_precision(front_err_similarity, front_correct_similarity)
plt2 = show_recall_prec(front_recall, front_prec, front_ths)
# plt2.show()
plt2.xlabel(f'threshold, Num: {len(blist2)}')
plt2.title(basefile + ', front')
plt2.savefig(os.path.join(savepath, basefile+'_pr_front.png'))
plt2.close()
def main():
'''
1. Generate deletedBarcodeTest.txt
2. Compare the matching precision under different feature selections
'''
fplist = [#r'\\192.168.1.28\share\测试_202406\0723\0723_1\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0723\0723_2\deletedBarcode.txt',
r'\\192.168.1.28\share\测试_202406\0723\0723_3\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0722\0722_01\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0722\0722_02\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0719\719_1\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0719\719_2\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0719\719_3\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0719\719_4\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0718\0718-1\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0718\0718-2\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0717\0717-1\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0717\0717-2\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0717\0717-3\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0716\0716_1\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0716\0716_2\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0716\0716_3\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0715\0715_1\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0715\0715_2\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0715\0715_3\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0712\0712_1\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\0712\0712_2\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\711\images01\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\711\images02\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\710\images_1\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\710\images_2\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\709\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\705\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\703\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\702_pm_1\702_pm_1\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\702_pm_3\702_pm_3\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\701_am\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\701_pm\701_pm\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\628\1\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\628\2\deletedBarcode.txt',
# r'\\192.168.1.28\share\测试_202406\627\deletedBarcode.txt',
]
savepath = r'\\192.168.1.28\share\测试_202406\deletedBarcode\illustration'
for filepath in fplist:
try:
#1. Generate the deletedBarcodeTest.txt file
create_deletedBarcode_front(filepath)
#2. Requires both deletedBarcode.txt and deletedBarcodeTest.txt in this directory
precision_compare(filepath, savepath)
except Exception as e:
print(f'{filepath}, Error: {e}')
if __name__ == '__main__':
main()

277
contrast/one2n_contrast.py Normal file
View File

@ -0,0 +1,277 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 18 11:49:01 2024
@author: ym
"""
import os
import pickle
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from utils.event import ShoppingEvent
def init_eventdict(sourcePath, stype="data"):
'''stype: str,
'source': pickle files generated from videos or images
'data': on-site runtime data read from data files
'''
k, errEvents = 0, []
for bname in os.listdir(sourcePath):
# bname = r"20241126-135911-bdf91cf9-3e9a-426d-94e8-ddf92238e175_6923555210479"
source_path = os.path.join(sourcePath, bname)
if stype=="data":
pickpath = os.path.join(eventDataPath, f"{bname}.pickle")
if not os.path.isdir(source_path) or os.path.isfile(pickpath):
continue
if stype=="source":
pickpath = os.path.join(eventDataPath, bname)
if not os.path.isfile(source_path) or os.path.isfile(pickpath):
continue
try:
event = ShoppingEvent(source_path, stype)
with open(pickpath, 'wb') as f:
pickle.dump(event, f)
print(bname)
except Exception as e:
errEvents.append(source_path)
print(e)
# k += 1
# if k==1:
# break
errfile = os.path.join(resultPath, 'error_events.txt')
with open(errfile, 'a', encoding='utf-8') as f:
for line in errEvents:
f.write(line + '\n')
def read_eventdict(eventDataPath):
evtDict = {}
for filename in os.listdir(eventDataPath):
evtname, ext = os.path.splitext(filename)
if ext != ".pickle": continue
evtpath = os.path.join(eventDataPath, filename)
with open(evtpath, 'rb') as f:
evtdata = pickle.load(f)
evtDict[evtname] = evtdata
return evtDict
def simi_calc(event, o2nevt, typee=None):
if typee == "11":
boxes1 = event.front_boxes
boxes2 = o2nevt.front_boxes
feat1 = event.front_feats
feat2 = o2nevt.front_feats
if typee == "10":
boxes1 = event.front_boxes
boxes2 = o2nevt.back_boxes
feat1 = event.front_feats
feat2 = o2nevt.back_feats
if typee == "00":
boxes1 = event.back_boxes
boxes2 = o2nevt.back_boxes
feat1 = event.back_feats
feat2 = o2nevt.back_feats
if typee == "01":
boxes1 = event.back_boxes
boxes2 = o2nevt.front_boxes
feat1 = event.back_feats
feat2 = o2nevt.front_feats
'''Custom event-feature selection'''
if typee==3:
feat1 = event.feats_compose
feat2 = o2nevt.feats_compose
if len(feat1) and len(feat2):
matrix = 1 - cdist(feat1[0], feat2[0], 'cosine')
simi = np.mean(matrix)
else:
simi = None
return simi
def one2n_pr(evtDicts, pattern=1):
'''
pattern:
1: use the similarity recorded in process.data
2: compute from features selected by the type flag recorded in process.data
3: compute from features selected some other way
'''
tpevents, fnevents, fpevents, tnevents = [], [], [], []
tpsimi, fnsimi, tnsimi, fpsimi = [], [], [], []
errorFile_one2n = []
for evtname, event in evtDicts.items():
evt_names, evt_barcodes, evt_similars, evt_types = [], [], [], []
for ndict in event.one2n:
nname = ndict["event"]
barcode = ndict["barcode"]
similar = ndict["similar"]
typee = ndict["type"].strip()
evt_names.append(nname)
evt_barcodes.append(barcode)
evt_types.append(typee)
if pattern==1:
evt_similars.append(similar)
if pattern==2 or pattern==3:
o2n_evt = [evt for name, evt in evtDicts.items() if name.find(nname[:15])==0]
if len(o2n_evt)==1:
o2nevt = o2n_evt[0]
else:
continue
if pattern==2:
simival = simi_calc(event, o2nevt, typee)
if pattern==3:
simival = simi_calc(event, o2nevt, typee=pattern)
if simival==None:
continue
evt_similars.append(simival)
if len(evt_names)==len(evt_barcodes) and len(evt_barcodes)==len(evt_similars) \
and len(evt_similars)==len(evt_types) and len(evt_names)>0:
# maxsim = evt_similars[evt_similars.index(max(evt_similars))]
maxsim = max(evt_similars)
for i in range(len(evt_names)):
bcd, simi = evt_barcodes[i], evt_similars[i]
if bcd==event.barcode and simi==maxsim:
tpsimi.append(simi)
tpevents.append(evtname)
elif bcd==event.barcode and simi!=maxsim:
fnsimi.append(simi)
fnevents.append(evtname)
elif bcd!=event.barcode and simi!=maxsim:
tnsimi.append(simi)
tnevents.append(evtname)
elif bcd!=event.barcode and simi==maxsim and event.barcode in evt_barcodes:
fpsimi.append(simi)
fpevents.append(evtname)
else:
errorFile_one2n.append(evtname)
''' 1:n data storage; must be sorted by similarity'''
PPrecise, PRecall = [], []
NPrecise, NRecall = [], []
Thresh = np.linspace(-0.2, 1, 100)
for th in Thresh:
'''============================= 1:n computation'''
TP = sum(np.array(tpsimi) >= th)
FP = sum(np.array(fpsimi) >= th)
FN = sum(np.array(fnsimi) < th)
TN = sum(np.array(tnsimi) < th)
PPrecise.append(TP/(TP+FP+1e-6))
PRecall.append(TP/(len(tpsimi)+len(fnsimi)+1e-6))
NPrecise.append(TN/(TN+FN+1e-6))
NRecall.append(TN/(len(tnsimi)+len(fpsimi)+1e-6))
'''============================= 1:n curves'''
fig, ax = plt.subplots()
ax.plot(Thresh, PPrecise, 'r', label='Precise_Pos: TP/(TP+FP)')
ax.plot(Thresh, PRecall, 'b', label='Recall_Pos: TP/(TP+FN)')
ax.plot(Thresh, NPrecise, 'g', label='Precise_Neg: TN/(TN+FN)')
ax.plot(Thresh, NRecall, 'c', label='Recall_Neg: TN/(TN+FP)')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid(True)
ax.set_title('1:n Precise & Recall')
ax.set_xlabel(f"Event Num: {len(tpsimi)+len(fnsimi)}")
ax.legend()
plt.show()
## ============================= 1:n histograms
fig, axes = plt.subplots(2, 2)
axes[0, 0].hist(tpsimi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[0, 0].set_xlim([-0.2, 1])
axes[0, 0].set_title('TP')
axes[0, 1].hist(fpsimi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[0, 1].set_xlim([-0.2, 1])
axes[0, 1].set_title('FP')
axes[1, 0].hist(tnsimi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[1, 0].set_xlim([-0.2, 1])
axes[1, 0].set_title('TN')
axes[1, 1].hist(fnsimi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[1, 1].set_xlim([-0.2, 1])
axes[1, 1].set_title('FN')
plt.show()
return fpevents
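# Worked sketch of the threshold sweep above (made-up similarities): with
# tpsimi=[0.9, 0.8], fpsimi=[0.85] and th=0.75, TP=2 and FP=1, so the positive
# precision is 2/(2+1) ~ 0.67; raising th to 0.82 drops TP to 1 and the
# precision to 1/2, while the positive recall falls accordingly.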
def main():
'''1. Build the event dict and save it under eventDataPath (only needs to run once) '''
init_eventdict(eventSourcePath, stype="source")
'''2. Load the event dict '''
evtDicts = read_eventdict(eventDataPath)
'''3. Evaluate 1:n comparison events '''
fpevents = one2n_pr(evtDicts, pattern=3)
fpErrFile = str(Path(resultPath).joinpath("one2n_fp_Error.txt"))
with open(fpErrFile, "w") as file:
for item in fpevents:
file.write(item + "\n")
if __name__ == '__main__':
eventSourcePath = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\result\ShoppingDict_pkfile"
resultPath = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\result\contrast"
eventDataPath = os.path.join(resultPath, "evtobjs")
similPath = os.path.join(resultPath, "simidata")
if not os.path.exists(eventDataPath):
os.makedirs(eventDataPath)
if not os.path.exists(similPath):
os.makedirs(similPath)
main()

View File

@ -0,0 +1,498 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Jul ** 14:07:25 2024
Precision/recall analysis program for field tests; a simplified version of feat_select.py,
but it supports looping over multiple files and outputs an overall PR curve
@author: ym
"""
import os.path
import shutil
import numpy as np
import matplotlib.pyplot as plt
import cv2
from pathlib import Path
import sys
sys.path.append(r"D:\DetectTracking")
from tracking.utils.plotting import Annotator, colors
from tracking.utils.read_data import extract_data, read_deletedBarcode_file, read_tracking_output, read_returnGoods_file
from tracking.utils.plotting import draw_tracking_boxes, get_subimgs
from contrast.utils.tools import showHist, show_recall_prec, compute_recall_precision
# =============================================================================
# def read_tracking_output(filepath):
# boxes = []
# feats = []
# with open(filepath, 'r', encoding='utf-8') as file:
# for line in file:
# line = line.strip() # strip the trailing newline and any whitespace
#
# if not line:
# continue
#
# if line.endswith(','):
# line = line[:-1]
#
# data = np.array([float(x) for x in line.split(",")])
# if data.size == 9:
# boxes.append(data)
# if data.size == 256:
# feats.append(data)
#
# return np.array(boxes), np.array(feats)
# =============================================================================
def read_tracking_imgs(imgspath):
'''
input:
imgspath: the images under this path are the input images of the Yolo algorithm (640x512)
output:
imgs_0: back-camera images, sorted by frameId
imgs_1: front-camera images, sorted by frameId
'''
imgs_0, frmIDs_0, imgs_1, frmIDs_1 = [], [], [], []
for filename in os.listdir(imgspath):
file, ext = os.path.splitext(filename)
flist = file.split('_')
if len(flist)==4 and ext==".jpg":
camID, frmID = flist[0], int(flist[-1])
imgpath = os.path.join(imgspath, filename)
img = cv2.imread(imgpath)
if camID=='0':
imgs_0.append(img)
frmIDs_0.append(frmID)
if camID=='1':
imgs_1.append(img)
frmIDs_1.append(frmID)
if len(frmIDs_0):
indice = np.argsort(np.array(frmIDs_0))
imgs_0 = [imgs_0[i] for i in indice ]
if len(frmIDs_1):
indice = np.argsort(np.array(frmIDs_1))
imgs_1 = [imgs_1[i] for i in indice ]
return imgs_0, imgs_1
# =============================================================================
# def draw_tracking_boxes(imgs, tracks):
# '''tracks: [x1, y1, x2, y2, track_id, score, cls, frame_index, box_index]
# 0 1 2 3 4 5 6 7 8
# Key: the order of imgs corresponds to the fid values in the tracks
# '''
# subimgs = []
# for *xyxy, tid, conf, cls, fid, bid in tracks:
# label = f'id:{int(tid)}_{int(cls)}_{conf:.2f}'
#
# annotator = Annotator(imgs[int(fid-1)].copy())
# if cls==0:
# color = colors(int(cls), True)
# elif tid>0 and cls!=0:
# color = colors(int(tid), True)
# else:
# color = colors(19, True) # 19 is the last element of the palette
#
# pt2 = [p/2 for p in xyxy]
# annotator.box_label(pt2, label, color=color)
# img0 = annotator.result()
#
# subimgs.append(img0)
#
# return subimgs
# =============================================================================
def get_contrast_paths(pair, basepath):
assert(len(pair)==2 or len(pair)==3), "pair: seqdir, delete, barcodes"
getout_fold = pair[0] # folder of the take-out action
relvt_barcode = pair[1] # barcode of the put-in action corresponding to the take-out
if len(pair)==3:
error_match = pair[2] # barcode mistakenly matched by the take-out action
else:
error_match = ''
getoutpath, inputpath, errorpath = '', '', ''
day, hms = getout_fold.strip('_').split('-')
input_folds, times = [], []
errmatch_folds, errmatch_times = [], []
for pathname in os.listdir(basepath):
if pathname.endswith('_'): continue
if os.path.isfile(os.path.join(basepath, pathname)):continue
infold = pathname.split('_')
if len(infold)!=2: continue
day1, hms1 = infold[0].split('-')
if day1==day and infold[1]==relvt_barcode and int(hms1)<int(hms):
input_folds.append(pathname)
times.append(int(hms1))
if day1==day and len(error_match) and infold[1]==error_match and int(hms1)<int(hms):
errmatch_folds.append(pathname)
errmatch_times.append(int(hms1))
''' Sort by time and pick the folder closest in time to the take-out action
as the folder of the put-in action the take-out should correctly match '''
if len(input_folds):
indice = np.argsort(np.array(times))
input_fold = input_folds[indice[-1]]
inputpath = os.path.join(basepath, input_fold)
'''Folder of the put-in action the take-out was mistakenly matched to'''
if len(errmatch_folds):
indice = np.argsort(np.array(errmatch_times))
errmatch_fold = errmatch_folds[indice[-1]]
errorpath = os.path.join(basepath, errmatch_fold)
'''Paths of the put-in and take-out event folders'''
getoutpath = os.path.join(basepath, getout_fold)
return getoutpath, inputpath, errorpath
def save_tracking_imgpairs(pairs, savepath):
'''
pairs: matched event pairs
savepath: target folder for saving
'''
def get_event_path(evtpath):
basepath, eventname = os.path.split(evtpath)
evt_path = ''
for filename in os.listdir(basepath):
if filename.find(eventname)==0:
evt_path = os.path.join(basepath, filename)
break
return evt_path
getoutpath = get_event_path(pairs[0])
inputpath = get_event_path(pairs[1])
if len(pairs) == 3:
errorpath = get_event_path(pairs[2])
else:
errorpath = ''
''' 1. Read the front/back-camera Yolo input images of the put-in and take-out events (0: back, 1: front)
2. Read the tracking output (boxes, feats) of the put-in and take-out events
3. Draw the boxes and save the image sequences
4. Crop and save the track sub-images
'''
if len(getoutpath):
imgs_getout_0, imgs_getout_1 = read_tracking_imgs(getoutpath)
getout_data_0 = os.path.join(getoutpath, '0_tracking_output.data')
getout_data_1 = os.path.join(getoutpath, '1_tracking_output.data')
boxes_output_0, feats_output_0 = read_tracking_output(getout_data_0)
boxes_output_1, feats_output_1 = read_tracking_output(getout_data_1)
ImgsGetout_0 = draw_tracking_boxes(imgs_getout_0, boxes_output_0)
ImgsGetout_1 = draw_tracking_boxes(imgs_getout_1, boxes_output_1)
SubimgsGetout_0 = get_subimgs(imgs_getout_0, boxes_output_0)
SubimgsGetout_1 = get_subimgs(imgs_getout_1, boxes_output_1)
savedir = os.path.basename(getoutpath)
if len(inputpath):
imgs_input_0, imgs_input_1 = read_tracking_imgs(inputpath)
input_data_0 = os.path.join(inputpath, '0_tracking_output.data')
input_data_1 = os.path.join(inputpath, '1_tracking_output.data')
boxes_input_0, feats_input_0 = read_tracking_output(input_data_0)
boxes_input_1, feats_input_1 = read_tracking_output(input_data_1)
ImgsInput_0 = draw_tracking_boxes(imgs_input_0, boxes_input_0)
ImgsInput_1 = draw_tracking_boxes(imgs_input_1, boxes_input_1)
SubimgsInput_0 = get_subimgs(imgs_input_0, boxes_input_0)
SubimgsInput_1 = get_subimgs(imgs_input_1, boxes_input_1)
savedir = savedir + '+' + os.path.basename(inputpath)
if len(errorpath):
imgs_error_0, imgs_error_1 = read_tracking_imgs(errorpath)
error_data_0 = os.path.join(errorpath, '0_tracking_output.data')
error_data_1 = os.path.join(errorpath, '1_tracking_output.data')
boxes_error_0, feats_error_0 = read_tracking_output(error_data_0)
boxes_error_1, feats_error_1 = read_tracking_output(error_data_1)
ImgsError_0 = draw_tracking_boxes(imgs_error_0, boxes_error_0)
ImgsError_1 = draw_tracking_boxes(imgs_error_1, boxes_error_1)
SubimgsError_0 = get_subimgs(imgs_error_0, boxes_error_0)
SubimgsError_1 = get_subimgs(imgs_error_1, boxes_error_1)
savedir = savedir + '+' + os.path.basename(errorpath)
''' Save the box-annotated image sequences under savepath\pairs\savedir\eventpairs '''
entpairs = os.path.join(savepath, 'pairs', savedir, 'eventpairs')
if not os.path.exists(entpairs):
os.makedirs(entpairs)
for fid, img in ImgsInput_0:
imgpath = os.path.join(entpairs, f'input_0_{fid}.png')
cv2.imwrite(imgpath, img)
for fid, img in ImgsInput_1:
imgpath = os.path.join(entpairs, f'input_1_{fid}.png')
cv2.imwrite(imgpath, img)
for fid, img in ImgsGetout_0:
imgpath = os.path.join(entpairs, f'getout_0_{fid}.png')
cv2.imwrite(imgpath, img)
for fid, img in ImgsGetout_1:
imgpath = os.path.join(entpairs, f'getout_1_{fid}.png')
cv2.imwrite(imgpath, img)
if 'ImgsError_0' in vars() and 'ImgsError_1' in vars():
for fid, img in ImgsError_0:
imgpath = os.path.join(entpairs, f'errMatch_0_{fid}.png')
cv2.imwrite(imgpath, img)
for fid, img in ImgsError_1:
imgpath = os.path.join(entpairs, f'errMatch_1_{fid}.png')
cv2.imwrite(imgpath, img)
''' Save the track sub-images under savepath\pairs\savedir\subimgpairs '''
subimgpairs = os.path.join(savepath, 'pairs', savedir, 'subimgpairs')
if not os.path.exists(subimgpairs):
os.makedirs(subimgpairs)
for fid, bid, img in SubimgsGetout_0:
imgpath = os.path.join(subimgpairs, f'getout_0_{fid}_{bid}.png')
cv2.imwrite(imgpath, img)
for fid, bid, img in SubimgsGetout_1:
imgpath = os.path.join(subimgpairs, f'getout_1_{fid}_{bid}.png')
cv2.imwrite(imgpath, img)
for fid, bid, img in SubimgsInput_0:
imgpath = os.path.join(subimgpairs, f'input_0_{fid}_{bid}.png')
cv2.imwrite(imgpath, img)
for fid, bid, img in SubimgsInput_1:
imgpath = os.path.join(subimgpairs, f'input_1_{fid}_{bid}.png')
cv2.imwrite(imgpath, img)
if 'SubimgsError_0' in vars() and 'SubimgsError_1' in vars():
for fid, bid, img in SubimgsError_0:
imgpath = os.path.join(subimgpairs, f'errMatch_0_{fid}_{bid}.png')
cv2.imwrite(imgpath, img)
for fid, bid, img in SubimgsError_1:
imgpath = os.path.join(subimgpairs, f'errMatch_1_{fid}_{bid}.png')
cv2.imwrite(imgpath, img)
def one2n_deleted(all_list):
corrpairs, errpairs, correct_similarity, err_similarity = [], [], [], []
for s_list in all_list:
seqdir = s_list['SeqDir'].strip()
delete = s_list['Deleted'].strip()
barcodes = [s.strip() for s in s_list['barcode']]
similarity_comp, similarity_front = [], []
for simil in s_list['similarity']:
ss = [float(s.strip()) for s in simil.split(',')]
similarity_comp.append(ss[0])
if len(ss)==3:
similarity_front.append(ss[2])
if len(similarity_front):
similarity = [s for s in similarity_front]
else:
similarity = [s for s in similarity_comp]
index = similarity.index(max(similarity))
matched_barcode = barcodes[index]
if matched_barcode == delete:
corrpairs.append((seqdir, delete))
correct_similarity.append(max(similarity))
else:
errpairs.append((seqdir, delete, matched_barcode))
err_similarity.append(max(similarity))
return corrpairs, errpairs, correct_similarity, err_similarity
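# Worked sketch (made-up record): for barcodes=['A', 'B'] with similarities
# [0.71, 0.65] and Deleted == 'A', the max-similarity barcode 'A' equals the
# deleted one, so (seqdir, 'A') joins corrpairs with similarity 0.71; had 'B'
# scored highest, (seqdir, 'A', 'B') would join errpairs instead.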
def one2n_return(all_list):
corrpairs, corr_similarity, errpairs, err_similarity = [], [], [], []
for s_list in all_list:
seqdir = s_list['SeqDir'].strip()
delete = s_list['Deleted'].strip()
barcodes = [s.strip() for s in s_list['barcode']]
events = [s.strip() for s in s_list['event']]
types = [s.strip() for s in s_list['type']]
## =================== Read the similarity values
similarity_comp, similarity_front = [], []
for simil in s_list['similarity']:
ss = [float(s.strip()) for s in simil.split(',')]
similarity_comp.append(ss[0])
if len(ss)==3:
similarity_front.append(ss[2])
if len(similarity_front):
similarity = [s for s in similarity_front]
else:
similarity = [s for s in similarity_comp]
index = similarity.index(max(similarity))
matched_barcode = barcodes[index]
if matched_barcode == delete:
corrpairs.append((seqdir, events[index]))
corr_similarity.append(max(similarity))
else:
idx = [i for i, name in enumerate(events) if name.split('_')[-1] == delete]
idxmax, simimax = -1, -1
# idxmax, simimax = k, similarity[k] for k in idx if similarity[k] > simimax
for k in idx:
if similarity[k] > simimax:
idxmax = k
simimax = similarity[k]
if idxmax>-1:
input_event = events[idxmax]
else:
input_event = ''
errpairs.append((seqdir, input_event, events[index]))
err_similarity.append(max(similarity))
return corrpairs, errpairs, corr_similarity, err_similarity
def test_rpath_deleted():
'''For 1:n result files in deletedBarcode.txt format; returnGoods.txt-format files do not need this function'''
del_bfile = r'\\192.168.1.28\share\测试_202406\709\deletedBarcode.txt'
basepath = r'\\192.168.1.28\share\测试_202406\709'
savepath = r'D:\DetectTracking\contrast\result'
saveimgs = True
relative_paths = []
'''1. Read the deletedBarcode file '''
all_list = read_deletedBarcode_file(del_bfile)
'''2. Evaluate algorithm performance and output (take-out, delete, mismatch) pairs '''
corrpairs, errpairs, _, _ = one2n_deleted(all_list)
'''3. Build the event-combination paths for each (take-out, put-in-and-delete, mismatch) pair '''
for errpair in errpairs:
GetoutPath, InputPath, ErrorPath = get_contrast_paths(errpair, basepath)
pairs = (GetoutPath, InputPath, ErrorPath)
relative_paths.append(pairs)
print(InputPath)
'''4. For the (take-out, put-in-and-delete, mismatch) paths, save the corresponding tracking images'''
if saveimgs:
save_tracking_imgpairs(pairs, savepath)
def test_rpath_return():
return_bfile = r'\\192.168.1.28\share\测试_202406\1101\images\returnGoods.txt'
basepath = r'\\192.168.1.28\share\测试_202406\1101\images'
savepath = r'D:\DetectTracking\contrast\result'
all_list = read_returnGoods_file(return_bfile)
corrpairs, errpairs, _, _ = one2n_return(all_list)
for corrpair in corrpairs:
GetoutPath = os.path.join(basepath, corrpair[0])
InputPath = os.path.join(basepath, corrpair[1])
pairs = (GetoutPath, InputPath)
save_tracking_imgpairs(pairs, savepath)
for errpair in errpairs:
GetoutPath = os.path.join(basepath, errpair[0])
InputPath = os.path.join(basepath, errpair[1])
ErrorPath = os.path.join(basepath, errpair[2])
pairs = (GetoutPath, InputPath, ErrorPath)
save_tracking_imgpairs(pairs, savepath)
def test_one2n():
'''
1:n performance test
Compatible with 2 txt file formats: returnGoods.txt, deletedBarcode.txt
fpath: a file path, or a folder containing multiple txt files
savepath: where the PR curves are saved
'''
# fpath = r'\\192.168.1.28\share\测试_202406\deletedBarcode\other' # deletedBarcode.txt
fpath = r'\\192.168.1.28\share\测试_202406\1108_展厅模型v800测试' # returnGoods.txt
savepath = r'\\192.168.1.28\share\测试_202406\deletedBarcode\illustration'
if os.path.isdir(fpath):
filepaths = [os.path.join(fpath, f) for f in os.listdir(fpath)
if f.find('.txt')>0
and (f.find('deletedBarcode')>=0 or f.find('returnGoods')>=0)]
elif os.path.isfile(fpath):
filepaths = [fpath]
else:
return
if not os.path.exists(savepath):
os.mkdir(savepath)
BarLists, blists = {}, []
for pth in filepaths:
file = str(Path(pth).stem)
if file.find('deletedBarcode')>=0:
blist = read_deletedBarcode_file(pth)
if file.find('returnGoods')>=0:
blist = read_returnGoods_file(pth)
BarLists.update({file: blist})
blists.extend(blist)
if len(blists): BarLists.update({"Total": blists})
for file, blist in BarLists.items():
if all(b['filetype']=="deletedBarcode" for b in blist):
_, _, correct_similarity, err_similarity = one2n_deleted(blist)
if all(b['filetype']=="returnGoods" for b in blists):
_, _, correct_similarity, err_similarity = one2n_return(blist)
recall, prec, ths = compute_recall_precision(err_similarity, correct_similarity)
plt1 = show_recall_prec(recall, prec, ths)
# plt1.show()
plt1.xlabel(f'threshold, Num: {len(blist)}')
plt1.savefig(os.path.join(savepath, file+'_pr.png'))
# plt1.close()
plt2 = showHist(err_similarity, correct_similarity)
plt2.savefig(os.path.join(savepath, file+'_hist.png'))  # save before show(), which clears the figure
plt2.show()
# plt2.close()
if __name__ == '__main__':
# test_one2n()
test_rpath_return() # returnGoods.txt
# test_rpath_deleted() # deletedBarcode.txt
# try:
# test_rpath_return()
# test_rpath_deleted()
# except Exception as e:
# print(e)

View File

@ -0,0 +1,648 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 30 17:53:03 2024
Purpose: 1:1 comparison performance test program.
1. Generate and save the standard feature set from the raw image samples it is based on.
func: generate_event_and_stdfeatures():
(1) get_std_barcodeDict(stdSamplePath, stdBarcodePath)
Collects the sample paths under stdSamplePath into a dict {barcode: [imgpath1, imgpath2, ...]}
and stores it as the pickle file "barcode.pickle"
(2) stdfeat_infer(stdBarcodePath, stdFeaturePath, bcdSet=None)
Extracts the standard features and saves them under stdFeaturePath;
may also run against the intersection with the shopping-event barcode set
2. 1:1 comparison performance test,
func: one2one_simi()
(1) Intersect the shopping-event and standard-feature barcode sets; build evtDict and stdDict
(2) Build the "scan A put A" and "scan A put B" pairs: mergePairs = AA_list + AB_list
(3) Loop over mergePairs and compute the similarity of each "(A, A) or (A, B)" pair;
save any track images or standard-barcode images not yet on disk
(4) Save the results
3. Compute precision, recall and related metrics.
func: compute_one2one_pr(pickpath)
@author: ym
"""
import numpy as np
import cv2
import os
import sys
import random
import pickle
import json
import copy
# import torch
import time
from pathlib import Path
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
import shutil
from datetime import datetime
# from openpyxl import load_workbook, Workbook
# from config import config as conf
# from model import resnet18 as resnet18
# from feat_inference import inference_image
sys.path.append(r"D:\DetectTracking")
from tracking.utils.read_data import extract_data, read_tracking_output, read_similar, read_deletedBarcode_file
from tracking.utils.plotting import Annotator, colors
from feat_extract.config import config as conf
from feat_extract.inference import FeatsInterface
from utils.event import ShoppingEvent, save_data
from genfeats import gen_bcd_features
from event_test import calc_simil
def int8_to_ft16(arr_uint8, amin, amax):
'''Dequantize a uint8 array back to float16, given the original value range.'''
arr_ft16 = (arr_uint8 / 255 * (amax-amin) + amin).astype(np.float16)
return arr_ft16
def ft16_to_uint8(arr_ft16):
'''Quantize a float16 array to uint8 and return the round-tripped float16 copy.'''
# pickpath = r"\\192.168.1.28\share\测试_202406\contrast\std_features_ft32vsft16\6902265587712_ft16.pickle"
# with open(pickpath, 'rb') as f:
# edict = pickle.load(f)
# arr_ft16 = edict['feats']
amin = np.min(arr_ft16)
amax = np.max(arr_ft16)
arr_ft255 = (arr_ft16 - amin) * 255 / (amax-amin)
arr_uint8 = arr_ft255.astype(np.uint8)
arr_ft16_ = int8_to_ft16(arr_uint8, amin, amax)
arrDistNorm = np.linalg.norm(arr_ft16_ - arr_ft16) / arr_ft16_.size  # reconstruction error (debug only)
return arr_uint8, arr_ft16_
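# Illustrative round-trip sketch (not part of the original pipeline): measures how much
# accuracy the uint8 quantization above loses on random, L2-normalized features.
def _demo_uint8_roundtrip():
    import numpy as np
    feats = np.random.rand(8, 256)
    feats = (feats / np.linalg.norm(feats, axis=1, keepdims=True)).astype(np.float16)
    arr_uint8, feats_rt = ft16_to_uint8(feats)
    # maximum absolute reconstruction error after the uint8 round trip
    err = np.abs(feats_rt.astype(np.float32) - feats.astype(np.float32)).max()
    print(f"max abs round-trip error: {err:.6f}")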
def data_precision_compare(stdfeat, evtfeat, evtMessage, save=True):
evt, stdbcd, label = evtMessage
rltdata, rltdata_ft16, rltdata_ft16_ = [], [], []
matrix = 1 - cdist(stdfeat, evtfeat, 'cosine')
simi_mean = np.mean(matrix)
simi_max = np.max(matrix)
stdfeatm = np.mean(stdfeat, axis=0, keepdims=True)
evtfeatm = np.mean(evtfeat, axis=0, keepdims=True)
simi_mfeat = 1- np.maximum(0.0, cdist(stdfeatm, evtfeatm, 'cosine'))
rltdata = [(label, stdbcd, evt, simi_mean, simi_max, simi_mfeat[0,0])]  # one record per pair, so the save loops below iterate records
##================================================================= float16
stdfeat_ft16 = stdfeat.astype(np.float16)
evtfeat_ft16 = evtfeat.astype(np.float16)
stdfeat_ft16 /= np.linalg.norm(stdfeat_ft16, axis=1)[:, None]
evtfeat_ft16 /= np.linalg.norm(evtfeat_ft16, axis=1)[:, None]
matrix_ft16 = 1 - cdist(stdfeat_ft16, evtfeat_ft16, 'cosine')
simi_mean_ft16 = np.mean(matrix_ft16)
simi_max_ft16 = np.max(matrix_ft16)
stdfeatm_ft16 = np.mean(stdfeat_ft16, axis=0, keepdims=True)
evtfeatm_ft16 = np.mean(evtfeat_ft16, axis=0, keepdims=True)
simi_mfeat_ft16 = 1- np.maximum(0.0, cdist(stdfeatm_ft16, evtfeatm_ft16, 'cosine'))
rltdata_ft16 = [(label, stdbcd, evt, simi_mean_ft16, simi_max_ft16, simi_mfeat_ft16[0,0])]
'''****************** symmetric int8 quantization is ok!!!!!! ******************'''
##==================================================================== int8
# stdfeat_uint8, stdfeat_ft16_ = ft16_to_uint8(stdfeat_ft16)
# evtfeat_uint8, evtfeat_ft16_ = ft16_to_uint8(evtfeat_ft16)
stdfeat_int8 = (stdfeat_ft16*128).astype(np.int8)
evtfeat_int8 = (evtfeat_ft16*128).astype(np.int8)
stdfeat_ft16_ = stdfeat_int8.astype(np.float16)/128
evtfeat_ft16_ = evtfeat_int8.astype(np.float16)/128
absdiff = np.linalg.norm(stdfeat_ft16_ - stdfeat) / stdfeat.size
matrix_ft16_ = 1 - cdist(stdfeat_ft16_, evtfeat_ft16_, 'cosine')
simi_mean_ft16_ = np.mean(matrix_ft16_)
simi_max_ft16_ = np.max(matrix_ft16_)
stdfeatm_ft16_ = np.mean(stdfeat_ft16_, axis=0, keepdims=True)
evtfeatm_ft16_ = np.mean(evtfeat_ft16_, axis=0, keepdims=True)
simi_mfeat_ft16_ = 1- np.maximum(0.0, cdist(stdfeatm_ft16_, evtfeatm_ft16_, 'cosine'))
rltdata_ft16_ = [(label, stdbcd, evt, simi_mean_ft16_, simi_max_ft16_, simi_mfeat_ft16_[0,0])]
if not save:
return
##========================================================= save as float32
rppath = os.path.join(similPath, f'{evt}_ft32.pickle')
with open(rppath, 'wb') as f:
pickle.dump(rltdata, f)
rtpath = os.path.join(similPath, f'{evt}_ft32.txt')
with open(rtpath, 'w', encoding='utf-8') as f:
for result in rltdata:
part = [f"{x:.3f}" if isinstance(x, float) else str(x) for x in result]
line = ', '.join(part)
f.write(line + '\n')
##========================================================= save as float16
rppath_ft16 = os.path.join(similPath, f'{evt}_ft16.pickle')
with open(rppath_ft16, 'wb') as f:
pickle.dump(rltdata_ft16, f)
rtpath_ft16 = os.path.join(similPath, f'{evt}_ft16.txt')
with open(rtpath_ft16, 'w', encoding='utf-8') as f:
for result in rltdata_ft16:
part = [f"{x:.3f}" if isinstance(x, float) else str(x) for x in result]
line = ', '.join(part)
f.write(line + '\n')
##=========================================================== save as uint8
rppath_uint8 = os.path.join(similPath, f'{evt}_uint8.pickle')
with open(rppath_uint8, 'wb') as f:
pickle.dump(rltdata_ft16_, f)
rtpath_uint8 = os.path.join(similPath, f'{evt}_uint8.txt')
with open(rtpath_uint8, 'w', encoding='utf-8') as f:
for result in rltdata_ft16_:
part = [f"{x:.3f}" if isinstance(x, float) else str(x) for x in result]
line = ', '.join(part)
f.write(line + '\n')
def simi_calc(event, stdfeat):
'''Return the (mean, max, mean-feature) cosine similarities between an event and a standard feature set.'''
if isinstance(event.feats_select, list):
if len(event.feats_select) and len(event.feats_select[0]):
evtfeat = event.feats_select[0]
else:
return None, None, None
else:
evtfeat = event.feats_select
if len(evtfeat)==0 or len(stdfeat)==0:
return None, None, None
evtfeat /= np.linalg.norm(evtfeat, axis=1)[:, None]
stdfeat /= np.linalg.norm(stdfeat, axis=1)[:, None]
matrix = 1 - cdist(evtfeat, stdfeat, 'cosine')
matrix[matrix < 0] = 0
simi_mean = np.mean(matrix)
simi_max = np.max(matrix)
stdfeatm = np.mean(stdfeat, axis=0, keepdims=True)
evtfeatm = np.mean(evtfeat, axis=0, keepdims=True)
simi_mfeat = 1- np.maximum(0.0, cdist(stdfeatm, evtfeatm, 'cosine'))
return simi_mean, simi_max, simi_mfeat[0,0]
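# Minimal sketch (illustrative only) of the three statistics simi_calc() returns,
# computed on random unit vectors standing in for event / standard features.
def _demo_simi_stats():
    import numpy as np
    from scipy.spatial.distance import cdist
    evtfeat = np.random.rand(5, 256)
    stdfeat = np.random.rand(7, 256)
    evtfeat /= np.linalg.norm(evtfeat, axis=1, keepdims=True)
    stdfeat /= np.linalg.norm(stdfeat, axis=1, keepdims=True)
    matrix = 1 - cdist(evtfeat, stdfeat, 'cosine')  # pairwise cosine similarity
    matrix[matrix < 0] = 0
    evtm = evtfeat.mean(axis=0, keepdims=True)
    stdm = stdfeat.mean(axis=0, keepdims=True)
    simi_mfeat = 1 - np.maximum(0.0, cdist(stdm, evtm, 'cosine'))
    print(matrix.mean(), matrix.max(), simi_mfeat[0, 0])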
def build_std_evt_dict():
'''
stdFeaturePath: directory of the standard feature set
eventDataPath: directory of the pickled Event objects
'''
stdBarcode = [p.stem for p in Path(stdFeaturePath).iterdir() if p.is_file() and (p.suffix=='.json' or p.suffix=='.pickle')]
'''*********** USearch ***********'''
# stdFeaturePath = r"D:\contrast\stdlib\v11_test.json"
# stdBarcode = []
# stdlib = {}
# with open(stdFeaturePath, 'r', encoding='utf-8') as f:
# data = json.load(f)
# for dic in data['total']:
# barcode = dic['key']
# feature = np.array(dic['value'])
# stdBarcode.append(barcode)
# stdlib[barcode] = feature
'''======1. Shopping-event list; every Barcode in it must exist in stdBarcode ==='''
evtList = [(p.stem, p.stem.split('_')[-1]) for p in Path(eventDataPath).iterdir()
if p.is_file()
and p.suffix=='.pickle'
and (len(p.stem.split('_'))==2 or len(p.stem.split('_'))==3)
and p.stem.split('_')[-1].isdigit()
and p.stem.split('_')[-1] in stdBarcode
]
barcodes = set([bcd for _, bcd in evtList])
'''======2. Build the standard-feature dict used for comparison ============='''
stdDict = {}
for stdfile in os.listdir(stdFeaturePath):
barcode, ext = os.path.splitext(stdfile)
if barcode not in barcodes:
continue
stdpath = os.path.join(stdFeaturePath, stdfile)
if ext == ".json":
with open(stdpath, 'r', encoding='utf-8') as f:
stddata = json.load(f)
feat = np.array(stddata["value"])
stdDict[barcode] = feat
if ext == ".pickle":
with open(stdpath, 'rb') as f:
stddata = pickle.load(f)
feat = stddata["feats_ft32"]
stdDict[barcode] = feat
'''*********** USearch ***********'''
# stdDict = {}
# for barcode in barcodes:
# stdDict[barcode] = stdlib[barcode]
'''======3. Build the shopping-event dict used for comparison ============='''
evtDict = {}
for evtname, barcode in evtList:
evtpath = os.path.join(eventDataPath, evtname+'.pickle')
with open(evtpath, 'rb') as f:
evtdata = pickle.load(f)
evtDict[evtname] = evtdata
return evtList, evtDict, stdDict
def one2SN_pr(evtList, evtDict, stdDict):
std_barcodes = set([bcd for _, bcd in evtList])
tp_events, fn_events, fp_events, tn_events = [], [], [], []
tp_simi, fn_simi, tn_simi, fp_simi = [], [], [], []
errorFile_one2SN = []
SN = 9
for evtname, barcode in evtList:
bcd_selected = [barcode]
dset = list(std_barcodes - set([barcode]))
if len(dset) > SN:
random.shuffle(dset)
bcd_selected.extend(dset[:SN])
else:
bcd_selected.extend(dset)
event = evtDict[evtname]
## skip events that have no tracks at all
if len(event.front_feats)+len(event.back_feats)==0:
print(evtname)
continue
barcodes, similars = [], []
for stdbcd in bcd_selected:
stdfeat = stdDict[stdbcd]
simi_mean, simi_max, simi_mfeat = simi_calc(event, stdfeat)
# simi_mean = calc_simil(event, stdfeat)
## no guard needed here as long as event.front_feats and event.back_feats are not both empty
# if simi_mean==None:
# continue
barcodes.append(stdbcd)
similars.append(simi_mean)
## no guard needed here either
# if len(similars)==0:
# print(evtname)
# continue
max_idx = similars.index(max(similars))
max_sim = similars[max_idx]
for i in range(len(barcodes)):
bcd, simi = barcodes[i], similars[i]
if bcd==barcode and simi==max_sim:
tp_simi.append(simi)
tp_events.append(evtname)
elif bcd==barcode and simi!=max_sim:
fn_simi.append(simi)
fn_events.append(evtname)
elif bcd!=barcode and simi!=max_sim:
tn_simi.append(simi)
tn_events.append(evtname)
elif bcd!=barcode and simi==max_sim and barcode in barcodes:
fp_simi.append(simi)
fp_events.append(evtname)
else:
errorFile_one2SN.append(evtname)
PPreciseX, PRecallX = [], []
NPreciseX, NRecallX = [], []
Thresh = np.linspace(-0.2, 1, 100)
for th in Thresh:
'''(Precision, Recall) scheme: several similarities are computed and ranked; an equal barcode ranked first counts as TP '''
'''===================================== 1:SN '''
TPX = sum(np.array(tp_simi) >= th)
FPX = sum(np.array(fp_simi) >= th)
FNX = sum(np.array(fn_simi) < th)
TNX = sum(np.array(tn_simi) < th)
PPreciseX.append(TPX/(TPX+FPX+1e-6))
PRecallX.append(TPX/(len(tp_simi)+len(fn_simi)+1e-6))
NPreciseX.append(TNX/(TNX+FNX+1e-6))
NRecallX.append(TNX/(len(tn_simi)+len(fp_simi)+1e-6))
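# Toy sanity check for the sweep above (hypothetical numbers, th = 0.5):
#   tp_simi=[0.9, 0.6, 0.55], fp_simi=[0.8], fn_simi=[0.4], tn_simi=[0.3, 0.1]
#   -> TPX=3, FPX=1, FNX=1, TNX=2
#   -> Precise_Pos=3/4, Recall_Pos=3/4, Precise_Neg=2/3, Recall_Neg=2/3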
fig, ax = plt.subplots()
ax.plot(Thresh, PPreciseX, 'r', label='Precise_Pos: TP/TPFP')
ax.plot(Thresh, PRecallX, 'b', label='Recall_Pos: TP/TPFN')
ax.plot(Thresh, NPreciseX, 'g', label='Precise_Neg: TN/TNFN')
ax.plot(Thresh, NRecallX, 'c', label='Recall_Neg: TN/TNFP')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid(True)
ax.set_title('1:SN Precise & Recall')
ax.set_xlabel(f"Event Num: {len(evtList)}")
ax.legend()
plt.show()
## ============================= 1:N exhibition-hall histograms
fig, axes = plt.subplots(2, 2)
axes[0, 0].hist(tp_simi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[0, 0].set_xlim([-0.2, 1])
axes[0, 0].set_title(f'TP({len(tp_simi)})')
axes[0, 1].hist(fp_simi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[0, 1].set_xlim([-0.2, 1])
axes[0, 1].set_title(f'FP({len(fp_simi)})')
axes[1, 0].hist(tn_simi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[1, 0].set_xlim([-0.2, 1])
axes[1, 0].set_title(f'TN({len(tn_simi)})')
axes[1, 1].hist(fn_simi, bins=60, range=(-0.2, 1), edgecolor='black')
axes[1, 1].set_xlim([-0.2, 1])
axes[1, 1].set_title(f'FN({len(fn_simi)})')
plt.show()
def one2one_simi(evtList, evtDict, stdDict):
barcodes = set([bcd for _, bcd in evtList])
'''======1 Build 3 kinds of event pairs: scan A put A, scan A put B, and their merge ===================='''
AA_list = [(evtname, barcode, "same") for evtname, barcode in evtList]
AB_list = []
for evtname, barcode in evtList:
dset = list(barcodes - {barcode})  # all other barcodes (barcode is always in barcodes here)
if len(dset):
idx = random.randint(0, len(dset)-1)
AB_list.append((evtname, dset[idx], "diff"))
mergePairs = AA_list + AB_list
'''======2 Compute event vs. standard-feature-set similarity =================='''
rltdata = []
for i in range(len(mergePairs)):
evtname, stdbcd, label = mergePairs[i]
event = evtDict[evtname]
if len(event.feats_compose)==0: continue
stdfeat = stdDict[stdbcd] # float32
simi_mean, simi_max, simi_mfeat = simi_calc(event, stdfeat)
if simi_mean is None:
continue
rltdata.append((label, stdbcd, evtname, simi_mean, simi_max, simi_mfeat))
'''================ float32 / float16 / int8 precision comparison & storage ============='''
# data_precision_compare(stdfeat, evtfeat, mergePairs[i], save=True)
return rltdata
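# Toy sketch (hypothetical event names) of the pairing step in one2one_simi(): each event
# yields one "same" pair with its own barcode and one "diff" pair with a random other barcode.
def _demo_pairing():
    import random
    evtList = [("evtA_111", "111"), ("evtB_222", "222"), ("evtC_333", "333")]
    barcodes = set(bcd for _, bcd in evtList)
    AA_list = [(evt, bcd, "same") for evt, bcd in evtList]
    AB_list = [(evt, random.choice(sorted(barcodes - {bcd})), "diff") for evt, bcd in evtList]
    print(AA_list + AB_list)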
def one2one_pr(rltdata):
Same, Cross = [], []
for label, stdbcd, evtname, simi_mean, simi_max, simi_mft in rltdata:
if label == "same":
Same.append(simi_max)
if label == "diff":
Cross.append(simi_max)
Same = np.array(Same)
Cross = np.array(Cross)
TPFN = len(Same)
TNFP = len(Cross)
# fig, axs = plt.subplots(2, 1)
# axs[0].hist(Same, bins=60, range=(-0.2, 1), edgecolor='black')
# axs[0].set_xlim([-0.2, 1])
# axs[0].set_title(f'Same Barcode, Num: {TPFN}')
# axs[1].hist(Cross, bins=60, range=(-0.2, 1), edgecolor='black')
# axs[1].set_xlim([-0.2, 1])
# axs[1].set_title(f'Cross Barcode, Num: {TNFP}')
# plt.savefig(f'./result/{file}_hist.png') # svg, png, pdf
Recall_Pos, Recall_Neg = [], []
Precision_Pos, Precision_Neg = [], []
Correct = []
Thresh = np.linspace(-0.2, 1, 100)
for th in Thresh:
TP = np.sum(Same > th)
FN = TPFN - TP
TN = np.sum(Cross < th)
FP = TNFP - TN
Recall_Pos.append(TP/TPFN)
Recall_Neg.append(TN/TNFP)
Precision_Pos.append(TP/(TP+FP+1e-6))
Precision_Neg.append(TN/(TN+FN+1e-6))
Correct.append((TN+TP)/(TPFN+TNFP))
fig, ax = plt.subplots()
ax.plot(Thresh, Correct, 'r', label='Correct: (TN+TP)/(TPFN+TNFP)')
ax.plot(Thresh, Recall_Pos, 'b', label='Recall_Pos: TP/TPFN')
ax.plot(Thresh, Recall_Neg, 'g', label='Recall_Neg: TN/TNFP')
ax.plot(Thresh, Precision_Pos, 'c', label='Precision_Pos: TP/(TP+FP)')
ax.plot(Thresh, Precision_Neg, 'm', label='Precision_Neg: TN/(TN+FN)')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid(True)
ax.set_title('PrecisePos & PreciseNeg')
ax.set_xlabel(f"Same Num: {TPFN}, Cross Num: {TNFP}")
ax.legend()
rltpath = os.path.join(similPath, 'pr.png')
plt.savefig(rltpath) # svg, png, pdf; save before show(), which clears the figure
plt.show()
fig, axes = plt.subplots(2,1)
axes[0].hist(Same, bins=60, range=(-0.2, 1), edgecolor='black')
axes[0].set_xlim([-0.2, 1])
axes[0].set_title(f'TP({len(Same)})')
axes[1].hist(Cross, bins=60, range=(-0.2, 1), edgecolor='black')
axes[1].set_xlim([-0.2, 1])
axes[1].set_title(f'TN({len(Cross)})')
rltpath = os.path.join(similPath, 'hist.png')
plt.savefig(rltpath)
plt.show()
def gen_eventdict(sourcePath, saveimg=True):
k, errEvents = 0, []
for source_path in sourcePath:
evtpath, bname = os.path.split(source_path)
## handle both event layouts: a plain directory, or a Yolo-Resnet-Tracker output file
if os.path.isfile(source_path):
bname, ext = os.path.splitext(bname)
evt = bname.split('_')
condt = len(evt)>=2 and evt[-1].isdigit() and len(evt[-1])>=10
if not condt: continue
# bname = r"20241126-135911-bdf91cf9-3e9a-426d-94e8-ddf92238e175_6923555210479"
# source_path = os.path.join(evtpath, bname)
# skip events whose pickle has already been generated
pickpath = os.path.join(eventDataPath, f"{bname}.pickle")
if os.path.isfile(pickpath): continue
# event = ShoppingEvent(source_path, stype="data")
# with open(pickpath, 'wb') as f:
# pickle.dump(event, f)
try:
event = ShoppingEvent(source_path, stype="source")
# save_data(event, resultPath)
with open(pickpath, 'wb') as f:
pickle.dump(event, f)
print(bname)
except Exception as e:
errEvents.append(source_path)
print(e)
# k += 1
# if k==1:
# break
errfile = os.path.join(resultPath, 'error_events.txt')
with open(errfile, 'w', encoding='utf-8') as f:
for line in errEvents:
f.write(line + '\n')
def init_std_evt_dict():
'''==== 0. Build the event list and its barcode list ==========='''
bcdList, event_spath = [], []
for evtpath in eventSourcePath:
for evtname in os.listdir(evtpath):
bname, ext = os.path.splitext(evtname)
## handle both event layouts: a plain directory, or a Yolo-Resnet-Tracker output file
fpath = os.path.join(evtpath, evtname)
if os.path.isfile(fpath) and (ext==".pkl" or ext==".pickle"):
evt = bname.split('_')
elif os.path.isdir(fpath):
evt = evtname.split('_')
else:
continue
if len(evt)>=2 and evt[-1].isdigit() and len(evt[-1])>=10:
bcdList.append(evt[-1])
event_spath.append(os.path.join(evtpath, evtname))
'''==== 1. Generate the standard feature set; only needs to run once (implemented in genfeats.py) ==========='''
bcdSet = set(bcdList)
gen_bcd_features(stdSamplePath, stdBarcodePath, stdFeaturePath, bcdSet)
print("stdFeats have generated and saved!")
'''==== 2. Generate the event dict; only needs to run once ==============='''
gen_eventdict(event_spath)
print("eventList have generated and saved!")
def test_one2one():
'''1:1 performance evaluation'''
# 1. Run once: build the event dict and the corresponding standard-feature dict
init_std_evt_dict()
# 2. Build the event set from the intersection of event barcodes and library barcodes
evtList, evtDict, stdDict = build_std_evt_dict()
rltdata = one2one_simi(evtList, evtDict, stdDict)
one2one_pr(rltdata)
def test_one2SN():
'''1:SN performance evaluation'''
# 1. Run once: build the event dict and the corresponding standard-feature dict
init_std_evt_dict()
# 2. Intersect the event barcode set with the library barcode set
evtList, evtDict, stdDict = build_std_evt_dict()
one2SN_pr(evtList, evtDict, stdDict)
if __name__ == '__main__':
'''
Seven paths in total:
(1) stdSamplePath: raw images used to build the standard comparison feature set
(2) stdBarcodePath: pickle storage of the raw-image index {barcode: [imgpath1, imgpath2, ...]}
(3) stdFeaturePath: where the standard comparison features are stored
(4) eventSourcePath: event data location
(5) resultPath: where results are stored
(6) eventDataPath: shopping events used for the 1:1 comparison, under resultPath
(7) similPath: event-level 1:1 comparison results, under resultPath
'''
# stdSamplePath = r"\\192.168.1.28\share\数据\已完成数据\展厅数据\v1.0\比对数据\整理\zhantingBase"
# stdBarcodePath = r"D:\exhibition\dataset\bcdpath"
# stdFeaturePath = r"\\192.168.1.28\share\数据\已完成数据\比对数据\barcode\all_totalBarocde\features_json\v11_barcode_11592"
# eventSourcePath = [r'D:\exhibition\images\20241202']
# eventSourcePath = [r"\\192.168.1.28\share\测试视频数据以及日志\各模块测试记录\展厅测试\1129_展厅模型v801测试组测试"]
stdSamplePath = r"\\192.168.1.28\share\数据\已完成数据\比对数据\barcode\all_totalBarocde\totalBarcode"
stdBarcodePath = r"D:\全实时\source_data\bcdpath"
stdFeaturePath = r"D:\全实时\source_data\stdfeats"
eventSourcePath = [r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\result\ShoppingDict_pkfile"]
resultPath = r"\\192.168.1.28\share\测试视频数据以及日志\算法全流程测试\202412\result\contrast"
eventDataPath = os.path.join(resultPath, "evtobjs")
similPath = os.path.join(resultPath, "simidata")
if not os.path.exists(eventDataPath):
os.makedirs(eventDataPath)
if not os.path.exists(similPath):
os.makedirs(similPath)
# test_one2one()
test_one2SN()

Some files were not shown because too many files have changed in this diff