Data analysis (数据分析)
.idea/CopilotChatHistory.xml (generated, 6 lines changed)
@@ -3,6 +3,12 @@
   <component name="CopilotChatHistory">
     <option name="conversations">
       <list>
+        <Conversation>
+          <option name="createTime" value="1752195523240" />
+          <option name="id" value="0197f6fde2a87e68b893b3a36dfc838f" />
+          <option name="title" value="新对话 2025年7月11日 08:58:43" />
+          <option name="updateTime" value="1752195523240" />
+        </Conversation>
         <Conversation>
           <option name="createTime" value="1752114061266" />
           <option name="id" value="0197f222dfd27515a3dbfea638532ee5" />
@@ -15,25 +15,25 @@ base:
 
 # Model configuration
 models:
-  backbone: 'resnet34'
-  channel_ratio: 1.0
+  backbone: 'resnet18'
+  channel_ratio: 0.75
 
 # Training parameters
 training:
   epochs: 600                # Total training epochs
   batch_size: 128            # Batch size
-  lr: 0.001                  # Initial learning rate
+  lr: 0.007                  # Initial learning rate
   optimizer: "sgd"           # Optimizer type
   metric: 'arcface'          # Metric head (options: arcface/cosface/sphereface/softmax)
   loss: "cross_entropy"      # Loss type (options: cross_entropy/cross_entropy_smooth/center_loss/center_loss_smooth/arcface/cosface/sphereface/softmax)
   lr_step: 10                # LR adjustment interval (epochs)
-  lr_decay: 0.98             # LR decay rate
+  lr_decay: 0.95             # LR decay rate
   weight_decay: 0.0005       # Weight decay
   scheduler: "cosine"        # LR scheduler (options: cosine/cosine_warm/step/None)
   num_workers: 32            # Data-loading workers
-  checkpoints: "./checkpoints/resnet34_20250612_scale=1.0/"  # Checkpoint directory
-  restore: false
-  restore_model: "resnet18_test/epoch_600.pth"  # Model restore path
+  checkpoints: "./checkpoints/resnet18_20250717_scale=0.75_nosub/"  # Checkpoint directory
+  restore: true
+  restore_model: "./checkpoints/resnet18_20250716_scale=0.75_nosub/best.pth"  # Model restore path
   cosine_t_0: 10             # Initial cycle length
   cosine_t_mult: 1           # Cycle length multiplier
   cosine_eta_min: 0.00001    # Minimum learning rate
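Since `scheduler` is set to "cosine", the three `cosine_*` keys presumably parameterize a warm-restart schedule. A minimal sketch, assuming the trainer maps them onto `torch.optim.lr_scheduler.CosineAnnealingWarmRestarts` (the actual wiring lives in `trainer_tools`, which this diff does not show):

    import torch

    # conf is the training YAML above; model is the configured backbone.
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=conf['training']['lr'],
                                weight_decay=conf['training']['weight_decay'])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer,
        T_0=conf['training']['cosine_t_0'],          # initial cycle length (epochs)
        T_mult=conf['training']['cosine_t_mult'],    # cycle length multiplier
        eta_min=conf['training']['cosine_eta_min'])  # LR floor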
@@ -49,8 +49,8 @@ data:
   train_batch_size: 128      # Training batch size
   val_batch_size: 128        # Validation batch size
   num_workers: 32            # Data-loading workers
-  data_train_dir: "../data_center/contrast_learning/data_base/train"  # Training dataset root
-  data_val_dir: "../data_center/contrast_learning/data_base/val"      # Validation dataset root
+  data_train_dir: "../data_center/contrast_data/v2/train"  # Training dataset root
+  data_val_dir: "../data_center/contrast_data/v2/val"      # Validation dataset root
 
 transform:
   img_size: 224              # Image size
@@ -62,7 +62,7 @@ transform:
 
 # Logging & monitoring
 logging:
-  logging_dir: "./logs/resnet50_log"                   # Log directory
+  logging_dir: "./logs/resnet18_scale=0.75_nosub_log"  # Log directory
   tensorboard: true          # Enable TensorBoard
   checkpoint_interval: 30    # Checkpoint interval (epochs)
configs/similar_analysis.yml (new file, 49 lines)
@@ -0,0 +1,49 @@
+# configs/similar_analysis.yml
+# Configuration for comparing model training runs
+# Supports contrasting training strategies (e.g. distillation vs. standalone training)
+
+# Base configuration
+base:
+  experiment_name: "model_comparison"  # Experiment name (used for the results directory)
+  device: "cuda"             # Training device (cuda/cpu)
+  embedding_size: 256        # Feature dimension
+  pin_memory: true           # Enable pin_memory
+  distributed: true          # Enable distributed training
+
+# Model configuration
+models:
+  backbone: 'resnet18'
+  channel_ratio: 0.75
+  model_path: "../checkpoints/resnet18_1009/best.pth"
+
+heatmap:
+  feature_layer: "layer4"
+  show_heatmap: true
+
+# Data configuration
+data:
+  dataset: "imagenet"        # Dataset name (placeholder; replace with the actual dataset)
+  train_batch_size: 128      # Training batch size
+  val_batch_size: 8          # Validation batch size
+  num_workers: 32            # Data-loading workers
+  data_dir: "/home/lc/data_center/image_analysis/error_compare_subimg"
+  image_joint_pth: "/home/lc/data_center/image_analysis/error_compare_result"
+
+transform:
+  img_size: 224              # Image size
+  img_mean: 0.5              # Image mean
+  img_std: 0.5               # Image standard deviation
+  RandomHorizontalFlip: 0.5  # Random horizontal flip probability
+  RandomRotation: 180        # Random rotation angle
+  ColorJitter: 0.5           # Random color jitter strength
+
+# Logging & monitoring
+logging:
+  logging_dir: "./logs/resnet18_scale=0.75_nosub_log"  # Log directory
+  tensorboard: true          # Enable TensorBoard
+  checkpoint_interval: 30    # Checkpoint interval (epochs)
+
+# Distributed training (optional)
+distributed:
+  enabled: false             # Enable distributed training
+  backend: "nccl"            # Distributed backend (nccl/gloo)
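`cal_cam` (tools/getHeatMap.py) is not part of this diff; the `feature_layer: "layer4"` key suggests it captures activations from the backbone's last residual stage. A minimal sketch of that mechanism, under the assumption that it uses a forward hook (every name other than the config key is illustrative):

    import torch

    def register_feature_hook(model, layer_name):
        """Cache the activations of the named module on every forward pass."""
        cache = {}

        def hook(module, inputs, output):
            cache['features'] = output.detach()

        # dict(model.named_modules()) maps e.g. 'layer4' to the module itself.
        dict(model.named_modules())[layer_name].register_forward_hook(hook)
        return cache

    # cache = register_feature_hook(model, conf['heatmap']['feature_layer'])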
@@ -11,16 +11,22 @@ data:
   max_files_ratio: 0.1
   min_files: 10
   split_ratio: 0.9
+  combine_scr_dir: "../../data_center/contrast_data/v1/val"  # Merge source directory
+  combine_dst_dir: "../../data_center/contrast_data/v2/val"  # Merge destination directory
 
 extend:
   extend_same_dir: true
   extend_extra: true
-  extend_extra_dir: "../../data_center/contrast_data/v1/extra"
+  extend_extra_dir: "../../data_center/contrast_data/v1/extra"  # Extra test-set data
   extend_train: true
-  extend_train_dir: "../../data_center/contrast_data/v1/train"
+  extend_train_dir: "../../data_center/contrast_data/v1/train"  # Training-set data extension
 
 limit:
   count_limit: true
   limit_count: 200
-  limit_dir: "../../data_center/contrast_data/v1/train"
+  limit_dir: "../../data_center/contrast_data/v1/train"  # Cap the sample count per class
 
 control:
   combine: true              # Whether to merge subclass datasets
   split: false               # Subclass dataset splitting and augmentation
@@ -8,7 +8,7 @@ base:
   log_level: "info"          # Log level (debug/info/warning/error)
   embedding_size: 256        # Feature dimension
   pin_memory: true           # Enable pin_memory
-  distributed: false         # Enable distributed training
+  distributed: false         # Enable distributed training (must stay off when heatmaps are enabled)
 
 # Model configuration
 models:
@@ -22,7 +22,7 @@ data:
   test_batch_size: 128       # Test batch size
   num_workers: 32            # Data-loading workers
   half: true                 # Enable half-precision data
-  img_dirs_path: "/personalDocument/lic/base+stlib"
+  img_dirs_path: "/home/lc/data_center/baseStlib/pic/stlib_base"  # Storage path for base standard-library images
   # img_dirs_path: "/home/lc/contrast_nettest/data/feature_json"
   xlsx_pth: false            # Product filter; default None disables filtering
@@ -42,7 +42,7 @@ logging:
 
 save:
   json_bin: "../search_library/yunhedian_05-09.json"  # Save the full json file
-  json_path: "../feature_json/base+stlib/"            # Directory for individual json files
+  json_path: "/home/lc/data_center/baseStlib/feature_json/stlib_base"  # Directory for individual json files
   error_barcodes: "error_barcodes.txt"
   barcodes_statistics: "../search_library/barcodes_statistics.txt"
   create_single_json: true   # Whether to save individual json files
data_preprocessing/combine_sub_class.py (new file, 25 lines)
@@ -0,0 +1,25 @@
+import os
+import shutil
+
+
+def combine_dirs(conf):
+    """Merge subclass directories into their parent class by name prefix."""
+    source_root = conf['data']['combine_scr_dir']
+    target_root = conf['data']['combine_dst_dir']
+    for roots, dir_names, files in os.walk(source_root):
+        for dir_name in dir_names:
+            source_dir = os.path.join(roots, dir_name)
+            # Everything before the first '_' is the parent class name.
+            target_dir = os.path.join(target_root, dir_name.split('_')[0])
+            if not os.path.exists(target_dir):
+                os.mkdir(target_dir)
+            for filename in os.listdir(source_dir):
+                print(filename)
+                source_file = os.sep.join([source_dir, filename])
+                target_file = os.sep.join([target_dir, filename])
+                shutil.copy(source_file, target_file)
+                # print(f"Copied directory {source_dir} to {target_dir}")
+
+
+# if __name__ == '__main__':
+#     source_root = r'scatter_mini'
+#     target_root = r'C:\Users\123\Desktop\scatter-1'
+#     # combine_dirs(conf)
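A usage sketch: subdirectories whose names share the prefix before the first underscore are merged into a single class directory. The paths come from the config hunk above; the subclass names are hypothetical. Note that `os.mkdir` creates only one level, so `combine_dst_dir` itself must already exist.

    from combine_sub_class import combine_dirs

    conf = {'data': {
        'combine_scr_dir': '../../data_center/contrast_data/v1/val',
        'combine_dst_dir': '../../data_center/contrast_data/v2/val',
    }}

    # Hypothetical source layout:
    #   v1/val/12345_front/ and v1/val/12345_back/
    # both copy into:
    #   v2/val/12345/
    combine_dirs(conf)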
@@ -1,14 +1,18 @@
 from create_extra import split_subdirs
 from data_split import split_directory
 from extend import ImageExtendProcessor
+from combine_sub_class import combine_dirs
 import yaml
 
 
 def data_preprocessing(conf):
-    # split_subdirs(conf)
-    # image_ex = ImageExtendProcessor(conf)
-    # image_ex.control_number()
+    if conf['control']['split']:
+        split_subdirs(conf)
+        image_ex = ImageExtendProcessor(conf)
+        image_ex.control_number()
+        split_directory(conf)
+    if conf['control']['combine']:
+        combine_dirs(conf)
 
 
 if __name__ == '__main__':
@@ -17,7 +17,7 @@ from configs import trainer_tools
 import yaml
 from datetime import datetime
 
-with open('configs/test.yml', 'r') as f:
+with open('../configs/test.yml', 'r') as f:
     conf = yaml.load(f, Loader=yaml.FullLoader)
 
 # Constants from config
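The switch from 'configs/test.yml' to '../configs/test.yml' pins the script to being launched from its own subdirectory. A working-directory-independent alternative, sketched here, resolves the path against the file location instead:

    import os
    import yaml

    # Resolve the config relative to this file rather than the current directory.
    CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               '..', 'configs', 'test.yml')
    with open(CONFIG_PATH, 'r') as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)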
@@ -3,7 +3,7 @@ from tools.getHeatMap import cal_cam
 import os
 
 
-def merge_imgs(img1_path, img2_path, conf, similar=None, label=None, cam=None):
+def merge_imgs(img1_path, img2_path, conf, similar=None, label=None, cam=None, save_path=None):
     save = True
     position = (50, 50)  # Top-left corner of the text
     color = (255, 0, 0)  # Red text, RGB format
@@ -11,21 +11,24 @@ def merge_imgs(img1_path, img2_path, conf, similar=None, label=None, cam=None):
     # os.makedirs(os.sep.join([save_path, str(label)]))
     # save_path = os.sep.join([save_path, str(label)])
     # img_name = os.path.basename(img1_path).split('.')[0] + '_' + os.path.basename(img2_path).split('.')[0] + '.png'
+    if save_path is None:
+        save_path = conf['data']['image_joint_pth']
     if not conf['heatmap']['show_heatmap']:
         img1 = Image.open(img1_path)
         img2 = Image.open(img2_path)
         img1 = img1.resize((224, 224))
         img2 = img2.resize((224, 224))
-        save_path = conf['data']['image_joint_pth']
+        # save_path = conf['data']['image_joint_pth']
     else:
         assert cam is not None, 'cam is None'
         img1 = cam.get_hot_map(img1_path)
         img2 = cam.get_hot_map(img2_path)
-        save_path = conf['heatmap']['image_joint_pth']
+        # save_path = conf['heatmap']['image_joint_pth']
     # print('img1_path', img1)
     # print('img2_path', img2)
     if not os.path.exists(os.sep.join([save_path, str(label)])):
         os.makedirs(os.sep.join([save_path, str(label)]))
-    if save_path is None:
-        save_path = os.sep.join([save_path, str(label)])
+    save_path = os.sep.join([save_path, str(label)])
     img_name = os.path.basename(img1_path).split('.')[0] + '_' + os.path.basename(img2_path).split('.')[0] + '.png'
     assert img1.height == img2.height
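With the new `save_path` parameter, a caller can redirect each merged pair to its own directory; when it is omitted, the function now falls back to `conf['data']['image_joint_pth']` regardless of the heatmap branch. A call sketch with hypothetical image paths, assuming `conf` and `cam` are already constructed by the caller:

    # Hypothetical paths; conf and cam as built elsewhere in the pipeline.
    merge_imgs('pairs/a/001.png', 'pairs/b/002.png', conf,
               similar=0.83, label=None, cam=cam,
               save_path='/home/lc/data_center/image_analysis/error_compare_result/pair_001')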
tools/similar_analysis.py (new file, 106 lines)
@@ -0,0 +1,106 @@
+from configs.utils import trainer_tools
+from test_ori import group_image, featurize, cosin_metric
+from tools.dataset import get_transform
+from tools.getHeatMap import cal_cam
+from tools.image_joint import merge_imgs
+import torch.nn as nn
+import torch
+from collections import ChainMap
+import yaml
+import os
+
+
+class analysis:
+    def __init__(self):
+        with open('../configs/similar_analysis.yml', 'r') as f:
+            self.conf = yaml.load(f, Loader=yaml.FullLoader)
+        self.model = self.initialize_model(self.conf)
+        _, self.test_transform = get_transform(self.conf)
+        self.cam = cal_cam(self.model, self.conf)
+
+    def initialize_model(self, conf):
+        """Initialize the model and metric method."""
+        tr_tools = trainer_tools(conf)
+        backbone_mapping = tr_tools.get_backbone()
+
+        if conf['models']['backbone'] in backbone_mapping:
+            model = backbone_mapping[conf['models']['backbone']]()
+        else:
+            raise ValueError('Unsupported backbone: {}'.format(conf['models']['backbone']))
+        try:
+            model.load_state_dict(torch.load(conf['models']['model_path'],
+                                             map_location=conf['base']['device']))
+        except Exception:
+            # Checkpoints saved from DataParallel/DDP carry a "module." prefix;
+            # strip it and retry.
+            state_dict = torch.load(conf['models']['model_path'],
+                                    map_location=conf['base']['device'])
+            new_state_dict = {}
+            for k, v in state_dict.items():
+                new_key = k.replace("module.", "")
+                new_state_dict[new_key] = v
+            model.load_state_dict(new_state_dict, strict=False)
+        return model.eval()
+
+    def get_feature(self, img_pth):
+        group = group_image([img_pth], self.conf['data']['val_batch_size'])
+        feature = featurize(group[0], self.test_transform, self.model, self.conf['base']['device'])
+        return feature
+
+    def get_similarity(self, feature_dict1, feature_dict2):
+        similarity = cosin_metric(feature_dict1, feature_dict2)
+        print(f"Similarity: {similarity}")
+        return similarity
+
+    def get_feature_map(self, all_imgs):
+        feature_dicts = {}
+        for img_pth in all_imgs:
+            feature_dict = self.get_feature(img_pth)
+            feature_dicts = dict(ChainMap(feature_dict, feature_dicts))
+        return feature_dicts
+
+    def get_image_map(self):
+        # Pair every image in the first subdirectory with every image in the second.
+        all_compare_img = []
+        for root, dirs, files in os.walk(self.conf['data']['data_dir']):
+            if len(dirs) == 2:
+                dir_pth_1 = os.sep.join([root, dirs[0]])
+                dir_pth_2 = os.sep.join([root, dirs[1]])
+                for img_name_1 in os.listdir(dir_pth_1):
+                    for img_name_2 in os.listdir(dir_pth_2):
+                        all_compare_img.append((os.sep.join([dir_pth_1, img_name_1]),
+                                                os.sep.join([dir_pth_2, img_name_2])))
+        return all_compare_img
+
+    def create_total_feature(self):
+        all_imgs = []
+        for root, dirs, files in os.walk(self.conf['data']['data_dir']):
+            if len(dirs) == 2:
+                for dir_name in dirs:
+                    dir_pth = os.sep.join([root, dir_name])
+                    for img_name in os.listdir(dir_pth):
+                        all_imgs.append(os.sep.join([dir_pth, img_name]))
+        return all_imgs
+
+    def get_contrast_result(self, feature_dicts, all_compare_img):
+        for img_pth1, img_pth2 in all_compare_img:
+            feature_dict1 = feature_dicts[img_pth1]
+            feature_dict2 = feature_dicts[img_pth2]
+            similarity = self.get_similarity(feature_dict1.cpu().numpy(),
                                             feature_dict2.cpu().numpy())
+            dir_name = img_pth1.split(os.sep)[-3]
+            save_path = os.sep.join([self.conf['data']['image_joint_pth'], dir_name])
+            if similarity > 0.7:
+                merge_imgs(img_pth1,
+                           img_pth2,
+                           self.conf,
+                           similarity,
+                           label=None,
+                           cam=self.cam,
+                           save_path=save_path)
+            print(similarity)
+
+
+if __name__ == '__main__':
+    ana = analysis()
+    all_imgs = ana.create_total_feature()
+    feature_dicts = ana.get_feature_map(all_imgs)
+    all_compare_img = ana.get_image_map()
+    ana.get_contrast_result(feature_dicts, all_compare_img)
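`cosin_metric` comes from `test_ori`, which this diff does not touch. Assuming it is plain cosine similarity over the two feature vectors, it would be equivalent to the following sketch:

    import numpy as np

    def cosine_similarity(f1: np.ndarray, f2: np.ndarray) -> float:
        # Cosine of the angle between the flattened feature vectors.
        f1, f2 = f1.ravel(), f2.ravel()
        return float(np.dot(f1, f2) / (np.linalg.norm(f1) * np.linalg.norm(f2)))

Pairs scoring above the 0.7 threshold are rendered side by side via merge_imgs, with one output directory per comparison group.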
@@ -17,7 +17,7 @@ import yaml
 from datetime import datetime
 
 
-def load_configuration(config_path='configs/scatter.yml'):
+def load_configuration(config_path='configs/compare.yml'):
     """Load the configuration file."""
     with open(config_path, 'r') as f:
         return yaml.load(f, Loader=yaml.FullLoader)
@@ -74,7 +74,7 @@ def train_one_epoch(model, metric, criterion, optimizer, dataloader, device, sca
         data = data.to(device)
         labels = labels.to(device)
 
-        with torch.cuda.amp.autocast():
+        # with torch.cuda.amp.autocast():
         embeddings = model(data)
         if not conf['training']['metric'] == 'softmax':
             thetas = metric(embeddings, labels)
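Commenting out the `with torch.cuda.amp.autocast():` line only runs if the block body is dedented elsewhere in the commit; an alternative that keeps the indentation stable is to toggle autocast's `enabled` flag. A sketch, where the `amp` config key is an assumption rather than part of this repository's config:

    use_amp = conf['training'].get('amp', False)  # hypothetical config key
    with torch.cuda.amp.autocast(enabled=use_amp):
        embeddings = model(data)
        if not conf['training']['metric'] == 'softmax':
            thetas = metric(embeddings, labels)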