This commit is contained in:
lee
2025-06-11 15:23:50 +08:00
commit 37ecef40f7
79 changed files with 26981 additions and 0 deletions

1
configs/__init__.py Normal file
View File

@ -0,0 +1 @@
from .utils import trainer_tools

69
configs/compare.yml Normal file
View File

@ -0,0 +1,69 @@
# configs/compare.yml
# 专为模型训练对比设计的配置文件
# 支持对比不同训练策略如蒸馏vs独立训练
# 基础配置
base:
experiment_name: "model_comparison" # 实验名称(用于结果保存目录)
seed: 42 # 随机种子(保证可复现性)
device: "cuda" # 训练设备cuda/cpu
log_level: "info" # 日志级别debug/info/warning/error
embedding_size: 256 # 特征维度
pin_memory: true # 是否启用pin_memory
distributed: true # 是否启用分布式训练
# 模型配置
models:
backbone: 'resnet18'
channel_ratio: 0.75
# 训练参数
training:
epochs: 600 # 总训练轮次
batch_size: 128 # 批次大小
lr: 0.001 # 初始学习率
optimizer: "sgd" # 优化器类型
metric: 'arcface' # 度量头类型,可选arcface/cosface/softmax
loss: "cross_entropy" # 损失函数类型可选cross_entropy/cross_entropy_smooth/center_loss/center_loss_smooth/arcface/cosface/sphereface/softmax
lr_step: 10 # 学习率调整间隔epoch
lr_decay: 0.98 # 学习率衰减率
weight_decay: 0.0005 # 权重衰减
scheduler: "cosine_annealing" # 学习率调度器可选cosine_annealing/step/none
num_workers: 32 # 数据加载线程数
checkpoints: "./checkpoints/resnet18_test/" # 模型保存目录
restore: false
restore_model: "resnet18_test/epoch_600.pth" # 模型恢复路径
# 验证参数
validation:
num_workers: 32 # 数据加载线程数
val_batch_size: 128 # 测试批次大小
# 数据配置
data:
dataset: "imagenet" # 数据集名称(示例用,可替换为实际数据集)
train_batch_size: 128 # 训练批次大小
val_batch_size: 128 # 验证批次大小
num_workers: 32 # 数据加载线程数
data_train_dir: "../data_center/contrast_learning/data_base/train" # 训练数据集根目录
data_val_dir: "../data_center/contrast_learning/data_base/val" # 验证数据集根目录
transform:
img_size: 224 # 图像尺寸
img_mean: 0.5 # 图像均值
img_std: 0.5 # 图像标准差
RandomHorizontalFlip: 0.5 # 随机水平翻转概率
RandomRotation: 180 # 随机旋转角度
ColorJitter: 0.5 # 随机颜色抖动强度
# 日志与监控
logging:
logging_dir: "./logs" # 日志保存目录
tensorboard: true # 是否启用TensorBoard
checkpoint_interval: 30 # 检查点保存间隔epoch
# 分布式训练(可选)
distributed:
enabled: false # 是否启用分布式训练
backend: "nccl" # 分布式后端nccl/gloo

75
configs/distill.yml Normal file
View File

@ -0,0 +1,75 @@
# configs/distill.yml
# 专为模型训练对比设计的配置文件
# 支持对比不同训练策略如蒸馏vs独立训练
# 基础配置
base:
experiment_name: "model_comparison" # 实验名称(用于结果保存目录)
seed: 42 # 随机种子(保证可复现性)
device: "cuda" # 训练设备cuda/cpu
log_level: "info" # 日志级别debug/info/warning/error
embedding_size: 256 # 特征维度
pin_memory: true # 是否启用pin_memory
distributed: true # 是否启用分布式训练
# 模型配置
models:
backbone: 'resnet18'
channel_ratio: 1.0 # 主干特征通道缩放比例(默认)
student_channel_ratio: 0.75
teacher_model_path: "./checkpoints/resnet50_0519/best.pth"
# 训练参数
training:
epochs: 600 # 总训练轮次
batch_size: 128 # 批次大小
lr: 0.001 # 初始学习率
optimizer: "sgd" # 优化器类型
metric: 'arcface' # 度量头类型,可选arcface/cosface/softmax
loss: "cross_entropy" # 损失函数类型可选cross_entropy/cross_entropy_smooth/center_loss/center_loss_smooth/arcface/cosface/sphereface/softmax
lr_step: 10 # 学习率调整间隔epoch
lr_decay: 0.98 # 学习率衰减率
weight_decay: 0.0005 # 权重衰减
scheduler: "cosine_annealing" # 学习率调度器可选cosine_annealing/step/none
num_workers: 32 # 数据加载线程数
checkpoints: "./checkpoints/resnet18_test/" # 模型保存目录
restore: false
restore_model: "resnet18_test/epoch_600.pth" # 模型恢复路径
distill_weight: 0.8 # 蒸馏损失权重
temperature: 4 # 蒸馏温度
# 验证参数
validation:
num_workers: 32 # 数据加载线程数
val_batch_size: 128 # 测试批次大小
# 数据配置
data:
dataset: "imagenet" # 数据集名称(示例用,可替换为实际数据集)
train_batch_size: 128 # 训练批次大小
val_batch_size: 100 # 验证批次大小
num_workers: 4 # 数据加载线程数
data_train_dir: "../data_center/contrast_learning/data_base/train" # 训练数据集根目录
data_val_dir: "../data_center/contrast_learning/data_base/val" # 验证数据集根目录
transform:
img_size: 224 # 图像尺寸
img_mean: 0.5 # 图像均值
img_std: 0.5 # 图像标准差
RandomHorizontalFlip: 0.5 # 随机水平翻转概率
RandomRotation: 180 # 随机旋转角度
ColorJitter: 0.5 # 随机颜色抖动强度
# 日志与监控
logging:
logging_dir: "./logs" # 日志保存目录
tensorboard: true # 是否启用TensorBoard
checkpoint_interval: 30 # 检查点保存间隔epoch
# 分布式训练(可选)
distributed:
enabled: false # 是否启用分布式训练
backend: "nccl" # 分布式后端nccl/gloo

69
configs/scatter.yml Normal file
View File

@ -0,0 +1,69 @@
# configs/scatter.yml
# 专为模型训练对比设计的配置文件
# 支持对比不同训练策略如蒸馏vs独立训练
# 基础配置
base:
device: "cuda" # 训练设备cuda/cpu
log_level: "info" # 日志级别debug/info/warning/error
embedding_size: 256 # 特征维度
pin_memory: true # 是否启用pin_memory
distributed: true # 是否启用分布式训练
# 模型配置
models:
backbone: 'resnet18'
channel_ratio: 1.0
# 训练参数
training:
epochs: 300 # 总训练轮次
batch_size: 64 # 批次大小
lr: 0.005 # 初始学习率
optimizer: "sgd" # 优化器类型
metric: 'arcface' # 度量头类型,可选arcface/cosface/softmax
loss: "cross_entropy" # 损失函数类型可选cross_entropy/cross_entropy_smooth/center_loss/center_loss_smooth/arcface/cosface/sphereface/softmax
lr_step: 10 # 学习率调整间隔epoch
lr_decay: 0.98 # 学习率衰减率
weight_decay: 0.0005 # 权重衰减
scheduler: "cosine_annealing" # 学习率调度器可选cosine_annealing/step/none
num_workers: 32 # 数据加载线程数
checkpoints: "./checkpoints/resnet18_scatter_6.2/" # 模型保存目录
restore: True
restore_model: "checkpoints/resnet18_scatter_6.2/best.pth" # 模型恢复路径
# 验证参数
validation:
num_workers: 32 # 数据加载线程数
val_batch_size: 128 # 测试批次大小
# 数据配置
data:
dataset: "imagenet" # 数据集名称(示例用,可替换为实际数据集)
train_batch_size: 128 # 训练批次大小
val_batch_size: 100 # 验证批次大小
num_workers: 32 # 数据加载线程数
data_train_dir: "../data_center/scatter/train" # 训练数据集根目录
data_val_dir: "../data_center/scatter/val" # 验证数据集根目录
transform:
img_size: 224 # 图像尺寸
img_mean: 0.5 # 图像均值
img_std: 0.5 # 图像标准差
RandomHorizontalFlip: 0.5 # 随机水平翻转概率
RandomRotation: 180 # 随机旋转角度
ColorJitter: 0.5 # 随机颜色抖动强度
# 日志与监控
logging:
logging_dir: "./log/2025.6.2-scatter.txt" # 日志文件保存路径
tensorboard: true # 是否启用TensorBoard
checkpoint_interval: 30 # 检查点保存间隔epoch
# 分布式训练(可选)
distributed:
enabled: false # 是否启用分布式训练
backend: "nccl" # 分布式后端nccl/gloo

41
configs/test.yml Normal file
View File

@ -0,0 +1,41 @@
# configs/test.yml
# 专为模型训练对比设计的配置文件
# 支持对比不同训练策略如蒸馏vs独立训练
# 基础配置
base:
device: "cuda" # 训练设备cuda/cpu
log_level: "info" # 日志级别debug/info/warning/error
embedding_size: 256 # 特征维度
pin_memory: true # 是否启用pin_memory
distributed: true # 是否启用分布式训练
# 模型配置
models:
backbone: 'resnet18'
channel_ratio: 1.0
model_path: "./checkpoints/resnet18_scatter_6.2/best.pth"
half: false # 是否启用半精度测试fp16
# 数据配置
data:
group_test: False # 是否启用分组测试
test_batch_size: 128 # 测试批次大小
num_workers: 32 # 数据加载线程数
test_dir: "../data_center/scatter/" # 验证数据集根目录
test_group_json: "../data_center/contrast_learning/model_test_data/test/inner_group_pairs.json"
test_list: "../data_center/scatter/val_pair.txt"
transform:
img_size: 224 # 图像尺寸
img_mean: 0.5 # 图像均值
img_std: 0.5 # 图像标准差
RandomHorizontalFlip: 0.5 # 随机水平翻转概率
RandomRotation: 180 # 随机旋转角度
ColorJitter: 0.5 # 随机颜色抖动强度
save:
save_dir: ""
save_name: ""

56
configs/utils.py Normal file
View File

@ -0,0 +1,56 @@
from model import (resnet18, mobilevit_s, MobileNetV3_Small, MobileNetV3_Large, mobilenet_v1,
PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5)
from timm.models import vit_base_patch16_224 as vit_base_16
from model.metric import ArcFace, CosFace
import torch.optim as optim
import torch.nn as nn
import timm
class trainer_tools:
    """Build lookup tables of lazily-constructed training components.

    Every ``get_*`` method returns a dict that maps a configuration name to a
    zero-argument callable. Nothing is instantiated until the caller picks an
    entry and invokes it, so unused backbones/metrics/optimizers cost nothing.
    """

    def __init__(self, conf):
        """Store the configuration used by the factories.

        Args:
            conf: Nested mapping indexed as ``conf[section][key]``. The
                factories read ``models.channel_ratio``,
                ``base.embedding_size``, ``base.device``, ``training.lr`` and
                ``training.weight_decay``.
        """
        self.conf = conf

    def get_backbone(self):
        """Return a mapping from backbone name to a zero-argument constructor.

        Returns:
            dict: ``{name: callable}``; calling the selected entry builds the
            backbone model.
        """
        return {
            'resnet18': lambda: resnet18(scale=self.conf['models']['channel_ratio']),
            'mobilevit_s': lambda: mobilevit_s(),
            'mobilenetv3_small': lambda: MobileNetV3_Small(),
            'PPLCNET_x1_0': lambda: PPLCNET_x1_0(),
            'PPLCNET_x0_5': lambda: PPLCNET_x0_5(),
            'PPLCNET_x2_5': lambda: PPLCNET_x2_5(),
            'mobilenetv3_large': lambda: MobileNetV3_Large(),
            'vit_base': lambda: vit_base_16(pretrained=True),
            'efficientnet': lambda: timm.create_model(
                'efficientnet_b0',
                pretrained=True,
                # Read the size from the 'base' section like every other
                # lookup in this class; attribute access
                # (conf.embedding_size) fails on a plain dict config.
                num_classes=self.conf['base']['embedding_size'],
            ),
        }

    def get_metric(self, class_num):
        """Return a mapping from metric-head name to a zero-argument constructor.

        A dict lookup is used instead of an if/elif chain so new heads can be
        registered in one place.

        Args:
            class_num: Number of output classes passed to the head.

        Returns:
            dict: ``{name: callable}``; calling the selected entry builds the
            head with ``base.embedding_size`` inputs and moves it to
            ``base.device``.
        """
        return {
            'arcface': lambda: ArcFace(
                self.conf['base']['embedding_size'], class_num
            ).to(self.conf['base']['device']),
            'cosface': lambda: CosFace(
                self.conf['base']['embedding_size'], class_num
            ).to(self.conf['base']['device']),
            'softmax': lambda: nn.Linear(
                self.conf['base']['embedding_size'], class_num
            ).to(self.conf['base']['device']),
        }

    def get_optimizer(self, model, metric):
        """Return a mapping from optimizer name to a zero-argument constructor.

        Each optimizer is built over two parameter groups (the model's and the
        metric head's parameters) using ``training.lr`` and
        ``training.weight_decay`` from the configuration.

        Args:
            model: Object exposing ``parameters()`` (the backbone).
            metric: Object exposing ``parameters()`` (the metric head).

        Returns:
            dict: ``{name: callable}`` for ``'sgd'``, ``'adam'`` and
            ``'adamw'``.
        """
        def _lazy(optimizer_cls):
            # Defer parameter collection and config reads until the chosen
            # optimizer is actually requested.
            return lambda: optimizer_cls(
                [{'params': model.parameters()}, {'params': metric.parameters()}],
                lr=self.conf['training']['lr'],
                weight_decay=self.conf['training']['weight_decay'],
            )

        return {
            'sgd': _lazy(optim.SGD),
            'adam': _lazy(optim.Adam),
            'adamw': _lazy(optim.AdamW),
        }

47
configs/write_feature.yml Normal file
View File

@ -0,0 +1,47 @@
# configs/write_feature.yml
# 专为模型训练对比设计的配置文件
# 支持对比不同训练策略如蒸馏vs独立训练
# 基础配置
base:
device: "cuda" # 训练设备cuda/cpu
log_level: "info" # 日志级别debug/info/warning/error
embedding_size: 256 # 特征维度
distributed: true # 是否启用分布式训练
pin_memory: true # 是否启用pin_memory
# 模型配置
models:
backbone: 'resnet18'
channel_ratio: 0.75
checkpoints: "../checkpoints/resnet18_1009/best.pth"
# 数据配置
data:
train_batch_size: 128 # 训练批次大小
test_batch_size: 128 # 验证批次大小
num_workers: 32 # 数据加载线程数
half: true # 是否启用半精度数据
img_dirs_path: "/shareData/temp_data/comparison/Hangzhou_Yunhe/base_data/05-09"
# img_dirs_path: "/home/lc/contrast_nettest/data/feature_json"
xlsx_pth: false # 过滤商品, 默认None不进行过滤
transform:
img_size: 224 # 图像尺寸
img_mean: 0.5 # 图像均值
img_std: 0.5 # 图像标准差
RandomHorizontalFlip: 0.5 # 随机水平翻转概率
RandomRotation: 180 # 随机旋转角度
ColorJitter: 0.5 # 随机颜色抖动强度
# 日志与监控
logging:
logging_dir: "./logs" # 日志保存目录
tensorboard: true # 是否启用TensorBoard
checkpoint_interval: 30 # 检查点保存间隔epoch
save:
json_bin: "../search_library/yunhedian_05-09.json" # 保存整个json文件
json_path: "../data/feature_json_compare/" # 保存单个json文件
error_barcodes: "error_barcodes.txt"
barcodes_statistics: "../search_library/barcodes_statistics.txt"