rebuild

2025-06-11 15:23:50 +08:00
commit 37ecef40f7
79 changed files with 26981 additions and 0 deletions
--- a/configs/init.py
+++ b/configs/init.py
@ -0,0 +1 @@
+from .utils import trainer_tools
--- a/configs/compare.yml
+++ b/configs/compare.yml
@ -0,0 +1,69 @@
+# configs/compare.yml
+# 专为模型训练对比设计的配置文件
+# 支持对比不同训练策略（如蒸馏vs独立训练）
+
+# 基础配置
+base:
+  experiment_name: "model_comparison"  # 实验名称（用于结果保存目录）
+  seed: 42                            # 随机种子（保证可复现性）
+  device: "cuda"                      # 训练设备（cuda/cpu）
+  log_level: "info"                   # 日志级别（debug/info/warning/error）
+  embedding_size: 256                 # 特征维度
+  pin_memory: true                    # 是否启用pin_memory
+  distributed: true                  # 是否启用分布式训练
+
+
+# 模型配置
+models:
+  backbone: 'resnet18'
+  channel_ratio: 0.75
+
+# 训练参数
+training:
+  epochs: 600                         # 总训练轮次
+  batch_size: 128                     # 批次大小
+  lr: 0.001                           # 初始学习率
+  optimizer: "sgd"                    # 优化器类型
+  metric: 'arcface'                   # metric head type (options: arcface/cosface/sphereface/softmax)
+  loss: "cross_entropy"               # 损失函数类型（可选：cross_entropy/cross_entropy_smooth/center_loss/center_loss_smooth/arcface/cosface/sphereface/softmax）
+  lr_step: 10                         # 学习率调整间隔（epoch）
+  lr_decay: 0.98                      # 学习率衰减率
+  weight_decay: 0.0005                  # 权重衰减
+  scheduler: "cosine_annealing"       # 学习率调度器（可选：cosine_annealing/step/none）
+  num_workers: 32                     # 数据加载线程数
+  checkpoints: "./checkpoints/resnet18_test/" # 模型保存目录
+  restore: false
+  restore_model: "resnet18_test/epoch_600.pth" # 模型恢复路径
+
+# 验证参数
+validation:
+  num_workers: 32                     # 数据加载线程数
+  val_batch_size: 128                 #  测试批次大小
+
+# 数据配置
+data:
+  dataset: "imagenet"                 # 数据集名称（示例用，可替换为实际数据集）
+  train_batch_size: 128               # 训练批次大小
+  val_batch_size: 128                 # 验证批次大小
+  num_workers: 32                      # 数据加载线程数
+  data_train_dir: "../data_center/contrast_learning/data_base/train"   # 训练数据集根目录
+  data_val_dir: "../data_center/contrast_learning/data_base/val"   # 验证数据集根目录
+
+transform:
+  img_size: 224                     # 图像尺寸
+  img_mean: 0.5                     # 图像均值
+  img_std: 0.5                      # image standard deviation
+  RandomHorizontalFlip: 0.5         # 随机水平翻转概率
+  RandomRotation: 180               # 随机旋转角度
+  ColorJitter: 0.5                   # 随机颜色抖动强度
+
+# 日志与监控
+logging:
+  logging_dir: "./logs"                  # 日志保存目录
+  tensorboard: true                   # 是否启用TensorBoard
+  checkpoint_interval: 30             # 检查点保存间隔（epoch）
+
+# 分布式训练（可选）
+distributed:
+  enabled: false                      # 是否启用分布式训练
+  backend: "nccl"                     # 分布式后端（nccl/gloo）
--- a/configs/distill.yml
+++ b/configs/distill.yml
@ -0,0 +1,75 @@
+# configs/distill.yml
+# 专为模型训练对比设计的配置文件
+# 支持对比不同训练策略（如蒸馏vs独立训练）
+
+# 基础配置
+base:
+  experiment_name: "model_comparison"  # 实验名称（用于结果保存目录）
+  seed: 42                            # 随机种子（保证可复现性）
+  device: "cuda"                      # 训练设备（cuda/cpu）
+  log_level: "info"                   # 日志级别（debug/info/warning/error）
+  embedding_size: 256                 # 特征维度
+  pin_memory: true                    # 是否启用pin_memory
+  distributed: true                  # 是否启用分布式训练
+
+
+# 模型配置
+models:
+  backbone: 'resnet18'
+  channel_ratio: 1.0                  # 主干特征通道缩放比例(默认)
+  student_channel_ratio: 0.75
+  teacher_model_path: "./checkpoints/resnet50_0519/best.pth"
+
+# 训练参数
+training:
+  epochs: 600                         # 总训练轮次
+  batch_size: 128                     # 批次大小
+  lr: 0.001                           # 初始学习率
+  optimizer: "sgd"                    # 优化器类型
+  metric: 'arcface'                   # metric head type (options: arcface/cosface/sphereface/softmax)
+  loss: "cross_entropy"               # 损失函数类型（可选：cross_entropy/cross_entropy_smooth/center_loss/center_loss_smooth/arcface/cosface/sphereface/softmax）
+  lr_step: 10                         # 学习率调整间隔（epoch）
+  lr_decay: 0.98                      # 学习率衰减率
+  weight_decay: 0.0005                  # 权重衰减
+  scheduler: "cosine_annealing"       # 学习率调度器（可选：cosine_annealing/step/none）
+  num_workers: 32                     # 数据加载线程数
+  checkpoints: "./checkpoints/resnet18_test/" # 模型保存目录
+  restore: false
+  restore_model: "resnet18_test/epoch_600.pth" # 模型恢复路径
+  distill_weight: 0.8                  # 蒸馏损失权重
+  temperature: 4                    # 蒸馏温度
+
+
+
+# 验证参数
+validation:
+  num_workers: 32                     # 数据加载线程数
+  val_batch_size: 128                 #  测试批次大小
+
+# 数据配置
+data:
+  dataset: "imagenet"                 # 数据集名称（示例用，可替换为实际数据集）
+  train_batch_size: 128               # 训练批次大小
+  val_batch_size: 100                 # 验证批次大小
+  num_workers: 4                      # 数据加载线程数
+  data_train_dir: "../data_center/contrast_learning/data_base/train"   # 训练数据集根目录
+  data_val_dir: "../data_center/contrast_learning/data_base/val"   # 验证数据集根目录
+
+transform:
+  img_size: 224                     # 图像尺寸
+  img_mean: 0.5                     # 图像均值
+  img_std: 0.5                      # image standard deviation
+  RandomHorizontalFlip: 0.5         # 随机水平翻转概率
+  RandomRotation: 180               # 随机旋转角度
+  ColorJitter: 0.5                   # 随机颜色抖动强度
+
+# 日志与监控
+logging:
+  logging_dir: "./logs"                  # 日志保存目录
+  tensorboard: true                   # 是否启用TensorBoard
+  checkpoint_interval: 30             # 检查点保存间隔（epoch）
+
+# 分布式训练（可选）
+distributed:
+  enabled: false                      # 是否启用分布式训练
+  backend: "nccl"                     # 分布式后端（nccl/gloo）
--- a/configs/scatter.yml
+++ b/configs/scatter.yml
@ -0,0 +1,69 @@
+# configs/scatter.yml
+# 专为模型训练对比设计的配置文件
+# 支持对比不同训练策略（如蒸馏vs独立训练）
+
+# 基础配置
+base:
+  device: "cuda"                      # 训练设备（cuda/cpu）
+  log_level: "info"                   # 日志级别（debug/info/warning/error）
+  embedding_size: 256                 # 特征维度
+  pin_memory: true                    # 是否启用pin_memory
+  distributed: true                  # 是否启用分布式训练
+
+
+# 模型配置
+models:
+  backbone: 'resnet18'
+  channel_ratio: 1.0
+
+# 训练参数
+training:
+  epochs: 300                         # 总训练轮次
+  batch_size: 64                     # 批次大小
+  lr: 0.005                           # 初始学习率
+  optimizer: "sgd"                    # 优化器类型
+  metric: 'arcface'                   # metric head type (options: arcface/cosface/sphereface/softmax)
+  loss: "cross_entropy"               # 损失函数类型（可选：cross_entropy/cross_entropy_smooth/center_loss/center_loss_smooth/arcface/cosface/sphereface/softmax）
+  lr_step: 10                         # 学习率调整间隔（epoch）
+  lr_decay: 0.98                      # 学习率衰减率
+  weight_decay: 0.0005                  # 权重衰减
+  scheduler: "cosine_annealing"       # 学习率调度器（可选：cosine_annealing/step/none）
+  num_workers: 32                     # 数据加载线程数
+  checkpoints: "./checkpoints/resnet18_scatter_6.2/" # 模型保存目录
+  restore: True
+  restore_model: "checkpoints/resnet18_scatter_6.2/best.pth" # 模型恢复路径
+
+
+
+# 验证参数
+validation:
+  num_workers: 32                     # 数据加载线程数
+  val_batch_size: 128                 #  测试批次大小
+
+# 数据配置
+data:
+  dataset: "imagenet"                 # 数据集名称（示例用，可替换为实际数据集）
+  train_batch_size: 128               # 训练批次大小
+  val_batch_size: 100                 # 验证批次大小
+  num_workers: 32                      # 数据加载线程数
+  data_train_dir: "../data_center/scatter/train"   # 训练数据集根目录
+  data_val_dir: "../data_center/scatter/val"   # 验证数据集根目录
+
+transform:
+  img_size: 224                     # 图像尺寸
+  img_mean: 0.5                     # 图像均值
+  img_std: 0.5                      # image standard deviation
+  RandomHorizontalFlip: 0.5         # 随机水平翻转概率
+  RandomRotation: 180               # 随机旋转角度
+  ColorJitter: 0.5                   # 随机颜色抖动强度
+
+# 日志与监控
+logging:
+  logging_dir: "./log/2025.6.2-scatter.txt"                  # log output path (NOTE: a file path here, while the other configs use the "./logs" directory)
+  tensorboard: true                   # 是否启用TensorBoard
+  checkpoint_interval: 30             # 检查点保存间隔（epoch）
+
+# 分布式训练（可选）
+distributed:
+  enabled: false                      # 是否启用分布式训练
+  backend: "nccl"                     # 分布式后端（nccl/gloo）
--- a/configs/test.yml
+++ b/configs/test.yml
@ -0,0 +1,41 @@
+# configs/test.yml
+# 专为模型训练对比设计的配置文件
+# 支持对比不同训练策略（如蒸馏vs独立训练）
+
+# 基础配置
+base:
+  device: "cuda"                      # 训练设备（cuda/cpu）
+  log_level: "info"                   # 日志级别（debug/info/warning/error）
+  embedding_size: 256                 # 特征维度
+  pin_memory: true                    # 是否启用pin_memory
+  distributed: true                  # 是否启用分布式训练
+
+# 模型配置
+models:
+  backbone: 'resnet18'
+  channel_ratio: 1.0
+  model_path: "./checkpoints/resnet18_scatter_6.2/best.pth"
+  half: false                        # 是否启用半精度测试（fp16）
+
+# 数据配置
+data:
+  group_test: False                 # boolean switch, not a dataset name; presumably selects the group test that reads test_group_json (TODO confirm)
+  test_batch_size: 128               # test batch size
+  num_workers: 32                      # 数据加载线程数
+  test_dir: "../data_center/scatter/"   # test dataset root directory
+  test_group_json: "../data_center/contrast_learning/model_test_data/test/inner_group_pairs.json"
+  test_list: "../data_center/scatter/val_pair.txt"
+
+transform:
+  img_size: 224                     # 图像尺寸
+  img_mean: 0.5                     # 图像均值
+  img_std: 0.5                      # image standard deviation
+  RandomHorizontalFlip: 0.5         # 随机水平翻转概率
+  RandomRotation: 180               # 随机旋转角度
+  ColorJitter: 0.5                   # 随机颜色抖动强度
+
+save:
+  save_dir: ""
+  save_name: ""
+
+
--- a/configs/utils.py
+++ b/configs/utils.py
@ -0,0 +1,56 @@
+from model import (resnet18, mobilevit_s, MobileNetV3_Small, MobileNetV3_Large, mobilenet_v1,
+                   PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5)
+from timm.models import vit_base_patch16_224 as vit_base_16
+from model.metric import ArcFace, CosFace
+import torch.optim as optim
+import torch.nn as nn
+import timm
+
+
+class trainer_tools:
+    def __init__(self, conf):
+        self.conf = conf
+
+    def get_backbone(self):
+        backbone_mapping = {
+            'resnet18': lambda: resnet18(scale=self.conf['models']['channel_ratio']),
+            'mobilevit_s': lambda: mobilevit_s(),
+            'mobilenetv3_small': lambda: MobileNetV3_Small(),
+            'PPLCNET_x1_0': lambda: PPLCNET_x1_0(),
+            'PPLCNET_x0_5': lambda: PPLCNET_x0_5(),
+            'PPLCNET_x2_5': lambda: PPLCNET_x2_5(),
+            'mobilenetv3_large': lambda: MobileNetV3_Large(),
+            'vit_base': lambda: vit_base_16(pretrained=True),
+            'efficientnet': lambda: timm.create_model('efficientnet_b0', pretrained=True,
+                                                      num_classes=self.conf.embedding_size)
+        }
+        return backbone_mapping
+
+    def get_metric(self, class_num):
+        # 优化后的metric选择代码块，使用字典映射提高可读性和扩展性
+        metric_mapping = {
+            'arcface': lambda: ArcFace(self.conf['base']['embedding_size'], class_num).to(self.conf['base']['device']),
+            'cosface': lambda: CosFace(self.conf['base']['embedding_size'], class_num).to(self.conf['base']['device']),
+            'softmax': lambda: nn.Linear(self.conf['base']['embedding_size'], class_num).to(self.conf['base']['device'])
+        }
+        return metric_mapping
+
+    def get_optimizer(self, model, metric):
+        optimizer_mapping = {
+            'sgd': lambda: optim.SGD(
+                [{'params': model.parameters()}, {'params': metric.parameters()}],
+                lr=self.conf['training']['lr'],
+                weight_decay=self.conf['training']['weight_decay']
+            ),
+            'adam': lambda: optim.Adam(
+                [{'params': model.parameters()}, {'params': metric.parameters()}],
+                lr=self.conf['training']['lr'],
+                weight_decay=self.conf['training']['weight_decay']
+            ),
+            'adamw': lambda: optim.AdamW(
+                [{'params': model.parameters()}, {'params': metric.parameters()}],
+                lr=self.conf['training']['lr'],
+                weight_decay=self.conf['training']['weight_decay']
+            )
+        }
+        return optimizer_mapping
--- a/configs/write_feature.yml
+++ b/configs/write_feature.yml
@ -0,0 +1,47 @@
+# configs/write_feature.yml
+# 专为模型训练对比设计的配置文件
+# 支持对比不同训练策略（如蒸馏vs独立训练）
+
+# 基础配置
+base:
+  device: "cuda"                      # 训练设备（cuda/cpu）
+  log_level: "info"                   # 日志级别（debug/info/warning/error）
+  embedding_size: 256                 # 特征维度
+  distributed: true                  # 是否启用分布式训练
+  pin_memory: true                    # 是否启用pin_memory
+
+# 模型配置
+models:
+  backbone: 'resnet18'
+  channel_ratio: 0.75
+  checkpoints: "../checkpoints/resnet18_1009/best.pth"
+
+# 数据配置
+data:
+  train_batch_size: 128               # 训练批次大小
+  test_batch_size: 128                 # test batch size
+  num_workers: 32                      # 数据加载线程数
+  half:  true                         # 是否启用半精度数据
+  img_dirs_path: "/shareData/temp_data/comparison/Hangzhou_Yunhe/base_data/05-09"
+#  img_dirs_path: "/home/lc/contrast_nettest/data/feature_json"
+  xlsx_pth: false  # product filter; no filtering by default (set to false here, although the original note names None as the default)
+
+transform:
+  img_size: 224                     # 图像尺寸
+  img_mean: 0.5                     # 图像均值
+  img_std: 0.5                      # image standard deviation
+  RandomHorizontalFlip: 0.5         # 随机水平翻转概率
+  RandomRotation: 180               # 随机旋转角度
+  ColorJitter: 0.5                   # 随机颜色抖动强度
+
+# 日志与监控
+logging:
+  logging_dir: "./logs"                  # 日志保存目录
+  tensorboard: true                   # 是否启用TensorBoard
+  checkpoint_interval: 30             # 检查点保存间隔（epoch）
+
+save:
+  json_bin: "../search_library/yunhedian_05-09.json"       # 保存整个json文件
+  json_path: "../data/feature_json_compare/"    # 保存单个json文件
+  error_barcodes: "error_barcodes.txt"
+  barcodes_statistics: "../search_library/barcodes_statistics.txt"