增加学习率调度方式

This commit is contained in:
lee
2025-06-13 10:45:53 +08:00
parent 37ecef40f7
commit 1803f319a5
13 changed files with 319 additions and 294 deletions

View File

@ -15,8 +15,8 @@ base:
# 模型配置
models:
backbone: 'resnet18'
channel_ratio: 0.75
backbone: 'resnet34'
channel_ratio: 1.0
# 训练参数
training:
@ -29,11 +29,14 @@ training:
lr_step: 10 # 学习率调整间隔epoch
lr_decay: 0.98 # 学习率衰减率
weight_decay: 0.0005 # 权重衰减
scheduler: "cosine_annealing" # 学习率调度器可选cosine_annealing/step/none
scheduler: "cosine" # 学习率调度器可选cosine/cosine_warm/step/None
num_workers: 32 # 数据加载线程数
checkpoints: "./checkpoints/resnet18_test/" # 模型保存目录
checkpoints: "./checkpoints/resnet34_20250612_scale=1.0/" # 模型保存目录
restore: false
restore_model: "resnet18_test/epoch_600.pth" # 模型恢复路径
cosine_t_0: 10 # 初始周期长度
cosine_t_mult: 1 # 周期长度倍率
cosine_eta_min: 0.00001 # 最小学习率
# 验证参数
validation:

View File

@ -8,13 +8,13 @@ base:
log_level: "info" # 日志级别debug/info/warning/error
embedding_size: 256 # 特征维度
pin_memory: true # 是否启用pin_memory
distributed: true # 是否启用分布式训练
distributed: false # 是否启用分布式训练
# 模型配置
models:
backbone: 'resnet18'
channel_ratio: 1.0
model_path: "./checkpoints/resnet18_scatter_6.2/best.pth"
channel_ratio: 0.75
model_path: "./checkpoints/resnet18_0515/best.pth"
half: false # 是否启用半精度测试fp16
# 数据配置
@ -22,9 +22,9 @@ data:
group_test: False # 是否启用分组测试(推测与 test_group_json 配合使用,待确认)
test_batch_size: 128 # 测试批次大小
num_workers: 32 # 数据加载线程数
test_dir: "../data_center/scatter/" # 验证数据集根目录
test_dir: "../data_center/contrast_learning/contrast_test_data" # 验证数据集根目录
test_group_json: "../data_center/contrast_learning/model_test_data/test/inner_group_pairs.json"
test_list: "../data_center/scatter/val_pair.txt"
test_list: "../data_center/contrast_learning/contrast_test_data/test_pair.txt"
transform:
img_size: 224 # 图像尺寸

27
configs/transform.yml Normal file
View File

@ -0,0 +1,27 @@
# configs/transform.yml
# pth转换onnx配置文件
# 基础配置
base:
experiment_name: "model_comparison" # 实验名称(用于结果保存目录)
seed: 42 # 随机种子(保证可复现性)
device: "cuda" # 训练设备cuda/cpu
log_level: "info" # 日志级别debug/info/warning/error
embedding_size: 256 # 特征维度
pin_memory: true # 是否启用pin_memory
distributed: true # 是否启用分布式训练
# 模型配置
models:
backbone: 'resnet50'
channel_ratio: 1.0
model_path: "../checkpoints/resnet50_0519/best.pth"
onnx_model: "../checkpoints/resnet50_0519/best.onnx"
rknn_model: "../checkpoints/resnet50_0519/best.rknn"
# 日志与监控
logging:
logging_dir: "./logs" # 日志保存目录
tensorboard: true # 是否启用TensorBoard
checkpoint_interval: 30 # 检查点保存间隔epoch

View File

@ -1,4 +1,4 @@
from model import (resnet18, mobilevit_s, MobileNetV3_Small, MobileNetV3_Large, mobilenet_v1,
from model import (resnet18, resnet34, resnet50, mobilevit_s, MobileNetV3_Small, MobileNetV3_Large, mobilenet_v1,
PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5)
from timm.models import vit_base_patch16_224 as vit_base_16
from model.metric import ArcFace, CosFace
@ -14,6 +14,8 @@ class trainer_tools:
def get_backbone(self):
backbone_mapping = {
'resnet18': lambda: resnet18(scale=self.conf['models']['channel_ratio']),
'resnet34': lambda: resnet34(scale=self.conf['models']['channel_ratio']),
'resnet50': lambda: resnet50(scale=self.conf['models']['channel_ratio']),
'mobilevit_s': lambda: mobilevit_s(),
'mobilenetv3_small': lambda: MobileNetV3_Small(),
'PPLCNET_x1_0': lambda: PPLCNET_x1_0(),
@ -54,3 +56,24 @@ class trainer_tools:
)
}
return optimizer_mapping
def get_scheduler(self, optimizer):
    """Return the table of supported learning-rate scheduler factories.

    Nothing is instantiated here: each value is a zero-argument callable
    that builds the scheduler around ``optimizer`` when invoked, so the
    caller selects one by name and calls it, e.g.
    ``get_scheduler(opt)['cosine']()``.

    Keys read from ``self.conf['training']`` (looked up lazily, when the
    chosen factory is called):
        step:        ``lr_step``, ``lr_decay`` (both required).
        cosine:      ``epochs`` (required), ``cosine_eta_min`` (optional,
                     defaults to 0).
        cosine_warm: ``cosine_t_0`` (default 10), ``cosine_t_mult``
                     (default 1), ``cosine_eta_min`` (default 0).

    Args:
        optimizer: the optimizer instance the scheduler will drive.

    Returns:
        dict mapping scheduler name to a factory callable.
    """
    scheduler_mapping = {
        'step': lambda: optim.lr_scheduler.StepLR(
            optimizer,
            step_size=self.conf['training']['lr_step'],
            gamma=self.conf['training']['lr_decay'],
        ),
        'cosine': lambda: optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=self.conf['training']['epochs'],
            # Optional, like in 'cosine_warm' below: configs that do not
            # define cosine_eta_min must not fail with KeyError.
            eta_min=self.conf['training'].get('cosine_eta_min', 0),
        ),
        'cosine_warm': lambda: optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer,
            T_0=self.conf['training'].get('cosine_t_0', 10),
            T_mult=self.conf['training'].get('cosine_t_mult', 1),
            eta_min=self.conf['training'].get('cosine_eta_min', 0),
        ),
    }
    return scheduler_mapping