Files
ieemoo-ai-contrast/configs/compare.yml
2025-07-17 14:33:18 +08:00

73 lines
3.3 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# configs/compare.yml
# 专为模型训练对比设计的配置文件
# 支持对比不同训练策略如蒸馏vs独立训练
# Base settings
base:
  # Experiment name (used for the results directory)
  experiment_name: "model_comparison"
  # Random seed (for reproducibility)
  seed: 42
  # Training device: cuda / cpu
  device: "cuda"
  # Log level: debug / info / warning / error
  log_level: "info"
  # Feature (embedding) dimension
  embedding_size: 256
  # Whether to enable pin_memory
  pin_memory: true
  # Whether to enable distributed training.
  # NOTE(review): the separate `distributed.enabled` key in this file is
  # false while this one is true — confirm which key the code reads.
  distributed: true
# Model settings
models:
  # Backbone architecture name
  backbone: "resnet18"
  # NOTE(review): presumably a channel-width scaling factor for the
  # backbone (checkpoint paths mention "scale=0.75") — confirm.
  channel_ratio: 0.75
# Training parameters
training:
  # Total number of training epochs
  epochs: 600
  # Batch size
  batch_size: 128
  # Initial learning rate
  lr: 0.007
  # Optimizer type
  optimizer: "sgd"

  # Options: arcface / cosface / sphereface / softmax.
  # NOTE(review): previously described as the "loss function type", but
  # `loss` below is a separate key — presumably this selects the
  # metric/margin head; confirm.
  metric: "arcface"
  # Loss function type. Options: cross_entropy / cross_entropy_smooth /
  # center_loss / center_loss_smooth / arcface / cosface / sphereface /
  # softmax
  loss: "cross_entropy"

  # Learning-rate adjustment interval (epochs)
  lr_step: 10
  # Learning-rate decay rate
  lr_decay: 0.95
  # Weight decay
  weight_decay: 0.0005
  # Learning-rate scheduler. Options: cosine / cosine_warm / step / None
  scheduler: "cosine"
  # Initial cycle length
  cosine_t_0: 10
  # Cycle length multiplier
  cosine_t_mult: 1
  # Minimum learning rate
  cosine_eta_min: 0.00001

  # Number of data-loading threads
  num_workers: 32

  # Directory where model checkpoints are saved
  checkpoints: "./checkpoints/resnet18_20250717_scale=0.75_nosub/"
  # NOTE(review): presumably toggles resuming from `restore_model` — confirm.
  restore: true
  # Path of the model to restore from
  restore_model: "./checkpoints/resnet18_20250716_scale=0.75_nosub/best.pth"
# Validation parameters
validation:
  # Number of data-loading threads
  num_workers: 32
  # Test batch size
  val_batch_size: 128
# Data settings
# NOTE(review): batch sizes and num_workers are also defined under
# `training` and `validation` — confirm which values the loaders read.
data:
  # Dataset name (example only; replace with the actual dataset)
  dataset: "imagenet"
  # Training batch size
  train_batch_size: 128
  # Validation batch size
  val_batch_size: 128
  # Number of data-loading threads
  num_workers: 32
  # Root directory of the training dataset
  data_train_dir: "../data_center/contrast_data/v2/train"
  # Root directory of the validation dataset
  data_val_dir: "../data_center/contrast_data/v2/val"
transform:
  # Image size
  img_size: 224
  # Image mean
  img_mean: 0.5
  # Image standard deviation
  img_std: 0.5
  # Probability of a random horizontal flip
  RandomHorizontalFlip: 0.5
  # Random rotation angle
  RandomRotation: 180
  # Random color-jitter strength
  ColorJitter: 0.5
# Logging and monitoring
logging:
  # Directory where logs are saved
  logging_dir: "./logs/resnet18_scale=0.75_nosub_log"
  # Whether to enable TensorBoard
  tensorboard: true
  # Checkpoint save interval (epochs)
  checkpoint_interval: 30
# Distributed training (optional)
# NOTE(review): `base.distributed` in this file is true while `enabled`
# here is false — confirm which key the code reads.
distributed:
  # Whether to enable distributed training
  enabled: false
  # Distributed backend: nccl / gloo
  backend: "nccl"