# configs/compare.yml
# Configuration file for comparing model training runs.
# Supports comparing different training strategies
# (e.g. distillation vs. independent training).

# Base settings
base:
  # Experiment name (used for the results directory).
  experiment_name: "model_comparison"
  seed: 42  # random seed (for reproducibility)
  device: "cuda"  # training device (cuda/cpu)
  log_level: "info"  # log level (debug/info/warning/error)
  embedding_size: 256  # feature dimension
  pin_memory: true  # whether to enable pin_memory
  # Whether to enable distributed training.
  # NOTE(review): the top-level `distributed.enabled` in this file is false
  # while this flag is true — confirm which one the consumer reads.
  distributed: true

# Model settings
models:
  backbone: "resnet18"
  channel_ratio: 0.75

# Training parameters
training:
  epochs: 600  # total number of training epochs
  batch_size: 128  # batch size
  lr: 0.001  # initial learning rate
  optimizer: "sgd"  # optimizer type
  # Options: arcface, cosface, sphereface, softmax.
  # NOTE(review): the original comment described this as the loss-function
  # type, identical to `loss` below — confirm what `metric` selects.
  metric: "arcface"
  # Loss function type. Options:
  #   cross_entropy, cross_entropy_smooth, center_loss, center_loss_smooth,
  #   arcface, cosface, sphereface, softmax
  loss: "cross_entropy"
  lr_step: 10  # learning-rate adjustment interval (epochs)
  lr_decay: 0.98  # learning-rate decay rate
  weight_decay: 0.0005  # weight decay
  # Learning-rate scheduler. Options: cosine_annealing, step, none.
  scheduler: "cosine_annealing"
  num_workers: 32  # number of data-loading threads
  checkpoints: "./checkpoints/resnet18_test/"  # model save directory
  # NOTE(review): presumably toggles loading of `restore_model` — confirm.
  restore: false
  restore_model: "resnet18_test/epoch_600.pth"  # model restore path

# Validation parameters
validation:
  num_workers: 32  # number of data-loading threads
  val_batch_size: 128  # test batch size

# Data settings
# NOTE(review): the batch sizes and num_workers below repeat values that also
# appear under `training` and `validation` — confirm which the consumer reads.
data:
  # Dataset name (example value; replace with the actual dataset).
  dataset: "imagenet"
  train_batch_size: 128  # training batch size
  val_batch_size: 128  # validation batch size
  num_workers: 32  # number of data-loading threads
  # Root directory of the training dataset.
  data_train_dir: "../data_center/contrast_learning/data_base/train"
  # Root directory of the validation dataset.
  data_val_dir: "../data_center/contrast_learning/data_base/val"

# NOTE(review): the nesting level of this section could not be recovered; it
# is kept at top level (column 0, as found). If the consumer reads
# `data.transform`, indent it under `data` instead — confirm.
transform:
  img_size: 224  # image size
  img_mean: 0.5  # image mean
  # Image std. The original comment said "variance"; presumably this is the
  # standard deviation — confirm.
  img_std: 0.5
  RandomHorizontalFlip: 0.5  # probability of random horizontal flip
  RandomRotation: 180  # random rotation angle
  ColorJitter: 0.5  # random color-jitter strength

# Logging and monitoring
logging:
  logging_dir: "./logs"  # log save directory
  tensorboard: true  # whether to enable TensorBoard
  checkpoint_interval: 30  # checkpoint save interval (epochs)

# Distributed training (optional)
distributed:
  # Whether to enable distributed training.
  # NOTE(review): `base.distributed` in this file is true while this flag is
  # false — confirm which one the consumer reads.
  enabled: false
  backend: "nccl"  # distributed backend (nccl/gloo)