# configs/scatter.yml
# Configuration file designed for comparing model training runs.
# Supports comparing different training strategies
# (e.g. distillation vs. standalone training).
#
# NOTE(review): the nesting below was reconstructed from a flattened
# source; confirm section boundaries against the code that loads this
# file (in particular whether `transform` is top-level or under `data`).
---
# Base settings
base:
  device: "cuda"  # training device (cuda/cpu)
  log_level: "info"  # log level (debug/info/warning/error)
  embedding_size: 256  # feature dimension
  pin_memory: true  # whether to enable pin_memory
  # Whether to enable distributed training.
  # NOTE(review): this is `true` while `distributed.enabled` at the
  # bottom of the file is `false` - confirm which one the code reads.
  distributed: true

# Model settings
models:
  backbone: "resnet18"
  channel_ratio: 1.0

# Training parameters
training:
  epochs: 800  # total number of training epochs
  # Batch size.
  # NOTE(review): `data.train_batch_size` is 128 - confirm which is used.
  batch_size: 64
  lr: 0.01  # initial learning rate
  optimizer: "sgd"  # optimizer type
  # Metric type (the original comment labelled this a loss type).
  # Options: arcface/cosface/sphereface/softmax
  metric: "arcface"
  # Loss function type.
  # Options: cross_entropy/cross_entropy_smooth/center_loss/
  #          center_loss_smooth/arcface/cosface/sphereface/softmax
  loss: "cross_entropy"
  lr_step: 10  # learning-rate adjustment interval (epochs)
  lr_decay: 0.95  # learning-rate decay rate
  weight_decay: 0.0005  # weight decay
  # Learning-rate scheduler. Options: cosine_annealing/step/none
  scheduler: "step"
  num_workers: 32  # number of data-loading workers
  # Directory where model checkpoints are saved.
  checkpoints: "./checkpoints/resnet18_scatter_7.4/"
  restore: false
  # Path of the model to restore from.
  # NOTE(review): points at a different run directory (6.25) than
  # `checkpoints` (7.4) and has no leading "./" - presumably intentional.
  restore_model: "checkpoints/resnet18_scatter_6.25/best.pth"

# Validation parameters
validation:
  num_workers: 32  # number of data-loading workers
  # Test batch size.
  # NOTE(review): `data.val_batch_size` is 100 - confirm which is used.
  val_batch_size: 128

# Data settings
data:
  # Dataset name (example value; replace with the actual dataset).
  dataset: "imagenet"
  train_batch_size: 128  # training batch size
  val_batch_size: 100  # validation batch size
  num_workers: 32  # number of data-loading workers
  # Root directory of the training dataset.
  data_train_dir: "../data_center/scatter/v4/train"
  # Root directory of the validation dataset.
  data_val_dir: "../data_center/scatter/v4/val"

transform:
  img_size: 224  # image size
  img_mean: 0.5  # image mean
  img_std: 0.5  # image standard deviation
  RandomHorizontalFlip: 0.5  # random horizontal flip probability
  RandomRotation: 180  # random rotation angle
  ColorJitter: 0.5  # random color jitter strength

# Logging and monitoring
logging:
  # Log output location. The key says "dir" but the value is a .txt
  # file path.
  logging_dir: "./log/2025.7.4-scatter.txt"
  tensorboard: true  # whether to enable TensorBoard
  checkpoint_interval: 30  # checkpoint save interval (epochs)

# Distributed training (optional)
distributed:
  # Whether to enable distributed training.
  # NOTE(review): `base.distributed` above is `true` - confirm which one
  # the code reads.
  enabled: false
  backend: "nccl"  # distributed backend (nccl/gloo)