多机并行计算
This commit is contained in:
@@ -15,8 +15,8 @@ base:
|
||||
|
||||
# 模型配置
|
||||
models:
|
||||
backbone: 'resnet50'
|
||||
channel_ratio: 1.0
|
||||
backbone: 'resnet18'
|
||||
channel_ratio: 0.75
|
||||
|
||||
# 训练参数
|
||||
training:
|
||||
@@ -31,9 +31,9 @@ training:
|
||||
weight_decay: 0.0005 # 权重衰减
|
||||
scheduler: "step" # 学习率调度器(可选:cosine/cosine_warm/step/None)
|
||||
num_workers: 32 # 数据加载线程数
|
||||
checkpoints: "./checkpoints/resnet50_electornic_20250807/" # 模型保存目录
|
||||
checkpoints: "./checkpoints/resnet18_pdd_test/" # 模型保存目录
|
||||
restore: false
|
||||
restore_model: "./checkpoints/resnet18_20250717_scale=0.75_nosub/best.pth" # 模型恢复路径
|
||||
restore_model: "./checkpoints/resnet50_electornic_20250807/best.pth" # 模型恢复路径
|
||||
cosine_t_0: 10 # 初始周期长度
|
||||
cosine_t_mult: 1 # 周期长度倍率
|
||||
cosine_eta_min: 0.00001 # 最小学习率
|
||||
@@ -70,3 +70,5 @@ logging:
|
||||
distributed:
|
||||
enabled: false # 是否启用分布式训练
|
||||
backend: "nccl" # 分布式后端(nccl/gloo)
|
||||
node_rank: 0 # 节点编号
|
||||
node_num: 1 # 共计几个节点 一般几台机器就有几个节点
|
||||
|
Reference in New Issue
Block a user