多机并行计算

This commit is contained in:
lee
2025-08-18 10:14:05 +08:00
parent 99a204ee22
commit c978787ff8
8 changed files with 43 additions and 25 deletions

View File

@@ -68,7 +68,7 @@ logging:
# 分布式训练(可选)
distributed:
enabled: false # 是否启用分布式训练
enabled: true # 是否启用分布式训练
backend: "nccl" # 分布式后端(nccl/gloo)
node_rank: 0 # 节点编号
node_num: 1 # 节点总数,一般等于参与训练的机器台数
node_num: 2 # 节点总数,一般等于参与训练的机器台数

View File

@@ -27,10 +27,10 @@ data:
train_batch_size: 128 # 训练批次大小
val_batch_size: 8 # 验证批次大小
num_workers: 32 # 数据加载线程数
data_dir: "/home/lc/data_center/image_analysis/pic_pic_similar_maxtrix"
data_dir: "/home/lc/data_center/image_analysis/pic_pic_similar_maxtrix_new"
image_joint_pth: "/home/lc/data_center/image_analysis/error_compare_result"
total_pkl: "/home/lc/data_center/image_analysis/pic_pic_similar_maxtrix/total.pkl"
result_txt: "/home/lc/data_center/image_analysis/pic_pic_similar_maxtrix/result.txt"
total_pkl: "/home/lc/data_center/image_analysis/pic_pic_similar_maxtrix_new/total.pkl"
result_txt: "/home/lc/data_center/image_analysis/pic_pic_similar_maxtrix_new/result.txt"
transform:
img_size: 224 # 图像尺寸
@@ -46,9 +46,9 @@ logging:
tensorboard: true # 是否启用TensorBoard
checkpoint_interval: 30 # 检查点保存间隔(单位:epoch)
event:
oneToOne_max_th: 0.9
oneToSn_min_th: 0.6
event_save_dir: "/home/lc/works/realtime_yolov10s/online_yolov10s_resnetv11_20250702/yolos_tracking"
stdlib_image_path: "/testDataAndLogs/module_test_record/comparison/标准图测试数据/pic/stlib_base"
pickle_path: "event.pickle"
#event:
# oneToOne_max_th: 0.9
# oneToSn_min_th: 0.6
# event_save_dir: "/home/lc/works/realtime_yolov10s/online_yolov10s_resnetv11_20250702/yolos_tracking"
# stdlib_image_path: "/testDataAndLogs/module_test_record/comparison/标准图测试数据/pic/stlib_base"
# pickle_path: "event.pickle"

View File

@@ -19,7 +19,7 @@ models:
channel_ratio: 1.0
model_path: "../checkpoints/resnet101_electornic_20250807/best.pth"
onnx_model: "../checkpoints/resnet101_electornic_20250807/best.onnx"
rknn_model: "../checkpoints/resnet101_electornic_20250807/resnet101_electornic.rknn"
rknn_model: "../checkpoints/resnet101_electornic_20250807/resnet101_electornic_3588.rknn"
rknn_batch_size: 1
# 日志与监控

View File

@@ -12,9 +12,9 @@ base:
# 模型配置
models:
backbone: 'resnet101'
channel_ratio: 1.0
checkpoints: "../checkpoints/resnet101_electornic_20250807/best.pth"
backbone: 'resnet18'
channel_ratio: 0.75
checkpoints: "../checkpoints/resnet18_20250718_scale=0.75_nosub/best.pth"
# 数据配置
data:
@@ -22,7 +22,7 @@ data:
test_batch_size: 128 # 测试批次大小
num_workers: 32 # 数据加载线程数
half: true # 是否启用半精度数据
img_dirs_path: "/shareData/completed_data/scatter_data/electronic_scale/base/total" # base标准库图片存储路径
img_dirs_path: "/home/lc/data_center/baseStlib/pic/stlib_base" # base标准库图片存储路径
# img_dirs_path: "/home/lc/contrast_nettest/data/feature_json"
xlsx_pth: false # 过滤商品, 默认None不进行过滤
@@ -42,7 +42,7 @@ logging:
save:
json_bin: "../search_library/resnet101_electronic.json" # 保存整个json文件
json_path: "/home/lc/data_center/baseStlib/feature_json/stlib_base_resnet18_sub" # 保存单个json文件路径
json_path: "/home/lc/data_center/baseStlib/feature_json/stlib_base_resnet18_sub_1k_合并" # 保存单个json文件路径
error_barcodes: "error_barcodes.txt"
barcodes_statistics: "../search_library/barcodes_statistics.txt"
create_single_json: false # 是否保存单个json文件
create_single_json: true # 是否保存单个json文件