数据分析

This commit is contained in:
lee
2025-07-17 14:33:18 +08:00
parent 09f41f6289
commit 54898e30ec
12 changed files with 233 additions and 34 deletions

View File

@ -15,25 +15,25 @@ base:
# Model configuration
# NOTE(review): several keys below appear twice in a row with different values
# (this looks like the before/after lines of a diff) — confirm only one of each
# pair is kept, since most YAML parsers silently take the last duplicate.
models:
backbone: 'resnet34'
channel_ratio: 1.0
backbone: 'resnet18'
channel_ratio: 0.75
# Training parameters
training:
epochs: 600 # total number of training epochs
batch_size: 128 # batch size
lr: 0.001 # initial learning rate
lr: 0.007 # initial learning rate
optimizer: "sgd" # optimizer type
metric: 'arcface' # metric type (the original comment calls it a loss type); options: arcface/cosface/sphereface/softmax
loss: "cross_entropy" # loss function type; options: cross_entropy/cross_entropy_smooth/center_loss/center_loss_smooth/arcface/cosface/sphereface/softmax
lr_step: 10 # learning-rate adjustment interval, in epochs
lr_decay: 0.98 # learning-rate decay factor
lr_decay: 0.95 # learning-rate decay factor
weight_decay: 0.0005 # weight decay
scheduler: "cosine" # learning-rate scheduler; options: cosine/cosine_warm/step/None
num_workers: 32 # number of data-loading workers
checkpoints: "./checkpoints/resnet34_20250612_scale=1.0/" # directory where model checkpoints are saved
restore: false
restore_model: "resnet18_test/epoch_600.pth" # path of the model to restore from
checkpoints: "./checkpoints/resnet18_20250717_scale=0.75_nosub/" # directory where model checkpoints are saved
restore: true
restore_model: "./checkpoints/resnet18_20250716_scale=0.75_nosub/best.pth" # path of the model to restore from
cosine_t_0: 10 # initial cycle length
cosine_t_mult: 1 # cycle-length multiplier
cosine_eta_min: 0.00001 # minimum learning rate
@ -49,8 +49,8 @@ data:
train_batch_size: 128 # training batch size
val_batch_size: 128 # validation batch size
num_workers: 32 # number of data-loading workers
# NOTE(review): each directory key appears twice (old and new dataset location,
# presumably diff before/after lines) — confirm only the intended one is kept.
data_train_dir: "../data_center/contrast_learning/data_base/train" # root directory of the training dataset
data_val_dir: "../data_center/contrast_learning/data_base/val" # root directory of the validation dataset
data_train_dir: "../data_center/contrast_data/v2/train" # root directory of the training dataset
data_val_dir: "../data_center/contrast_data/v2/val" # root directory of the validation dataset
transform:
img_size: 224 # image size
@ -62,7 +62,7 @@ transform:
# Logging and monitoring
logging:
# NOTE(review): logging_dir is listed twice (presumably diff before/after) —
# confirm only one entry is kept in the real file.
logging_dir: "./logs/resnet50_log" # directory where logs are saved
logging_dir: "./logs/resnet18_scale=0.75_nosub_log" # directory where logs are saved
tensorboard: true # whether to enable TensorBoard
checkpoint_interval: 30 # checkpoint save interval, in epochs

View File

@ -0,0 +1,49 @@
# configs/similar_analysis.yml
# Configuration file designed for comparing trained models.
# Supports comparing different training strategies, e.g. distillation vs. standalone training.
# Base configuration
base:
experiment_name: "model_comparison" # experiment name (used for the results directory)
device: "cuda" # training device: cuda/cpu
embedding_size: 256 # feature dimension
pin_memory: true # whether to enable pin_memory
# NOTE(review): this is true while `distributed.enabled` near the end of this
# file is false — confirm which of the two the consumer actually reads.
distributed: true # whether to enable distributed training
# Model configuration
models:
backbone: 'resnet18'
channel_ratio: 0.75
model_path: "../checkpoints/resnet18_1009/best.pth" # model weights file (presumably the model being analysed — confirm)
# NOTE(review): heatmap output is switched on here while base.distributed is
# true; another config in this change states that heatmaps cannot be used with
# distributed training — confirm this combination is intended.
heatmap:
feature_layer: "layer4"
show_heatmap: true
# Data configuration
data:
dataset: "imagenet" # dataset name (example; replace with the actual dataset)
train_batch_size: 128 # training batch size
val_batch_size: 8 # validation batch size
num_workers: 32 # number of data-loading workers
# NOTE(review): absolute, user-specific paths — presumably input images and
# the output location for joined images; confirm against the reading code.
data_dir: "/home/lc/data_center/image_analysis/error_compare_subimg"
image_joint_pth: "/home/lc/data_center/image_analysis/error_compare_result"
# Image preprocessing / augmentation settings
transform:
img_size: 224 # image size
img_mean: 0.5 # image mean
img_std: 0.5 # image standard deviation (the original comment said "variance")
RandomHorizontalFlip: 0.5 # probability of a random horizontal flip
RandomRotation: 180 # random rotation angle
ColorJitter: 0.5 # random color-jitter strength
# Logging and monitoring
logging:
logging_dir: "./logs/resnet18_scale=0.75_nosub_log" # directory where logs are saved
tensorboard: true # whether to enable TensorBoard
checkpoint_interval: 30 # checkpoint save interval, in epochs
# Distributed training (optional)
# NOTE(review): `enabled` is false here while `base.distributed` earlier in this
# file is true — confirm which setting takes effect.
distributed:
enabled: false # whether to enable distributed training
backend: "nccl" # distributed backend: nccl/gloo

View File

@ -4,23 +4,29 @@
# Data configuration
data:
# NOTE(review): source_dir is listed twice with the same value (presumably a
# whitespace-only diff) — confirm only one entry exists in the real file.
source_dir: "../../data_center/contrast_data/total" # source dataset directory (the original comment calls it a dataset name; example value, replace with the actual dataset)
source_dir: "../../data_center/contrast_data/total" # source dataset directory (the original comment calls it a dataset name; example value, replace with the actual dataset)
train_dir: "../../data_center/contrast_data/v1/train" # root directory of the training dataset
val_dir: "../../data_center/contrast_data/v1/val" # root directory of the validation dataset
data_extra_dir: "../../data_center/contrast_data/v1/extra"
max_files_ratio: 0.1
min_files: 10
split_ratio: 0.9
# NOTE(review): `scr` looks like a typo for `src`; renaming the key would also
# require changing whatever code reads it, so it is left as-is here.
combine_scr_dir: "../../data_center/contrast_data/v1/val" # source directory for dataset merging
combine_dst_dir: "../../data_center/contrast_data/v2/val" # destination directory for dataset merging
# Dataset extension settings
# NOTE(review): the *_dir keys appear twice (without and with a comment,
# presumably diff before/after) — confirm only one of each is kept.
extend:
extend_same_dir: true
extend_extra: true
extend_extra_dir: "../../data_center/contrast_data/v1/extra"
extend_extra_dir: "../../data_center/contrast_data/v1/extra" # data used to extend the test set
extend_train: true
extend_train_dir: "../../data_center/contrast_data/v1/train"
extend_train_dir: "../../data_center/contrast_data/v1/train" # data used to extend the training set
# Per-sample count limiting
# NOTE(review): limit_dir appears twice (presumably diff before/after) —
# confirm only one entry is kept.
limit:
count_limit: true
limit_count: 200
limit_dir: "../../data_center/contrast_data/v1/train"
limit_dir: "../../data_center/contrast_data/v1/train" # directory in which the count per individual sample is limited
# Switches selecting which processing steps run
control:
combine: true # whether to merge sub-class datasets
split: false # split and augment sub-class datasets

View File

@ -8,7 +8,7 @@ base:
log_level: "info" # log level: debug/info/warning/error
embedding_size: 256 # feature dimension
pin_memory: true # whether to enable pin_memory
# NOTE(review): `distributed` appears twice (presumably diff before/after; only
# the comment differs) — confirm only one entry is kept.
distributed: false # whether to enable distributed training
distributed: false # whether to enable distributed training; it cannot be used while heatmaps are enabled
# 模型配置
models:

View File

@ -22,7 +22,7 @@ data:
test_batch_size: 128 # test batch size (the original comment says "validation")
num_workers: 32 # number of data-loading workers
half: true # whether to use half-precision data
# NOTE(review): img_dirs_path appears twice (presumably diff before/after) —
# confirm only one entry is kept.
img_dirs_path: "/personalDocument/lic/base+stlib"
img_dirs_path: "/home/lc/data_center/baseStlib/pic/stlib_base" # storage path of the base standard-library images
# img_dirs_path: "/home/lc/contrast_nettest/data/feature_json"
# NOTE(review): the comment says the default is None, but the value is `false` —
# confirm the consumer treats false as "no filtering".
xlsx_pth: false # product filter; default None means no filtering
@ -42,7 +42,7 @@ logging:
# Output locations for extracted features and statistics
# NOTE(review): json_path appears twice (presumably diff before/after) —
# confirm only one entry is kept.
save:
json_bin: "../search_library/yunhedian_05-09.json" # where the whole JSON file is saved
json_path: "../feature_json/base+stlib/" # path for saving individual JSON files
json_path: "/home/lc/data_center/baseStlib/feature_json/stlib_base" # path for saving individual JSON files
error_barcodes: "error_barcodes.txt"
barcodes_statistics: "../search_library/barcodes_statistics.txt"
create_single_json: true # whether to save individual JSON files