训练数据前置处理与提升训练效率

This commit is contained in:
lee
2025-07-10 14:24:05 +08:00
parent 0701538a73
commit 09f41f6289
15 changed files with 430 additions and 116 deletions

26
configs/sub_data.yml Normal file
View File

@@ -0,0 +1,26 @@
# configs/sub_data.yml
# Configuration file designed for the dataset used to train the comparison models.
# Supports comparing different training strategies, e.g. distillation vs. independent training.
#
# NOTE(review): every key in this file sits at column 0, so `data:`, `extend:`
# and `limit:` parse as null and all remaining keys are top-level siblings.
# The indentation was presumably lost; confirm the intended nesting against
# the code that loads this file before relying on it.
# Data configuration
data:
# NOTE(review): the original comment calls this the "dataset name", but the value is a directory path; confirm.
source_dir: "../../data_center/contrast_data/total" # dataset name (example value; replace with the actual dataset)
train_dir: "../../data_center/contrast_data/v1/train" # root directory of the training dataset
val_dir: "../../data_center/contrast_data/v1/val" # root directory of the validation dataset
# Same path as extend_extra_dir below.
data_extra_dir: "../../data_center/contrast_data/v1/extra"
# NOTE(review): presumably a fraction of files to take and a lower bound on that count; confirm with the loader.
max_files_ratio: 0.1
min_files: 10
# NOTE(review): presumably the train/validation split fraction (0.9 train); confirm.
split_ratio: 0.9
# Options prefixed `extend_`; presumably meant to be nested under this key (TODO confirm).
extend:
extend_same_dir: true
extend_extra: true
extend_extra_dir: "../../data_center/contrast_data/v1/extra"
extend_train: true
# Same path as train_dir above.
extend_train_dir: "../../data_center/contrast_data/v1/train"
# Count-limit options; presumably meant to be nested under this key (TODO confirm).
limit:
count_limit: true
limit_count: 200
# Same path as train_dir above.
limit_dir: "../../data_center/contrast_data/v1/train"