Files
ieemoo-ai-contrast/data_preprocessing/create_extra.py
2025-07-07 15:19:22 +08:00

93 lines
3.0 KiB
Python

import os
import shutil
from pathlib import Path
def count_files(directory):
    """Count the regular files located directly inside ``directory``.

    Sub-directories are neither counted nor descended into.

    :param directory: path of the directory to inspect
    :return: number of entries for which ``os.path.isfile`` is true, or ``0``
        when the directory cannot be listed (a message is printed in that case)
    """
    try:
        names = os.listdir(directory)
    except OSError as e:
        # Only filesystem failures (missing directory, permissions, ...) are
        # treated as "no files"; programming errors are allowed to surface.
        print(f"无法统计目录 {directory}: {e}")
        return 0
    return sum(
        1 for name in names if os.path.isfile(os.path.join(directory, name))
    )
def clear_empty_dirs(path):
    """
    Delete every empty directory found below ``path``.

    The tree is walked bottom-up, so a directory that becomes empty because
    its own empty children were just removed is deleted as well. ``path``
    itself is never removed.

    :param path: root directory to clean up
    """
    for root, dirs, _files in os.walk(path, topdown=False):
        for dir_name in dirs:
            dir_path = os.path.join(root, dir_name)
            try:
                # Re-list at deletion time: the names in ``dirs`` were
                # collected before the children were processed.
                if not os.listdir(dir_path):
                    os.rmdir(dir_path)
                    print(f"Deleted empty directory: {dir_path}")
            except OSError as e:
                # ``strerror`` only exists on OSError, so catch exactly that;
                # a broader handler would fail with AttributeError here.
                print(f"Error: {e.strerror}")
def _list_subdirs(path):
    """Return the names of the directories directly inside ``path`` ([] on error)."""
    try:
        with os.scandir(path) as entries:
            return [entry.name for entry in entries if entry.is_dir()]
    except OSError:
        return []


def megre_subdirs(pth):
    """
    Flatten ``pth`` by one level, then remove the directories left empty.

    Every directory found inside a direct sub-directory of ``pth`` is moved up
    so that it sits directly under ``pth``. Files are not touched. Finally
    ``clear_empty_dirs(pth)`` removes the directories that ended up empty.

    The function name keeps its original spelling so existing callers work.

    :param pth: root directory whose second-level directories are pulled up
    """
    # Snapshot the listing first: moving directories out of a tree while
    # os.walk is still traversing it makes the traversal unpredictable.
    top_level = _list_subdirs(pth)
    print(f"image {top_level}")
    for image in top_level:
        inner_dir_path = os.path.join(pth, image)
        for inner_dir in _list_subdirs(inner_dir_path):
            src_dir = os.path.join(inner_dir_path, inner_dir)
            dest_dir = os.path.join(pth, inner_dir)
            if os.path.exists(dest_dir):
                # shutil.move would nest src_dir inside an existing directory
                # (or raise shutil.Error); leave the source where it is.
                print(f"Skipped {inner_dir}: {dest_dir} already exists")
                continue
            shutil.move(src_dir, dest_dir)
            # NOTE: the message says "Copied" but the directory is moved.
            print(f"Copied {inner_dir} to {pth}")
    clear_empty_dirs(pth)
def split_subdirs(source_dir, target_dir, max_files=10):
    """
    Move the sub-directories holding at most ``max_files`` files to ``target_dir``.

    ``source_dir`` is first handed to ``megre_subdirs``. Each of its direct
    sub-directories is then measured with ``count_files`` and, when the count
    is within the threshold, moved (``shutil.move``, not copied) into
    ``target_dir``. A sub-directory whose name already exists in ``target_dir``
    is skipped. Errors for one sub-directory are printed and do not stop the
    loop.

    :param source_dir: path of the source directory
    :param target_dir: path of the destination directory (created if missing)
    :param max_files: largest file count a sub-directory may have to be moved
    """
    # Merge the nested sub-directories before selecting anything.
    megre_subdirs(source_dir)

    # Make sure the destination exists.
    destination_root = Path(target_dir)
    destination_root.mkdir(parents=True, exist_ok=True)

    print(f"开始处理目录: {source_dir}")
    print(f"目标目录: {target_dir}")
    print(f"筛选条件: 文件数 ≤ {max_files}\n")

    # Examine every direct entry of the source directory.
    for entry_name in os.listdir(source_dir):
        candidate = os.path.join(source_dir, entry_name)
        if os.path.isdir(candidate):
            try:
                n_files = count_files(candidate)
                within_limit = n_files <= max_files
                if not within_limit:
                    continue
                print(f"复制 {entry_name} (包含 {n_files} 个文件)")
                destination = os.path.join(target_dir, entry_name)
                if os.path.exists(destination):
                    # Never overwrite or merge into an existing destination.
                    print(f"目录已存在,跳过: {destination}")
                else:
                    shutil.move(candidate, destination)
            except Exception as err:
                print(f"处理目录 {entry_name} 时出错: {err}")

    print("\n处理完成")
if __name__ == "__main__":
    import argparse

    # Default locations, used when no command-line arguments are supplied.
    SOURCE_DIR = r"C:\Users\123\Desktop\test1\scatter_sub_class"
    TARGET_DIR = "scatter_mini"

    parser = argparse.ArgumentParser(
        description="Move sub-directories that contain few files into a "
                    "separate directory."
    )
    parser.add_argument("source_dir", nargs="?", default=SOURCE_DIR,
                        help="directory whose sub-directories are examined")
    parser.add_argument("target_dir", nargs="?", default=TARGET_DIR,
                        help="directory that receives the small sub-directories")
    parser.add_argument("--max-files", type=int, default=10,
                        help="largest file count a sub-directory may have "
                             "to be moved")
    args = parser.parse_args()

    # Run the selection and move.
    split_subdirs(args.source_dir, args.target_dir, max_files=args.max_files)