Files
ieemoo-ai-imageassessment/contrast/utils.py
2024-11-27 15:37:10 +08:00

64 lines
2.0 KiB
Python

"""Train List 训练列表
格式:
ImagePath Label
示例:
/data/WebFace/0124920/003.jpg 10572
/data/WebFace/0124920/012.jpg 10572
/data/WebFace/0124920/020.jpg 10572
"""
import os
import os.path as osp
from imutils import paths
def generate_list(images_directory, saved_name=None):
    """Build a training list of ``<image path> <label>`` lines.

    Every immediate subdirectory of ``images_directory`` is treated as one
    identity. Subdirectories are visited in sorted name order and the
    zero-based position in that order is used as the label, so repeated runs
    over the same tree produce the same labels (``os.listdir`` alone returns
    entries in arbitrary order). Entries of ``images_directory`` that are not
    directories are skipped.

    Args:
        images_directory: Root directory that holds one subdirectory per
            identity (the layout used by WebFace and LFW).
        saved_name: Optional path of a text file to write the list to
            (UTF-8). Nothing is written when it is falsy.

    Returns:
        data_list: List of strings, one per file found, each made of the
            file path, a space, the integer label and a trailing newline.
    """
    # Only directories can be identities; calling listdir on a stray file
    # in the root would raise.
    identity_dirs = sorted(
        name for name in os.listdir(images_directory)
        if osp.isdir(osp.join(images_directory, name))
    )
    data_list = []
    for label, name in enumerate(identity_dirs):
        subdir = osp.join(images_directory, name)
        # The identity index is appended to each path as its label.
        data_list.extend(
            f"{osp.join(subdir, file_name)} {label}\n"
            for file_name in sorted(os.listdir(subdir))
        )
    if saved_name:
        with open(saved_name, 'w', encoding='utf-8') as f:
            f.writelines(data_list)
    return data_list
def transform_clean_list(webface_directory, cleaned_list_path):
    """Convert the WebFace cleaned list into paths under ``webface_directory``.

    Each non-blank line of the list file has its backslashes replaced with
    forward slashes and is then joined onto ``webface_directory``. The rest
    of the line (anything after the path, including the trailing newline) is
    kept unchanged.

    Args:
        webface_directory: WebFace data directory to prefix each entry with.
        cleaned_list_path: Path of the ``cleaned_list.txt`` file (UTF-8).

    Returns:
        cleaned_list: List of converted lines, in file order.
    """
    cleaned_list = []
    with open(cleaned_list_path, encoding='utf-8') as f:
        for line in f:
            # A blank line would otherwise turn into an entry that names
            # only the directory itself.
            if not line.strip():
                continue
            # Strip leading separators: os.path.join discards the directory
            # when the second component is absolute.
            relative = line.replace('\\', '/').lstrip('/')
            cleaned_list.append(osp.join(webface_directory, relative))
    return cleaned_list
def remove_dirty_image(webface_directory, cleaned_list):
    """Delete images under ``webface_directory`` that are not in ``cleaned_list``.

    Every path yielded by ``paths.list_images(webface_directory)`` is
    compared with the paths to keep; those not found are printed and removed
    with ``os.remove``. Both sides are passed through ``os.path.normpath``
    before comparison, so differences in separators, ``./`` segments or
    doubled slashes do not cause a kept image to be deleted.

    NOTE: if ``cleaned_list`` holds no usable entries, every listed image is
    removed.

    Args:
        webface_directory: Directory whose images are checked.
        cleaned_list: Iterable of strings whose first whitespace-separated
            field is the path of an image to keep; the rest of each string
            is ignored. Blank entries are skipped.
    """
    keep = set()
    for entry in cleaned_list:
        fields = entry.split()
        # Whitespace-only entries have no path field to read.
        if fields:
            keep.add(os.path.normpath(fields[0]))
    for p in paths.list_images(webface_directory):
        if os.path.normpath(p) not in keep:
            print(f"remove {p}")
            os.remove(p)
if __name__ == '__main__':
    # Script entry: convert the cleaned list into paths under the dataset
    # root, then delete every image the list does not mention.
    webface_root = '/data/CASIA-WebFace/'
    kept_entries = transform_clean_list(webface_root, '/data/cleaned_list.txt')
    remove_dirty_image(webface_root, kept_entries)