64 lines
2.0 KiB
Python
64 lines
2.0 KiB
Python
"""Train List 训练列表

Format:
    ImagePath Label

Example:
    /data/WebFace/0124920/003.jpg 10572
    /data/WebFace/0124920/012.jpg 10572
    /data/WebFace/0124920/020.jpg 10572
"""
|
|
|
|
import os
|
|
import os.path as osp
|
|
from imutils import paths
|
|
|
|
def generate_list(images_directory, saved_name=None):
    """Build a "<path> <label>" list from a directory of identity folders.

    Every immediate sub-directory of ``images_directory`` is treated as one
    identity. Its label is its index in the *sorted* list of sub-directory
    names, so the labels are reproducible across runs and machines
    (``os.listdir`` alone returns entries in arbitrary order).

    Args:
        images_directory: Face data root that contains one sub-folder per
            identity, e.g. the layout used by WebFace and LFW.
        saved_name: Optional output file path. When truthy, the list is
            also written there as UTF-8 text, one entry per line.

    Returns:
        data_list: List of strings of the form "<image path> <label>",
            each terminated by a newline character.
    """
    # Keep only real sub-directories: a stray file in the root (for example
    # a previously saved list) would make os.listdir() fail on it and would
    # also consume a label.
    identity_names = sorted(
        name for name in os.listdir(images_directory)
        if osp.isdir(osp.join(images_directory, name))
    )

    data_list = []
    for label, name in enumerate(identity_names):
        subdir = osp.join(images_directory, name)
        # The position of the identity folder is used as its face label.
        data_list.extend(
            f"{osp.join(subdir, filename)} {label}\n"
            for filename in sorted(os.listdir(subdir))
        )

    if saved_name:
        with open(saved_name, 'w', encoding='utf-8') as f:
            f.writelines(data_list)
    return data_list
|
|
|
|
def transform_clean_list(webface_directory, cleaned_list_path):
    """Convert the WebFace cleaned list into paths under the dataset root.

    Args:
        webface_directory: WebFace data directory.
        cleaned_list_path: Path of cleaned_list.txt.

    Returns:
        cleaned_list: One string per line of the file, with every backslash
            replaced by a forward slash and ``webface_directory`` joined in
            front. Anything after the path on a line, including the line
            ending, is kept as-is.
    """
    converted = []
    with open(cleaned_list_path, encoding='utf-8') as list_file:
        for raw_line in list_file:
            # The list uses Windows separators; switch to '/' before joining.
            relative_entry = raw_line.replace('\\', '/')
            converted.append(osp.join(webface_directory, relative_entry))
    return converted
|
|
|
|
def remove_dirty_image(webface_directory, cleaned_list):
    """Delete the images under ``webface_directory`` that are not in the clean list.

    WARNING: this is destructive. Every image yielded by
    ``paths.list_images(webface_directory)`` whose path is not found in
    ``cleaned_list`` is removed from disk; an empty ``cleaned_list``
    therefore removes every image found.

    Args:
        webface_directory: WebFace data directory to scan.
        cleaned_list: Iterable of "<path> <label>" strings. Only the first
            whitespace-separated token of each entry is used, as the path
            of an image to keep.
    """
    keep = set()
    for entry in cleaned_list:
        fields = entry.split()
        # Whitespace-only entries carry no path; skip them instead of
        # failing on fields[0].
        if fields:
            keep.add(osp.normpath(fields[0]))

    for p in paths.list_images(webface_directory):
        # Compare normalised paths so that a redundant separator or "./"
        # in either spelling does not cause a kept image to be deleted.
        if osp.normpath(p) not in keep:
            print(f"remove {p}")
            os.remove(p)
|
|
|
|
if __name__ == '__main__':
    # Script entry: convert the cleaned list to paths under the dataset
    # root, then delete every image that the list does not mention.
    webface_root = '/data/CASIA-WebFace/'
    cleaned_list_file = '/data/cleaned_list.txt'
    remove_dirty_image(
        webface_root,
        transform_clean_list(webface_root, cleaned_list_file),
    )