update prepara_data.py.

This commit is contained in:
Brainway
2022-10-19 13:48:43 +00:00
committed by Gitee
parent 14558af47b
commit d376fa8c5f

View File

@ -1,28 +1,38 @@
#encoding: utf-8
import os
import random
import subprocess

import cv2
import numpy as np
#生成数据集
# ----------- 改写名称 -------------- # ----------- 改写名称 --------------
# index = 0 # index = 0
# src_dir = "/data/fineGrained/emptyJudge5" # src_dir = "../emptyJudge2/images/"
# dst_dir = src_dir + "_new" # dst_dir = src_dir
# os.makedirs(dst_dir, exist_ok=True) # os.remove('../emptyJudge2/image_class_labels.txt')
# os.remove('../emptyJudge2/images.txt')
# os.remove('../emptyJudge2/train_test_split.txt')
# if(os.path.exists(dst_dir)):
# pass
# else:
# os.makedirs(dst_dir)
# for sub in os.listdir(src_dir): # for sub in os.listdir(src_dir):
# sub_path = os.path.join(src_dir, sub) # sub_path = os.path.join(src_dir, sub)
# print(sub_path)
# sub_path_dst = os.path.join(dst_dir, sub) # sub_path_dst = os.path.join(dst_dir, sub)
# os.makedirs(sub_path_dst, exist_ok=True)
# for cur_f in os.listdir(sub_path): # for cur_f in os.listdir(sub_path):
# cur_img = os.path.join(sub_path, cur_f) # cur_img = os.path.join(sub_path, cur_f)
# cur_img_dst = os.path.join(sub_path_dst, "a%05d.jpg" % index) # cur_img_dst = os.path.join(sub_path_dst, "image%04d.jpg" % index)
# index += 1 # index += 1
# os.system("mv %s %s" % (cur_img, cur_img_dst)) # os.system("mv %s %s" % (cur_img, cur_img_dst))
# ----------- 删除过小图像 -------------- # ----------- 删除过小图像 --------------
# src_dir = "/data/fineGrained/emptyJudge5" # src_dir = "../emptyJudge2/images/"
# for sub in os.listdir(src_dir): # for sub in os.listdir(src_dir):
# sub_path = os.path.join(src_dir, sub) # sub_path = os.path.join(src_dir, sub)
# for cur_f in os.listdir(sub_path): # for cur_f in os.listdir(sub_path):
@ -37,83 +47,59 @@ import random
# ----------- Collect valid images and write images.txt --------------
# Root folder holding one sub-folder per class.
src_dir = "../emptyJudge2/images/"
# Class sub-folder name -> class id (kept as a string).
src_dict = {"noempty": "0", "empty": "1"}
# Class sub-folder name -> relative image paths ("<class>/<file>") found on disk.
all_dict = {"noempty": [], "empty": []}
for sub in src_dict:
    sub_path = os.path.join(src_dir, sub)
    for cur_f in os.listdir(sub_path):
        # Join with "/" explicitly: the label-writing step recovers the class
        # folder with split("/"), so the separator must not depend on the OS.
        all_dict[sub].append(sub + "/" + cur_f)

# Every image of each class is written out, so the counts are the full lengths.
yesnum = len(all_dict["empty"])
nonum = len(all_dict["noempty"])
images_txt = "../emptyJudge2/images.txt"
# First image id handed to write_images().
# NOTE(review): ids start at 0 here, whereas CUB-style list files are usually
# 1-based -- confirm what the dataset loader expects.
index = 0
#
def write_images(cur_list, num, fw, index):
    """Write ``num`` randomly chosen entries of ``cur_list`` as "<id> <path>" lines.

    Args:
        cur_list: Sequence of image paths to draw from.
        num: How many entries to draw (without replacement); must not exceed
            ``len(cur_list)``.
        fw: Writable text file object that receives one line per image.
        index: Id assigned to the first written image; later images get
            consecutive ids.

    Returns:
        The next unused id, i.e. ``index`` plus the number of lines written.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``num`` is negative
            or larger than ``len(cur_list)``.
    """
    for feat_path in random.sample(cur_list, num):
        fw.write(str(index) + " " + feat_path + "\n")
        index += 1
    return index
#
# Write every collected image to images.txt: all "noempty" entries first, then
# all "empty" entries. write_images() returns the next free id, so the ids run
# consecutively across both classes.
with open(images_txt, "w") as fw:
    index = write_images(all_dict["noempty"], nonum, fw, index)
    index = write_images(all_dict["empty"], yesnum, fw, index)
# ----------- Write image_class_labels.txt + train_test_split.txt --------------
src_dir = "../emptyJudge2/"
# Class sub-folder name -> class id (kept as a string).
src_dict = {"noempty": "0", "empty": "1"}
images_txt = os.path.join(src_dir, "images.txt")
image_class_labels_txt = os.path.join(src_dir, "image_class_labels.txt")
# Number of image records seen in images.txt; the train/test split step
# sizes its flag list from this value.
imgs_cnt = 0
# Open the input first so a missing images.txt fails before the label file
# is truncated.
with open(images_txt, "r") as fr, open(image_class_labels_txt, "w") as fw:
    for cur_l in fr:
        cur_l = cur_l.strip()
        if not cur_l:
            # Skip blank lines instead of failing on the unpack below.
            continue
        imgs_cnt += 1
        # maxsplit=1 keeps file names that contain spaces in one piece.
        img_index, img_f = cur_l.split(" ", 1)
        folder_name = img_f.split("/")[0]
        if folder_name in src_dict:
            # Written labels are the class id shifted by one ("0" -> 1, "1" -> 2).
            cur_line = img_index + " " + str(int(src_dict[folder_name]) + 1)
            fw.write(cur_line + "\n")
#
# train_num = int(imgs_cnt*0.85)
# print("train_num= ", train_num, ", imgs_cnt= ", imgs_cnt)
# all_list = [1]*train_num + [0]*(imgs_cnt-train_num)
# assert len(all_list) == imgs_cnt
# random.shuffle(all_list)
# train_test_split_txt = os.path.join(src_dir, "train_test_split.txt")
# with open(train_test_split_txt, "w") as fw:
# with open(images_txt, "r") as fr:
# for cur_l in fr:
# img_index, img_f = cur_l.strip().split(" ")
# cur_line = img_index + " " + str(all_list[int(img_index) - 1])
# fw.write(cur_line + "\n")
# ----------- 生成标准测试集 --------------
# src_dir = "/data/fineGrained/emptyJudge5/images"
# src_dict = {"noemp":"0", "yesemp":"1", "hard": "2", "fly": "3", "stack": "4"}
# all_dict = {"noemp":[], "yesemp":[], "hard": [], "fly": [], "stack": []}
# for sub, value in src_dict.items():
# sub_path = os.path.join(src_dir, sub)
# for cur_f in os.listdir(sub_path):
# all_dict[sub].append(cur_f)
#
# dst_dir = src_dir + "_test"
# os.makedirs(dst_dir, exist_ok=True)
# for sub, value in src_dict.items():
# sub_path = os.path.join(src_dir, sub)
# sub_path_dst = os.path.join(dst_dir, sub)
# os.makedirs(sub_path_dst, exist_ok=True)
#
# cur_list = all_dict[sub]
# test_num = int(len(cur_list) * 0.05)
# for cur_f in random.sample(cur_list, test_num):
# cur_path = os.path.join(sub_path, cur_f)
# cur_path_dst = os.path.join(sub_path_dst, cur_f)
# os.system("cp %s %s" % (cur_path, cur_path_dst))
# 85% of the images are flagged 1 (train), the remainder 0 (test).
train_num = int(imgs_cnt * 0.85)
print("train_num= ", train_num, ", imgs_cnt= ", imgs_cnt)
all_list = [1] * train_num + [0] * (imgs_cnt - train_num)
random.shuffle(all_list)
train_test_split_txt = os.path.join(src_dir, "train_test_split.txt")
with open(images_txt, "r") as fr, open(train_test_split_txt, "w") as fw:
    records = [cur_l.strip() for cur_l in fr if cur_l.strip()]
    if len(records) != imgs_cnt:
        # Every image needs exactly one flag; fail loudly rather than leave
        # some images without a split entry.
        raise ValueError(
            "images.txt has %d records but imgs_cnt is %d" % (len(records), imgs_cnt)
        )
    # Pair flags with records by line position, not by all_list[id - 1]:
    # the flags are already shuffled, and positional pairing works whether
    # the ids in images.txt start at 0 or at 1.
    for cur_l, flag in zip(records, all_list):
        img_index = cur_l.split(" ", 1)[0]
        fw.write(img_index + " " + str(flag) + "\n")