#encoding: utf-8
"""Generate the index files for the emptyJudge2 image dataset.

Three text files are written into the dataset root, next to ``images/``:

* ``images.txt``             -- ``<index> <class_folder>/<file_name>``
* ``image_class_labels.txt`` -- ``<index> <label>`` (label = class id + 1)
* ``train_test_split.txt``   -- ``<index> <flag>`` (1 for the randomly
  chosen training share, 0 for the rest)

Image indices start at 0 and follow the order of ``images.txt``.
"""
import os
import numpy as np
import subprocess
import random


# ----------- rename images (disabled) --------------
# index = 0
# src_dir = "../emptyJudge2/images/"
# dst_dir = src_dir
# os.remove('../emptyJudge2/image_class_labels.txt')
# os.remove('../emptyJudge2/images.txt')
# os.remove('../emptyJudge2/train_test_split.txt')
# if(os.path.exists(dst_dir)):
#     pass
# else:
#     os.makedirs(dst_dir)
#
# for sub in os.listdir(src_dir):
#     sub_path = os.path.join(src_dir, sub)
#     print(sub_path)
#     sub_path_dst = os.path.join(dst_dir, sub)
#     for cur_f in os.listdir(sub_path):
#         cur_img = os.path.join(sub_path, cur_f)
#         cur_img_dst = os.path.join(sub_path_dst, "image%04d.jpg" % index)
#         index += 1
#         os.system("mv %s %s" % (cur_img, cur_img_dst))

# ----------- delete too-small images (disabled) --------------
# src_dir = "../emptyJudge2/images/"
# for sub in os.listdir(src_dir):
#     sub_path = os.path.join(src_dir, sub)
#     for cur_f in os.listdir(sub_path):
#         ...  # NOTE(review): rest of this disabled snippet was not visible


# Dataset root; it must contain images/<class_folder>/<files>.
DATA_ROOT = "../emptyJudge2/"
# Class folder name -> zero-based class id (stored as a string).
SRC_DICT = {"noempty": "0", "empty": "1"}
# Order in which the class folders are written to images.txt.
CLASS_ORDER = ("noempty", "empty")
# Share of the images flagged as training data.
TRAIN_RATIO = 0.85


def list_class_images(images_dir, class_names):
    """Return ``{class_name: ["class_name/file", ...]}`` for every class.

    Entries always use "/" as separator (not ``os.sep``) because the label
    step recovers the class folder by splitting on "/".

    Raises:
        OSError: if a class folder does not exist under ``images_dir``.
    """
    all_dict = {}
    for sub in class_names:
        sub_path = os.path.join(images_dir, sub)
        all_dict[sub] = [sub + "/" + cur_f for cur_f in os.listdir(sub_path)]
    return all_dict


def write_images(cur_list, num, fw, index):
    """Write ``num`` randomly sampled paths as ``<index> <path>`` lines.

    Args:
        cur_list: relative image paths of one class.
        num: how many of them to sample (without replacement).
        fw: writable text file object.
        index: index given to the first line written.

    Returns:
        The next unused index (``index + num``).

    Raises:
        ValueError: if ``num`` is larger than ``len(cur_list)``.
    """
    for feat_path in random.sample(cur_list, num):
        fw.write(str(index) + " " + feat_path + "\n")
        index += 1
    return index


def write_images_txt(all_dict, class_order, images_txt, first_index=0):
    """Write every image of every class to ``images_txt``.

    Classes are written in ``class_order``; the images inside one class are
    written in random order. Returns the number of lines written.
    """
    index = first_index
    with open(images_txt, "w") as fw:
        for name in class_order:
            paths = all_dict[name]
            index = write_images(paths, len(paths), fw, index)
    return index - first_index


def read_images_txt(images_txt):
    """Return the ``(index, path)`` string pairs stored in ``images_txt``.

    Only the first space of a line separates index and path, so file names
    that contain spaces are read back intact. Blank lines are ignored.
    """
    entries = []
    with open(images_txt, "r") as fr:
        for cur_l in fr:
            cur_l = cur_l.strip()
            if not cur_l:
                continue
            img_index, img_f = cur_l.split(" ", 1)
            entries.append((img_index, img_f))
    return entries


def write_class_labels(entries, src_dict, image_class_labels_txt):
    """Write ``<index> <label>`` for every entry whose folder is known.

    The label is the class id from ``src_dict`` plus one. Entries whose
    leading folder is not a key of ``src_dict`` are skipped.
    """
    with open(image_class_labels_txt, "w") as fw:
        for img_index, img_f in entries:
            folder_name = img_f.split("/")[0]
            if folder_name in src_dict:
                cur_line = img_index + " " + str(int(src_dict[folder_name]) + 1)
                fw.write(cur_line + "\n")


def write_train_test_split(entries, train_test_split_txt,
                           train_ratio=TRAIN_RATIO):
    """Write ``<index> <flag>`` per entry; exactly ``train_num`` flags are 1.

    ``train_num`` is ``int(len(entries) * train_ratio)``. Flags are shuffled
    and paired with the entries by position, so the result does not depend
    on whether the indices in ``images.txt`` start at 0 or at 1.

    Returns:
        The number of entries flagged as training data.
    """
    imgs_cnt = len(entries)
    train_num = int(imgs_cnt * train_ratio)
    print("train_num= ", train_num, ", imgs_cnt= ", imgs_cnt)
    all_list = [1] * train_num + [0] * (imgs_cnt - train_num)
    random.shuffle(all_list)
    with open(train_test_split_txt, "w") as fw:
        for (img_index, _), flag in zip(entries, all_list):
            fw.write(img_index + " " + str(flag) + "\n")
    return train_num


def build_index_files(data_root=DATA_ROOT, src_dict=None,
                      class_order=CLASS_ORDER, train_ratio=TRAIN_RATIO):
    """Create images.txt, image_class_labels.txt and train_test_split.txt.

    Args:
        data_root: dataset folder that contains ``images/``.
        src_dict: class folder name -> class id string; defaults to SRC_DICT.
        class_order: class folder names in the order they are written.
        train_ratio: share of the images flagged as training data.

    Returns:
        The number of images indexed.
    """
    if src_dict is None:
        src_dict = SRC_DICT
    images_txt = os.path.join(data_root, "images.txt")
    image_class_labels_txt = os.path.join(data_root, "image_class_labels.txt")
    train_test_split_txt = os.path.join(data_root, "train_test_split.txt")

    all_dict = list_class_images(os.path.join(data_root, "images"), class_order)
    write_images_txt(all_dict, class_order, images_txt)

    # Both remaining files are derived from images.txt as written on disk.
    entries = read_images_txt(images_txt)
    write_class_labels(entries, src_dict, image_class_labels_txt)
    write_train_test_split(entries, train_test_split_txt, train_ratio)
    return len(entries)


if __name__ == "__main__":
    build_index_files()