#encoding: utf-8
"""Generate the index files for the emptyJudge2 image dataset.

Three text files are written next to the ``images/`` folder:

* ``images.txt``              -- ``<id> <class folder>/<file name>``
* ``image_class_labels.txt``  -- ``<id> <label>`` (labels are 1-based)
* ``train_test_split.txt``    -- ``<id> <flag>`` (1 = train, 0 = test)

Image ids start at 0 and follow the line order of ``images.txt``.
"""
import os
import numpy as np
import subprocess
import random

# Generate the dataset

# ----------- optional step: rename images --------------
# index = 0
# src_dir = "../emptyJudge2/images/"
# dst_dir = src_dir
# os.remove('../emptyJudge2/image_class_labels.txt')
# os.remove('../emptyJudge2/images.txt')
# os.remove('../emptyJudge2/train_test_split.txt')
# if(os.path.exists(dst_dir)):
#     pass
# else:
#     os.makedirs(dst_dir)
# for sub in os.listdir(src_dir):
#     sub_path = os.path.join(src_dir, sub)
#     print(sub_path)
#     sub_path_dst = os.path.join(dst_dir, sub)
#     for cur_f in os.listdir(sub_path):
#         cur_img = os.path.join(sub_path, cur_f)
#         cur_img_dst = os.path.join(sub_path_dst, "image%04d.jpg" % index)
#         index += 1
#         os.system("mv %s %s" % (cur_img, cur_img_dst))

# ----------- optional step: delete images that are too small --------------
# src_dir = "../emptyJudge2/images/"
# for sub in os.listdir(src_dir):
#     sub_path = os.path.join(src_dir, sub)
#     for cur_f in os.listdir(sub_path):
#         filepath = os.path.join(sub_path, cur_f)
#         res = subprocess.check_output(['file', filepath])
#         pp = res.decode("utf-8").split(",")[-2]
#         height = int(pp.split("x")[1])
#         width = int(pp.split("x")[0])
#         min_l = min(height, width)
#         if min_l <= 448:
#             os.system("rm %s" % filepath)


def write_images(cur_list, num, fw, index):
    """Write ``num`` randomly ordered entries of ``cur_list`` to ``fw``.

    Each entry becomes one ``"<index> <path>"`` line; ``index`` is
    incremented per line.

    Args:
        cur_list: list of relative image paths.
        num: how many entries to sample (without replacement).
        fw: writable text file object.
        index: id assigned to the first line written.

    Returns:
        The next unused id.
    """
    for feat_path in random.sample(cur_list, num):
        fw.write(str(index) + " " + feat_path + "\n")
        index += 1
    return index


def _collect_images(images_dir, class_order):
    """Return ``{class folder: [class folder/file name, ...]}``."""
    all_dict = {}
    for sub in class_order:
        sub_path = os.path.join(images_dir, sub)
        all_dict[sub] = [os.path.join(sub, cur_f)
                         for cur_f in os.listdir(sub_path)]
    return all_dict


def _parse_images_line(cur_l):
    """Split one ``images.txt`` line into ``(id, relative path)``.

    Only the first space separates the fields, so file names that
    themselves contain spaces are kept intact.
    """
    img_index, img_f = cur_l.rstrip("\n").split(" ", 1)
    return img_index, img_f


def _class_folder(img_f):
    """Return the leading folder of a relative path.

    Paths are written with ``os.path.join``, so both the platform
    separator and ``/`` are accepted.
    """
    return img_f.replace(os.sep, "/").split("/")[0]


def _write_class_labels(images_txt, image_class_labels_txt, src_dict):
    """Write ``image_class_labels.txt``; return the number of image lines.

    The label is the class value from ``src_dict`` plus one. Lines whose
    folder is not a key of ``src_dict`` get no label line but are still
    counted.
    """
    imgs_cnt = 0
    with open(image_class_labels_txt, "w") as fw:
        with open(images_txt, "r") as fr:
            for cur_l in fr:
                imgs_cnt += 1
                img_index, img_f = _parse_images_line(cur_l)
                folder_name = _class_folder(img_f)
                if folder_name in src_dict:
                    label = str(int(src_dict[folder_name]) + 1)
                    fw.write(img_index + " " + label + "\n")
    return imgs_cnt


def _make_split_flags(imgs_cnt, train_ratio):
    """Return ``(train_num, shuffled list of 1/0 flags)`` for ``imgs_cnt``."""
    train_num = int(imgs_cnt * train_ratio)
    all_list = [1] * train_num + [0] * (imgs_cnt - train_num)
    random.shuffle(all_list)
    return train_num, all_list


def _write_train_test_split(images_txt, train_test_split_txt, all_list):
    """Write one ``"<id> <flag>"`` line per ``images.txt`` line.

    Flags are assigned by line position rather than by ``id - 1``: ids
    start at 0, so subtracting one would wrap the first image around to
    the last flag.
    """
    with open(train_test_split_txt, "w") as fw:
        with open(images_txt, "r") as fr:
            for position, cur_l in enumerate(fr):
                img_index, _ = _parse_images_line(cur_l)
                fw.write(img_index + " " + str(all_list[position]) + "\n")


def generate_index_files(src_dir, src_dict, class_order, train_ratio=0.85):
    """Create the three index files inside ``src_dir``.

    Args:
        src_dir: dataset root; must contain ``images/<class folder>/``.
        src_dict: maps class folder name to its 0-based class id string.
        class_order: class folders in the order their images are listed
            in ``images.txt``.
        train_ratio: fraction of images flagged as training samples.

    Returns:
        ``(imgs_cnt, train_num)``.
    """
    images_dir = os.path.join(src_dir, "images")
    images_txt = os.path.join(src_dir, "images.txt")
    image_class_labels_txt = os.path.join(src_dir, "image_class_labels.txt")
    train_test_split_txt = os.path.join(src_dir, "train_test_split.txt")

    # ----------- collect the images and write images.txt --------------
    all_dict = _collect_images(images_dir, class_order)
    index = 0
    with open(images_txt, "w") as fw:
        for sub in class_order:
            index = write_images(all_dict[sub], len(all_dict[sub]), fw, index)

    # ----------- write image_class_labels.txt + train_test_split.txt ------
    imgs_cnt = _write_class_labels(images_txt, image_class_labels_txt,
                                   src_dict)
    train_num, all_list = _make_split_flags(imgs_cnt, train_ratio)
    print("train_num= ", train_num, ", imgs_cnt= ", imgs_cnt)
    _write_train_test_split(images_txt, train_test_split_txt, all_list)
    return imgs_cnt, train_num


if __name__ == "__main__":
    src_dir = "../emptyJudge2/"
    src_dict = {"noempty": "0", "empty": "1"}
    # "noempty" images are listed first, then "empty".
    generate_index_files(src_dir, src_dict, ("noempty", "empty"))