update prepara_data.py.

2022-10-19 13:48:43 +00:00
parent 14558af47b
commit d376fa8c5f
1 changed files with 70 additions and 84 deletions
--- a/prepara_data.py
+++ b/prepara_data.py
@ -1,28 +1,38 @@
+#encoding: utf-8
 import os
-import cv2
 import numpy as np
 import subprocess
 import random


+#生成数据集
+
+
 # ----------- 改写名称 --------------
 # index = 0
-# src_dir = "/data/fineGrained/emptyJudge5"
-# dst_dir = src_dir + "_new"
-# os.makedirs(dst_dir, exist_ok=True)
+# src_dir = "../emptyJudge2/images/"
+# dst_dir = src_dir 
+# os.remove('../emptyJudge2/image_class_labels.txt')
+# os.remove('../emptyJudge2/images.txt')
+# os.remove('../emptyJudge2/train_test_split.txt')
+# if(os.path.exists(dst_dir)):
+#     pass
+# else:
+#     os.makedirs(dst_dir)
+
 # for sub in os.listdir(src_dir):
 #     sub_path = os.path.join(src_dir, sub)
+#     print(sub_path)
 #     sub_path_dst = os.path.join(dst_dir, sub)
-#     os.makedirs(sub_path_dst, exist_ok=True)
 #     for cur_f in os.listdir(sub_path):
 #         cur_img = os.path.join(sub_path, cur_f)
-#         cur_img_dst = os.path.join(sub_path_dst, "a%05d.jpg" % index)
+#         cur_img_dst = os.path.join(sub_path_dst, "image%04d.jpg" % index)
 #         index += 1
 #         os.system("mv %s %s" % (cur_img, cur_img_dst))


 # ----------- 删除过小图像 --------------
-# src_dir = "/data/fineGrained/emptyJudge5"
+# src_dir = "../emptyJudge2/images/"
 # for sub in os.listdir(src_dir):
 #     sub_path = os.path.join(src_dir, sub)
 #     for cur_f in os.listdir(sub_path):
@ -37,83 +47,59 @@ import random


 # ----------- 获取有效图片并写images.txt --------------
-# src_dir = "/data/fineGrained/emptyJudge4/images"
-# src_dict = {"noemp":"0", "yesemp":"1", "hard": "2", "stack": "3"}
-# all_dict = {"yesemp":[], "noemp":[], "hard": [], "stack": []}
-# for sub, value in src_dict.items():
-#     sub_path = os.path.join(src_dir, sub)
-#     for cur_f in os.listdir(sub_path):
-#         all_dict[sub].append(os.path.join(sub, cur_f))
-#
-# yesnum = len(all_dict["yesemp"])
-# nonum = len(all_dict["noemp"])
-# hardnum = len(all_dict["hard"])
-# stacknum = len(all_dict["stack"])
-# thnum = min(yesnum, nonum, hardnum, stacknum)
-# images_txt = src_dir + ".txt"
-# index = 1
-#
-# def write_images(cur_list, thnum, fw, index):
-#     for feat_path in random.sample(cur_list, thnum):
-#         fw.write(str(index) + " " + feat_path + "\n")
-#         index += 1
-#     return index
-#
-# with open(images_txt, "w") as fw:
-#     index = write_images(all_dict["noemp"], thnum, fw, index)
-#     index = write_images(all_dict["yesemp"], thnum, fw, index)
-#     index = write_images(all_dict["hard"], thnum, fw, index)
-#     index = write_images(all_dict["stack"], thnum, fw, index)
+src_dir = "../emptyJudge2/images/"
+src_dict = {"noempty":"0", "empty":"1"}
+all_dict = {"noempty":[], "empty":[]}
+for sub, value in src_dict.items():
+    sub_path = os.path.join(src_dir, sub)
+    for cur_f in os.listdir(sub_path):
+        all_dict[sub].append(os.path.join(sub, cur_f))
+
+yesnum = len(all_dict["empty"])
+#print(yesnum)
+nonum = len(all_dict["noempty"])
+#print(nonum)
+images_txt = "../emptyJudge2/images.txt"
+index = 0
+
+
+def write_images(cur_list, num, fw, index):
+    for feat_path in random.sample(cur_list, num):
+        fw.write(str(index) + " " + feat_path + "\n")
+        index += 1
+    return index
+
+with open(images_txt, "w") as fw:
+    index = write_images(all_dict["noempty"], nonum, fw, index)
+    index = write_images(all_dict["empty"], yesnum, fw, index)
+    
+    

 # ----------- 写 image_class_labels.txt + train_test_split.txt --------------
-# src_dir = "/data/fineGrained/emptyJudge4"
-# src_dict = {"noemp":"0", "yesemp":"1", "hard": "2", "stack": "3"}
-# images_txt = os.path.join(src_dir, "images.txt")
-# image_class_labels_txt = os.path.join(src_dir, "image_class_labels.txt")
-# imgs_cnt = 0
-# with open(image_class_labels_txt, "w") as fw:
-#     with open(images_txt, "r") as fr:
-#         for cur_l in fr:
-#             imgs_cnt += 1
-#             img_index, img_f = cur_l.strip().split(" ")
-#             folder_name = img_f.split("/")[0]
-#             if folder_name in src_dict:
-#                 cur_line = img_index + " " + str(int(src_dict[folder_name])+1)
-#                 fw.write(cur_line + "\n")
-#
-# train_num = int(imgs_cnt*0.85)
-# print("train_num= ", train_num, ", imgs_cnt= ", imgs_cnt)
-# all_list = [1]*train_num + [0]*(imgs_cnt-train_num)
-# assert len(all_list) == imgs_cnt
-# random.shuffle(all_list)
-# train_test_split_txt = os.path.join(src_dir, "train_test_split.txt")
-# with open(train_test_split_txt, "w") as fw:
-#     with open(images_txt, "r") as fr:
-#         for cur_l in fr:
-#             img_index, img_f = cur_l.strip().split(" ")
-#             cur_line = img_index + " " + str(all_list[int(img_index) - 1])
-#             fw.write(cur_line + "\n")
-
-# ----------- 生成标准测试集 --------------
-# src_dir = "/data/fineGrained/emptyJudge5/images"
-# src_dict = {"noemp":"0", "yesemp":"1", "hard": "2", "fly": "3", "stack": "4"}
-# all_dict = {"noemp":[],  "yesemp":[], "hard": [],  "fly": [], "stack": []}
-# for sub, value in src_dict.items():
-#     sub_path = os.path.join(src_dir, sub)
-#     for cur_f in os.listdir(sub_path):
-#         all_dict[sub].append(cur_f)
-#
-# dst_dir = src_dir + "_test"
-# os.makedirs(dst_dir, exist_ok=True)
-# for sub, value in src_dict.items():
-#     sub_path = os.path.join(src_dir, sub)
-#     sub_path_dst = os.path.join(dst_dir, sub)
-#     os.makedirs(sub_path_dst, exist_ok=True)
-#
-#     cur_list = all_dict[sub]
-#     test_num = int(len(cur_list) * 0.05)
-#     for cur_f in random.sample(cur_list, test_num):
-#         cur_path = os.path.join(sub_path, cur_f)
-#         cur_path_dst = os.path.join(sub_path_dst, cur_f)
-#         os.system("cp %s %s" % (cur_path, cur_path_dst))
+src_dir = "../emptyJudge2/"
+src_dict = {"noempty":"0", "empty":"1"}
+images_txt = os.path.join(src_dir, "images.txt")
+image_class_labels_txt = os.path.join(src_dir, "image_class_labels.txt")
+imgs_cnt = 0
+with open(image_class_labels_txt, "w") as fw:
+    with open(images_txt, "r") as fr:
+        for cur_l in fr:
+            imgs_cnt += 1
+            img_index, img_f = cur_l.strip().split(" ")
+            folder_name = img_f.split("/")[0]
+            if folder_name in src_dict:
+                cur_line = img_index + " " + str(int(src_dict[folder_name])+1)
+                fw.write(cur_line + "\n")

+train_num = int(imgs_cnt*0.85)
+print("train_num= ", train_num, ", imgs_cnt= ", imgs_cnt)
+all_list = [1]*train_num + [0]*(imgs_cnt-train_num)
+assert len(all_list) == imgs_cnt
+random.shuffle(all_list)
+train_test_split_txt = os.path.join(src_dir, "train_test_split.txt")
+with open(train_test_split_txt, "w") as fw:
+    with open(images_txt, "r") as fr:
+        for cur_l in fr:
+            img_index, img_f = cur_l.strip().split(" ")
+            cur_line = img_index + " " + str(all_list[int(img_index) - 1])
+            fw.write(cur_line + "\n")