From 615c91feb6a1a428570993ee88f115d908841505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=99=A8?= Date: Thu, 29 Dec 2022 13:45:07 +0800 Subject: [PATCH 1/2] masterUpdate --- ieemoo-ai-isempty.py | 3 ++- train.py | 21 ++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/ieemoo-ai-isempty.py b/ieemoo-ai-isempty.py index 06c1248..49ae36c 100755 --- a/ieemoo-ai-isempty.py +++ b/ieemoo-ai-isempty.py @@ -15,6 +15,7 @@ sys.path.insert(0, ".") import logging.config from skywalking import agent, config +os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" SW_SERVER = os.environ.get('SW_AGENT_COLLECTOR_BACKEND_SERVICES') SW_SERVICE_NAME = os.environ.get('SW_AGENT_NAME') if SW_SERVER and SW_SERVICE_NAME: @@ -96,7 +97,7 @@ class Predictor(object): probs = torch.nn.Softmax(dim=-1)(part_logits) topN = torch.argsort(probs, dim=-1, descending=True).tolist() clas_ids = topN[0][0] - clas_ids = 0 if 0==int(clas_ids) or 2 == int(clas_ids) or 3 == int(clas_ids) else 1 + #clas_ids = 0 if 0==int(clas_ids) or 2 == int(clas_ids) or 3 == int(clas_ids) else 1 #print("cur_img result: class id: %d, score: %0.3f" % (clas_ids, probs[0, clas_ids].item())) result["success"] = "true" result["rst_cls"] = str(clas_ids) diff --git a/train.py b/train.py index bf63476..90a7b21 100755 --- a/train.py +++ b/train.py @@ -63,6 +63,15 @@ def save_model(args, model): torch.save(checkpoint, model_checkpoint) logger.info("Saved model checkpoint to [DIR: %s]", args.output_dir) +def save_eve_model(args, model, eve_name): + model_to_save = model.module if hasattr(model, 'module') else model + model_checkpoint = os.path.join(args.output_dir, "%s_checkpoint.bin" % eve_name) + checkpoint = { + 'model': model_to_save.state_dict(), + } + torch.save(checkpoint, model_checkpoint) + logger.info("Saved model checkpoint to [DIR: %s]", args.output_dir) + def setup(args): # Prepare model @@ -265,6 +274,8 @@ def train(args, model): save_model(args, model) best_acc = accuracy logger.info("best accuracy so far: %f" % best_acc) + if int(global_step)%10000 == 0: + save_eve_model(args, model, str(global_step)) model.train() if global_step % t_total == 0: @@ -295,7 +306,7 @@ def main(): help="Name of this run. Used for monitoring.") parser.add_argument("--dataset", choices=["CUB_200_2011", "car", "dog", "nabirds", "INat2017", "emptyJudge5", "emptyJudge4"], default="CUB_200_2011", help="Which dataset.") - parser.add_argument('--data_root', type=str, default='/data/fineGrained') + parser.add_argument('--data_root', type=str, default='/data/pfc/fineGrained') parser.add_argument("--model_type", choices=["ViT-B_16", "ViT-B_32", "ViT-L_16", "ViT-L_32", "ViT-H_14"], default="ViT-B_16",help="Which variant to use.") parser.add_argument("--pretrained_dir", type=str, default="ckpts/ViT-B_16.npz", @@ -309,7 +320,7 @@ def main(): help="Total batch size for training.") parser.add_argument("--eval_batch_size", default=16, type=int, help="Total batch size for eval.") - parser.add_argument("--eval_every", default=200, type=int, + parser.add_argument("--eval_every", default=200, type=int, #200 help="Run prediction on validation set every so many steps." "Will always run one evaluation at the end of training.") @@ -317,7 +328,7 @@ def main(): help="The initial learning rate for SGD.") parser.add_argument("--weight_decay", default=0, type=float, help="Weight deay if we apply some.") - parser.add_argument("--num_steps", default=8000, type=int, #100000 + parser.add_argument("--num_steps", default=40000, type=int, #100000 help="Total number of training epochs to perform.") parser.add_argument("--decay_type", choices=["cosine", "linear"], default="cosine", help="How to decay the learning rate.") @@ -352,9 +363,9 @@ def main(): # Setup CUDA, GPU & distributed training if args.local_rank == -1: device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - print('torch.cuda.device_count()>>>>>>>>>>>>>>>>>>>>>>>>>', torch.cuda.device_count()) + #print('torch.cuda.device_count()>>>>>>>>>>>>>>>>>>>>>>>>>', torch.cuda.device_count()) args.n_gpu = torch.cuda.device_count() - print('torch.cuda.device_count()>>>>>>>>>>>>>>>>>>>>>>>>>', torch.cuda.device_count()) + #print('torch.cuda.device_count()>>>>>>>>>>>>>>>>>>>>>>>>>', torch.cuda.device_count()) else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) From 82c296d18428180e1d3afbfac53b49391f0dea65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=99=A8?= Date: Thu, 29 Dec 2022 14:00:41 +0800 Subject: [PATCH 2/2] develop update --- ieemoo-ai-isempty.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ieemoo-ai-isempty.py b/ieemoo-ai-isempty.py index 49ae36c..ac9877e 100755 --- a/ieemoo-ai-isempty.py +++ b/ieemoo-ai-isempty.py @@ -97,7 +97,7 @@ class Predictor(object): probs = torch.nn.Softmax(dim=-1)(part_logits) topN = torch.argsort(probs, dim=-1, descending=True).tolist() clas_ids = topN[0][0] - #clas_ids = 0 if 0==int(clas_ids) or 2 == int(clas_ids) or 3 == int(clas_ids) else 1 + clas_ids = 0 if 0==int(clas_ids) or 2 == int(clas_ids) or 3 == int(clas_ids) else 1 #print("cur_img result: class id: %d, score: %0.3f" % (clas_ids, probs[0, clas_ids].item())) result["success"] = "true" result["rst_cls"] = str(clas_ids)