update
This commit is contained in:
@ -12,7 +12,27 @@ from torchvision import transforms
|
|||||||
# import logging.config as log_config
|
# import logging.config as log_config
|
||||||
sys.path.insert(0, ".")
|
sys.path.insert(0, ".")
|
||||||
|
|
||||||
|
<<<<<<< HEAD
|
||||||
#Flask对外服务接口
|
#Flask对外服务接口
|
||||||
|
=======
|
||||||
|
import logging.config
|
||||||
|
from skywalking import agent, config
|
||||||
|
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
|
||||||
|
SW_SERVER = os.environ.get('SW_AGENT_COLLECTOR_BACKEND_SERVICES')
|
||||||
|
SW_SERVICE_NAME = os.environ.get('SW_AGENT_NAME')
|
||||||
|
if SW_SERVER and SW_SERVICE_NAME:
|
||||||
|
config.init() #采集服务的地址,给自己的服务起个名称
|
||||||
|
#config.init(collector="123.60.56.51:11800", service='ieemoo-ai-search') #采集服务的地址,给自己的服务起个名称
|
||||||
|
agent.start()
|
||||||
|
def setup_logging(path):
|
||||||
|
if os.path.exists(path):
|
||||||
|
with open(path, 'r') as f:
|
||||||
|
config = json.load(f)
|
||||||
|
logging.config.dictConfig(config)
|
||||||
|
logger = logging.getLogger("root")
|
||||||
|
return logger
|
||||||
|
logger = setup_logging('utils/logging.json')
|
||||||
|
>>>>>>> develop
|
||||||
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
41
train.py
41
train.py
@ -57,9 +57,29 @@ def reduce_mean(tensor, nprocs):
|
|||||||
|
|
||||||
#保存模型
|
#保存模型
|
||||||
def save_model(args, model):
|
def save_model(args, model):
|
||||||
|
<<<<<<< HEAD
|
||||||
model_checkpoint = os.path.join("../module/ieemoo-ai-isempty/model/now/emptyjudge5_checkpoint.bin")
|
model_checkpoint = os.path.join("../module/ieemoo-ai-isempty/model/now/emptyjudge5_checkpoint.bin")
|
||||||
torch.save(model, model_checkpoint)
|
torch.save(model, model_checkpoint)
|
||||||
logger.info("Saved model checkpoint to [File: %s]", "../module/ieemoo-ai-isempty/model/now/emptyjudge5_checkpoint.bin")
|
logger.info("Saved model checkpoint to [File: %s]", "../module/ieemoo-ai-isempty/model/now/emptyjudge5_checkpoint.bin")
|
||||||
|
=======
|
||||||
|
model_to_save = model.module if hasattr(model, 'module') else model
|
||||||
|
model_checkpoint = os.path.join(args.output_dir, "%s_checkpoint.bin" % args.name)
|
||||||
|
checkpoint = {
|
||||||
|
'model': model_to_save.state_dict(),
|
||||||
|
}
|
||||||
|
torch.save(checkpoint, model_checkpoint)
|
||||||
|
logger.info("Saved model checkpoint to [DIR: %s]", args.output_dir)
|
||||||
|
|
||||||
|
def save_eve_model(args, model, eve_name):
|
||||||
|
model_to_save = model.module if hasattr(model, 'module') else model
|
||||||
|
model_checkpoint = os.path.join(args.output_dir, "%s_checkpoint.bin" % eve_name)
|
||||||
|
checkpoint = {
|
||||||
|
'model': model_to_save.state_dict(),
|
||||||
|
}
|
||||||
|
torch.save(checkpoint, model_checkpoint)
|
||||||
|
logger.info("Saved model checkpoint to [DIR: %s]", args.output_dir)
|
||||||
|
|
||||||
|
>>>>>>> develop
|
||||||
|
|
||||||
#根据数据集配置模型
|
#根据数据集配置模型
|
||||||
def setup(args):
|
def setup(args):
|
||||||
@ -249,6 +269,8 @@ def train(args, model):
|
|||||||
save_model(args, model)
|
save_model(args, model)
|
||||||
best_acc = accuracy
|
best_acc = accuracy
|
||||||
logger.info("best accuracy so far: %f" % best_acc)
|
logger.info("best accuracy so far: %f" % best_acc)
|
||||||
|
if int(global_step)%10000 == 0:
|
||||||
|
save_eve_model(args, model, str(global_step))
|
||||||
model.train()
|
model.train()
|
||||||
|
|
||||||
if global_step % t_total == 0:
|
if global_step % t_total == 0:
|
||||||
@ -280,8 +302,13 @@ def main():
|
|||||||
parser.add_argument("--name", type=str, default='ieemooempty',
|
parser.add_argument("--name", type=str, default='ieemooempty',
|
||||||
help="Name of this run. Used for monitoring.")
|
help="Name of this run. Used for monitoring.")
|
||||||
parser.add_argument("--dataset", choices=["CUB_200_2011", "car", "dog", "nabirds", "INat2017", "emptyJudge5", "emptyJudge4"],
|
parser.add_argument("--dataset", choices=["CUB_200_2011", "car", "dog", "nabirds", "INat2017", "emptyJudge5", "emptyJudge4"],
|
||||||
|
<<<<<<< HEAD
|
||||||
default="emptyJudge5", help="Which dataset.")
|
default="emptyJudge5", help="Which dataset.")
|
||||||
parser.add_argument('--data_root', type=str, default='./')
|
parser.add_argument('--data_root', type=str, default='./')
|
||||||
|
=======
|
||||||
|
default="CUB_200_2011", help="Which dataset.")
|
||||||
|
parser.add_argument('--data_root', type=str, default='/data/pfc/fineGrained')
|
||||||
|
>>>>>>> develop
|
||||||
parser.add_argument("--model_type", choices=["ViT-B_16", "ViT-B_32", "ViT-L_16", "ViT-L_32", "ViT-H_14"],
|
parser.add_argument("--model_type", choices=["ViT-B_16", "ViT-B_32", "ViT-L_16", "ViT-L_32", "ViT-H_14"],
|
||||||
default="ViT-B_16",help="Which variant to use.")
|
default="ViT-B_16",help="Which variant to use.")
|
||||||
parser.add_argument("--pretrained_dir", type=str, default="./preckpts/ViT-B_16.npz",
|
parser.add_argument("--pretrained_dir", type=str, default="./preckpts/ViT-B_16.npz",
|
||||||
@ -297,7 +324,11 @@ def main():
|
|||||||
help="Total batch size for training.")
|
help="Total batch size for training.")
|
||||||
parser.add_argument("--eval_batch_size", default=8, type=int,
|
parser.add_argument("--eval_batch_size", default=8, type=int,
|
||||||
help="Total batch size for eval.")
|
help="Total batch size for eval.")
|
||||||
|
<<<<<<< HEAD
|
||||||
parser.add_argument("--eval_every", default=786, type=int,
|
parser.add_argument("--eval_every", default=786, type=int,
|
||||||
|
=======
|
||||||
|
parser.add_argument("--eval_every", default=200, type=int, #200
|
||||||
|
>>>>>>> develop
|
||||||
help="Run prediction on validation set every so many steps."
|
help="Run prediction on validation set every so many steps."
|
||||||
"Will always run one evaluation at the end of training.")
|
"Will always run one evaluation at the end of training.")
|
||||||
|
|
||||||
@ -305,7 +336,11 @@ def main():
|
|||||||
help="The initial learning rate for SGD.")
|
help="The initial learning rate for SGD.")
|
||||||
parser.add_argument("--weight_decay", default=0.00001, type=float,
|
parser.add_argument("--weight_decay", default=0.00001, type=float,
|
||||||
help="Weight deay if we apply some.")
|
help="Weight deay if we apply some.")
|
||||||
|
<<<<<<< HEAD
|
||||||
parser.add_argument("--num_steps", default=78600, type=int, #100000
|
parser.add_argument("--num_steps", default=78600, type=int, #100000
|
||||||
|
=======
|
||||||
|
parser.add_argument("--num_steps", default=40000, type=int, #100000
|
||||||
|
>>>>>>> develop
|
||||||
help="Total number of training epochs to perform.")
|
help="Total number of training epochs to perform.")
|
||||||
parser.add_argument("--decay_type", choices=["cosine", "linear"], default="cosine",
|
parser.add_argument("--decay_type", choices=["cosine", "linear"], default="cosine",
|
||||||
help="How to decay the learning rate.")
|
help="How to decay the learning rate.")
|
||||||
@ -331,8 +366,12 @@ def main():
|
|||||||
# Setup CUDA, GPU & distributed training
|
# Setup CUDA, GPU & distributed training
|
||||||
if args.local_rank == -1:
|
if args.local_rank == -1:
|
||||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||||
|
<<<<<<< HEAD
|
||||||
|
=======
|
||||||
|
#print('torch.cuda.device_count()>>>>>>>>>>>>>>>>>>>>>>>>>', torch.cuda.device_count())
|
||||||
|
>>>>>>> develop
|
||||||
args.n_gpu = torch.cuda.device_count()
|
args.n_gpu = torch.cuda.device_count()
|
||||||
print('torch.cuda.device_count()>>>>>>>>>>>>>>>>>>>>>>>>>', torch.cuda.device_count())
|
#print('torch.cuda.device_count()>>>>>>>>>>>>>>>>>>>>>>>>>', torch.cuda.device_count())
|
||||||
else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
|
else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
|
||||||
torch.cuda.set_device(args.local_rank)
|
torch.cuda.set_device(args.local_rank)
|
||||||
device = torch.device("cuda", args.local_rank)
|
device = torch.device("cuda", args.local_rank)
|
||||||
|
Reference in New Issue
Block a user