Files
ieemoo-ai-searchv2/cirtorch/examples/train.py
2022-11-22 15:32:06 +08:00

581 lines
23 KiB
Python
Executable File

import sys
sys.path.append('/home/lc/project/Search_By_Image_Upgrade')
import argparse
import os
import shutil
import time
import math
import pickle
import pdb
import numpy as np
import torch
import torch.nn as nn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.models as models
from cirtorch.networks.imageretrievalnet import init_network, extract_vectors
from cirtorch.layers.loss import ContrastiveLoss, TripletLoss
from cirtorch.datasets.datahelpers import collate_tuples, cid2filename
from cirtorch.datasets.traindataset import TuplesDataset
from cirtorch.datasets.testdataset import configdataset
from cirtorch.utils.download import download_train, download_test
from cirtorch.utils.whiten import whitenlearn, whitenapply
from cirtorch.utils.evaluate import compute_map_and_print
from cirtorch.utils.general import get_data_root, htime
# Closed sets of values accepted by the command-line options defined below.
training_dataset_names = ['retrieval-SfM-120k']
test_datasets_names = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
test_whiten_names = ['retrieval-SfM-30k', 'retrieval-SfM-120k']

# every lowercase, non-dunder callable found in torchvision.models
model_names = sorted(
    name for name, member in models.__dict__.items()
    if name.islower() and not name.startswith("__") and callable(member)
)

pool_names = ['mac', 'spoc', 'gem', 'gemmp']
loss_names = ['contrastive', 'triplet']
optimizer_names = ['sgd', 'adam']
parser = argparse.ArgumentParser(description='PyTorch CNN Image Retrieval Training')

# export directory, training and val datasets, test datasets
# nargs='?' is required for the default of a positional argument to take effect;
# without it argparse always demands the argument and 'models' is never used
parser.add_argument('directory', metavar='EXPORT_DIR', nargs='?', default='models',
                    help='destination where trained network should be saved (default: models)')
parser.add_argument('--training-dataset', '-d', metavar='DATASET', default='retrieval-SfM-120k',
                    choices=training_dataset_names,
                    help='training dataset: ' +
                         ' | '.join(training_dataset_names) +
                         ' (default: retrieval-SfM-120k)')
# NOTE(review): store_false combined with default=False means args.val is always
# False, so validation never runs whether or not --no-val is given. Behaviour is
# kept as-is here; confirm whether the default was meant to be True.
parser.add_argument('--no-val', dest='val', action='store_false', default=False,
                    help='do not run validation')
parser.add_argument('--test-datasets', '-td', metavar='DATASETS', default='roxford5k,rparis6k',
                    help='comma separated list of test datasets: ' +
                         ' | '.join(test_datasets_names) +
                         ' (default: roxford5k,rparis6k)')
parser.add_argument('--test-whiten', metavar='DATASET', default='', choices=test_whiten_names,
                    help='dataset used to learn whitening for testing: ' +
                         ' | '.join(test_whiten_names) +
                         ' (default: None)')
parser.add_argument('--test-freq', default=1, type=int, metavar='N',
                    help='run test evaluation every N epochs (default: 1)')

# network architecture and initialization options
parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet50', choices=model_names,
                    help='model architecture: ' +
                         ' | '.join(model_names) +
                         ' (default: resnet50)')
parser.add_argument('--pool', '-p', metavar='POOL', default='gem', choices=pool_names,
                    help='pooling options: ' +
                         ' | '.join(pool_names) +
                         ' (default: gem)')
parser.add_argument('--local-whitening', '-lw', dest='local_whitening', action='store_true',
                    help='train model with learnable local whitening (linear layer) before the pooling')
parser.add_argument('--regional', '-r', dest='regional', action='store_true',
                    help='train model with regional pooling using fixed grid')
parser.add_argument('--whitening', '-w', dest='whitening', action='store_true',
                    help='train model with learnable whitening (linear layer) after the pooling')
parser.add_argument('--not-pretrained', dest='pretrained', action='store_false',
                    help='initialize model with random weights (default: pretrained on imagenet)')
parser.add_argument('--loss', '-l', metavar='LOSS', default='contrastive',
                    choices=loss_names,
                    help='training loss options: ' +
                         ' | '.join(loss_names) +
                         ' (default: contrastive)')
parser.add_argument('--loss-margin', '-lm', metavar='LM', default=0.7, type=float,
                    help='loss margin: (default: 0.7)')

# train/val options specific for image retrieval learning
parser.add_argument('--image-size', default=648, type=int, metavar='N',
                    help='maximum size of longer image side used for training (default: 648)')
parser.add_argument('--neg-num', '-nn', default=5, type=int, metavar='N',
                    help='number of negative image per train/val tuple (default: 5)')
parser.add_argument('--query-size', '-qs', default=2000, type=int, metavar='N',
                    help='number of queries randomly drawn per one train epoch (default: 2000)')
parser.add_argument('--pool-size', '-ps', default=20000, type=int, metavar='N',
                    help='size of the pool for hard negative mining (default: 20000)')

# standard train/val options
parser.add_argument('--gpu-id', '-g', default='0,1', metavar='N',
                    help='gpu id used for training (default: 0,1)')
parser.add_argument('--workers', '-j', default=8, type=int, metavar='N',
                    help='number of data loading workers (default: 8)')
parser.add_argument('--epochs', default=100, type=int, metavar='N',
                    help='number of total epochs to run (default: 100)')
parser.add_argument('--batch-size', '-b', default=32, type=int, metavar='N',
                    help='number of (q,p,n1,...,nN) tuples in a mini-batch (default: 32)')
parser.add_argument('--update-every', '-u', default=1, type=int, metavar='N',
                    help='update model weights every N batches, used to handle really large batches, ' +
                         'batch_size effectively becomes update_every x batch_size (default: 1)')
parser.add_argument('--optimizer', '-o', metavar='OPTIMIZER', default='adam',
                    choices=optimizer_names,
                    help='optimizer options: ' +
                         ' | '.join(optimizer_names) +
                         ' (default: adam)')
parser.add_argument('--lr', '--learning-rate', default=1e-6, type=float,
                    metavar='LR', help='initial learning rate (default: 1e-6)')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-6, type=float,
                    metavar='W', help='weight decay (default: 1e-6)')
parser.add_argument('--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='FILENAME',
                    help='name of the latest checkpoint (default: None)')

# lowest loss seen so far; updated by main() and stored in every checkpoint
min_loss = float('inf')
def main():
    """Parse the command line, build model/optimizer/data loaders and run training.

    Sets the module-level ``args`` (read by ``train``, ``validate`` and ``test``)
    and updates the module-level ``min_loss``. Every 10th epoch a checkpoint is
    written through ``save_checkpoint``; an epoch that improves on ``min_loss``
    additionally refreshes ``model_best.pth.tar`` even when it is not a
    periodic-save epoch.

    Raises:
        ValueError: if ``--test-datasets`` names an unknown dataset.
        RuntimeError: if the selected loss or optimizer is not available.
    """
    global args, min_loss
    args = parser.parse_args()

    # manually check if there are unknown test datasets
    for dataset in args.test_datasets.split(','):
        if dataset not in test_datasets_names:
            raise ValueError('Unsupported or unknown test dataset: {}!'.format(dataset))

    # check if train/test datasets are downloaded
    # and download if they are not
    download_train(get_data_root())
    download_test(get_data_root())

    # create export dir if it doesnt exist;
    # the sub-directory name encodes the full training configuration
    name_parts = [args.training_dataset, args.arch, args.pool]
    if args.local_whitening:
        name_parts.append("lwhiten")
    if args.regional:
        name_parts.append("r")
    if args.whitening:
        name_parts.append("whiten")
    if not args.pretrained:
        name_parts.append("notpretrained")
    name_parts.append("{}_m{:.2f}".format(args.loss, args.loss_margin))
    name_parts.append("{}_lr{:.1e}_wd{:.1e}".format(args.optimizer, args.lr, args.weight_decay))
    name_parts.append("nnum{}_qsize{}_psize{}".format(args.neg_num, args.query_size, args.pool_size))
    name_parts.append("bsize{}_uevery{}_imsize{}".format(args.batch_size, args.update_every, args.image_size))
    args.directory = os.path.join(args.directory, "_".join(name_parts))
    print(">> Creating directory if it does not exist:\n>> '{}'".format(args.directory))
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(args.directory, exist_ok=True)

    # set cuda visible device
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    # set random seeds
    # TODO: maybe pass as argument in future implementation?
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    np.random.seed(0)

    # initialize model
    if args.pretrained:
        print(">> Using pre-trained model '{}'".format(args.arch))
    else:
        print(">> Using model from scratch (random weights) '{}'".format(args.arch))
    model_params = {
        'architecture': args.arch,
        'pooling': args.pool,
        'local_whitening': args.local_whitening,
        'regional': args.regional,
        'whitening': args.whitening,
        # 'mean' and 'std' are not set: init_network will use its defaults
        'pretrained': args.pretrained,
    }
    model = init_network(model_params)

    # move network to gpu
    model.cuda()

    # define loss function (criterion) and optimizer
    if args.loss == 'contrastive':
        criterion = ContrastiveLoss(margin=args.loss_margin).cuda()
    elif args.loss == 'triplet':
        criterion = TripletLoss(margin=args.loss_margin).cuda()
    else:
        raise RuntimeError("Loss {} not available!".format(args.loss))

    # parameters split into features, pool, whitening
    # IMPORTANT: no weight decay for pooling parameter p in GeM or regional-GeM
    parameters = []
    # add feature parameters
    parameters.append({'params': model.features.parameters()})
    # add local whitening if exists
    if model.lwhiten is not None:
        parameters.append({'params': model.lwhiten.parameters()})
    # add pooling parameters (or regional whitening which is part of the pooling layer!)
    if not args.regional:
        # global, only pooling parameter p weight decay should be 0
        if args.pool == 'gem':
            parameters.append({'params': model.pool.parameters(), 'lr': args.lr*10, 'weight_decay': 0})
        elif args.pool == 'gemmp':
            parameters.append({'params': model.pool.parameters(), 'lr': args.lr*100, 'weight_decay': 0})
    else:
        # regional, pooling parameter p weight decay should be 0,
        # and we want to add regional whitening if it is there
        if args.pool == 'gem':
            parameters.append({'params': model.pool.rpool.parameters(), 'lr': args.lr*10, 'weight_decay': 0})
        elif args.pool == 'gemmp':
            parameters.append({'params': model.pool.rpool.parameters(), 'lr': args.lr*100, 'weight_decay': 0})
        if model.pool.whiten is not None:
            parameters.append({'params': model.pool.whiten.parameters()})
    # add final whitening if exists
    if model.whiten is not None:
        parameters.append({'params': model.whiten.parameters()})

    # define optimizer
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(parameters, args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(parameters, args.lr, weight_decay=args.weight_decay)
    else:
        # unreachable through the CLI (argparse choices), but fail loudly
        # instead of hitting an unbound `optimizer` further down
        raise RuntimeError("Optimizer {} not available!".format(args.optimizer))

    # define learning rate decay schedule
    # TODO: maybe pass as argument in future implementation?
    exp_decay = math.exp(-0.01)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=exp_decay)

    # optionally resume from a checkpoint
    start_epoch = 0
    if args.resume:
        args.resume = os.path.join(args.directory, args.resume)
        if os.path.isfile(args.resume):
            # load checkpoint weights and update model and optimizer
            print(">> Loading checkpoint:\n>> '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            min_loss = checkpoint['min_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            # important not to forget scheduler updating
            scheduler = torch.optim.lr_scheduler.ExponentialLR(
                optimizer, gamma=exp_decay, last_epoch=checkpoint['epoch']-1)
        else:
            print(">> No checkpoint found at '{}'".format(args.resume))

    # Data loading code
    normalize = transforms.Normalize(mean=model.meta['mean'], std=model.meta['std'])
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])
    train_dataset = TuplesDataset(
        name=args.training_dataset,
        mode='train',
        imsize=args.image_size,
        nnum=args.neg_num,
        qsize=args.query_size,
        poolsize=args.pool_size,
        transform=transform
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True, sampler=None,
        drop_last=True, collate_fn=collate_tuples
    )
    if args.val:
        val_dataset = TuplesDataset(
            name=args.training_dataset,
            mode='val',
            imsize=args.image_size,
            nnum=args.neg_num,
            qsize=float('Inf'),
            poolsize=float('Inf'),
            transform=transform
        )
        val_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True,
            drop_last=True, collate_fn=collate_tuples
        )

    # evaluate the network before starting
    # this might not be necessary?
    #test(args.test_datasets, model)

    for epoch in range(start_epoch, args.epochs):

        # set manual seeds per epoch
        np.random.seed(epoch)
        torch.manual_seed(epoch)
        torch.cuda.manual_seed_all(epoch)

        # adjust learning rate for each epoch
        # NOTE(review): recent PyTorch documents calling scheduler.step() after
        # the epoch's optimizer.step() calls; the original ordering is kept so
        # the learning-rate trajectory is unchanged -- confirm against the
        # installed PyTorch version.
        scheduler.step()

        # train for one epoch on train set
        loss = train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set (its loss replaces the training loss
        # as the model-selection criterion)
        if args.val:
            with torch.no_grad():
                loss = validate(val_loader, model, criterion, epoch)

        # evaluate on test datasets every test_freq epochs
        #if (epoch + 1) % args.test_freq == 0:
        #    with torch.no_grad():
        #        test(args.test_datasets, model)

        # remember best loss and save checkpoint
        is_best = loss < min_loss
        min_loss = min(loss, min_loss)

        periodic_save = (epoch + 1) % 10 == 0
        if periodic_save or is_best:
            state = {
                'epoch': epoch + 1,
                'meta': model.meta,
                'state_dict': model.state_dict(),
                'min_loss': min_loss,
                'optimizer': optimizer.state_dict(),
            }
            if periodic_save:
                save_checkpoint(state, is_best, args.directory)
            else:
                # A new best between periodic saves used to be lost, leaving
                # model_best.pth.tar out of sync with the recorded min_loss.
                # Refresh only the "best" file so the number of per-epoch
                # checkpoints on disk stays the same.
                torch.save(state, os.path.join(args.directory, 'model_best.pth.tar'))
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch and return the average per-tuple loss.

    Tuples are processed one at a time: each tuple gets its own forward and
    backward pass so that gradients accumulate without holding the whole
    batch in memory. Weights are updated every ``args.update_every`` batches
    and once more on the final batch, so gradients accumulated over a trailing
    partial group are applied rather than discarded.

    Args:
        train_loader: loader whose ``dataset`` provides ``create_epoch_tuples``
            and which yields ``(images, target)`` pairs of per-tuple sequences.
        model: network to train; ``model.meta['outputdim']`` sizes the output.
        criterion: loss called as ``criterion(output, target)``.
        optimizer: optimizer stepping the model parameters.
        epoch: zero-based epoch index, used only for progress printing.

    Returns:
        The running average of the per-tuple losses over the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # create tuples for training (the return value is not needed here)
    train_loader.dataset.create_epoch_tuples(model)

    # switch to train mode, but keep BatchNorm layers in eval mode
    model.train()
    model.apply(set_batchnorm_eval)

    # zero out gradients
    optimizer.zero_grad()

    num_batches = len(train_loader)
    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        nq = len(images)      # number of training tuples
        ni = len(images[0])   # number of images per tuple

        for q in range(nq):
            output = torch.zeros(model.meta['outputdim'], ni).cuda()
            for imi in range(ni):
                # compute output vector for image imi
                output[:, imi] = model(images[q][imi].cuda()).squeeze()

            # reducing memory consumption:
            # compute loss for this query tuple only
            # then, do backward pass for one tuple only
            # each backward pass gradients will be accumulated
            # the optimization step is performed for the full batch later
            loss = criterion(output, target[q].cuda())
            losses.update(loss.item())
            loss.backward()

        is_last_batch = (i + 1) == num_batches
        if (i + 1) % args.update_every == 0 or is_last_batch:
            # do one step for multiple batches using the accumulated gradients;
            # also step on the last batch, otherwise leftover gradients would be
            # wiped by the zero_grad() at the start of the next epoch
            optimizer.step()
            # zero out gradients so we can
            # accumulate new ones over batches
            optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if (i+1) % args.print_freq == 0 or i == 0 or is_last_batch:
            print('>> Train: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                   epoch+1, i+1, num_batches, batch_time=batch_time,
                   data_time=data_time, loss=losses))

    return losses.avg
def validate(val_loader, model, criterion, epoch):
    """Evaluate the model on the validation tuples and return the average loss.

    The loss is computed once per batch over all tuples together, divided by
    the number of tuples in the batch, and averaged with that count as weight.
    No backward pass is performed here.
    """
    timer = AverageMeter()
    losses = AverageMeter()

    # create tuples for validation (return value not needed)
    val_loader.dataset.create_epoch_tuples(model)

    # switch to evaluate mode
    model.eval()

    total_batches = len(val_loader)
    tick = time.time()
    for step, (batch, target) in enumerate(val_loader, start=1):
        nq = len(batch)      # number of tuples in this batch
        ni = len(batch[0])   # number of images per tuple

        # one column per image, tuples laid out back to back
        output = torch.zeros(model.meta['outputdim'], nq*ni).cuda()
        for q in range(nq):
            for imi in range(ni):
                # compute output vector for image imi of query q
                output[:, q*ni + imi] = model(batch[q][imi].cuda()).squeeze()

        # no need to reduce memory consumption (no backward pass):
        # compute loss for the full batch
        loss = criterion(output, torch.cat(target).cuda())

        # record loss
        losses.update(loss.item()/nq, nq)

        # measure elapsed time
        now = time.time()
        timer.update(now - tick)
        tick = time.time()

        should_print = step % args.print_freq == 0 or step == 1 or step == total_batches
        if should_print:
            print('>> Val: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                   epoch+1, step, total_batches, batch_time=timer, loss=losses))

    return losses.avg
def test(datasets, net):
    """Evaluate ``net`` on retrieval test datasets and print the results.

    Args:
        datasets: comma separated string of dataset names, or an iterable of
            names. ``None`` falls back to ``args.test_datasets``. (Previously
            this argument was ignored and ``args.test_datasets`` always used.)
        net: network to evaluate; moved to GPU and switched to eval mode.

    If ``args.test_whiten`` is set, a whitening is first learned on that
    dataset and results are additionally reported with whitening applied.
    """
    print('>> Evaluating network on test datasets...')

    # honour the argument instead of silently overriding it
    if datasets is None:
        datasets = args.test_datasets
    if isinstance(datasets, str):
        datasets = datasets.split(',')

    # for testing we use image size of max 1024
    image_size = 1024

    # moving network to gpu and eval mode
    net.cuda()
    net.eval()

    # set up the transform
    normalize = transforms.Normalize(
        mean=net.meta['mean'],
        std=net.meta['std']
    )
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])

    # compute whitening
    if args.test_whiten:
        start = time.time()
        print('>> {}: Learning whitening...'.format(args.test_whiten))

        # loading db
        db_root = os.path.join(get_data_root(), 'train', args.test_whiten)
        ims_root = os.path.join(db_root, 'ims')
        db_fn = os.path.join(db_root, '{}-whiten.pkl'.format(args.test_whiten))
        # NOTE: pickle.load can execute arbitrary code; only safe as long as
        # the file under the data root comes from a trusted source
        with open(db_fn, 'rb') as f:
            db = pickle.load(f)
        images = [cid2filename(cid, ims_root) for cid in db['cids']]

        # extract whitening vectors
        print('>> {}: Extracting...'.format(args.test_whiten))
        wvecs = extract_vectors(net, images, image_size, transform)  # implemented with torch.no_grad

        # learning whitening
        print('>> {}: Learning...'.format(args.test_whiten))
        wvecs = wvecs.numpy()
        m, P = whitenlearn(wvecs, db['qidxs'], db['pidxs'])
        Lw = {'m': m, 'P': P}

        print('>> {}: elapsed time: {}'.format(args.test_whiten, htime(time.time()-start)))
    else:
        Lw = None

    # evaluate on test datasets
    for dataset in datasets:
        start = time.time()
        print('>> {}: Extracting...'.format(dataset))

        # prepare config structure for the test dataset
        cfg = configdataset(dataset, os.path.join(get_data_root(), 'test'))
        images = [cfg['im_fname'](cfg, i) for i in range(cfg['n'])]
        qimages = [cfg['qim_fname'](cfg, i) for i in range(cfg['nq'])]
        bbxs = [tuple(cfg['gnd'][i]['bbx']) for i in range(cfg['nq'])]

        # extract database and query vectors
        print('>> {}: database images...'.format(dataset))
        vecs = extract_vectors(net, images, image_size, transform)  # implemented with torch.no_grad
        print('>> {}: query images...'.format(dataset))
        qvecs = extract_vectors(net, qimages, image_size, transform, bbxs)  # implemented with torch.no_grad

        print('>> {}: Evaluating...'.format(dataset))

        # convert to numpy
        vecs = vecs.numpy()
        qvecs = qvecs.numpy()

        # search, rank, and print
        scores = np.dot(vecs.T, qvecs)
        ranks = np.argsort(-scores, axis=0)
        compute_map_and_print(dataset, ranks, cfg['gnd'])

        if Lw is not None:
            # whiten the vectors
            vecs_lw = whitenapply(vecs, Lw['m'], Lw['P'])
            qvecs_lw = whitenapply(qvecs, Lw['m'], Lw['P'])

            # search, rank, and print
            scores = np.dot(vecs_lw.T, qvecs_lw)
            ranks = np.argsort(-scores, axis=0)
            compute_map_and_print(dataset + ' + whiten', ranks, cfg['gnd'])

        print('>> {}: elapsed time: {}'.format(dataset, htime(time.time()-start)))
def save_checkpoint(state, is_best, directory):
    """Write ``state`` to ``model_epoch<N>.pth.tar`` inside ``directory``.

    ``N`` is taken from ``state['epoch']``. When ``is_best`` is true the file
    is additionally copied to ``model_best.pth.tar`` in the same directory.
    """
    epoch_path = os.path.join(directory, 'model_epoch%d.pth.tar' % state['epoch'])
    torch.save(state, epoch_path)
    if not is_best:
        return
    best_path = os.path.join(directory, 'model_best.pth.tar')
    shutil.copyfile(epoch_path, best_path)
class AverageMeter(object):
    """Keeps the most recent value together with a running weighted mean.

    Attributes set by ``reset``/``update``: ``val`` (last value), ``sum``
    (weighted total), ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        # start over: all statistics back to zero
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        # `n` is the weight of `val`, e.g. the number of samples it summarises
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
def set_batchnorm_eval(m):
    """Put ``m`` into eval mode if its class name contains ``'BatchNorm'``.

    Intended for ``model.apply(...)``. Training runs one image at a time, so
    batch statistics would not be per batch; the running mean and std are
    therefore frozen. Only the mode is switched: the layer's parameters are
    left untouched, so scale and bias can still be learned.
    """
    if 'BatchNorm' in m.__class__.__name__:
        m.eval()
# run training only when executed as a script, not when imported
if __name__ == '__main__':
    main()