first push

2022-11-22 15:32:06 +08:00
commit f4eee81c45
81 changed files with 31537 additions and 0 deletions

BIN
cirtorch/.DS_Store vendored Normal file

Binary file not shown.
6
cirtorch/__init__.py Executable file

@@ -0,0 +1,6 @@
from . import datasets, examples, layers, networks, utils
from .datasets import datahelpers, genericdataset, testdataset, traindataset
from .layers import functional, loss, normalization, pooling
from .networks import imageretrievalnet
from .utils import general, download, evaluate, whiten

0
cirtorch/datasets/__init__.py Executable file

56
cirtorch/datasets/datahelpers.py Executable file

@@ -0,0 +1,56 @@
import os
from PIL import Image
import torch
def cid2filename(cid, prefix):
"""
Creates a training image path out of its CID name
Arguments
---------
cid : name of the image
prefix : root directory where images are saved
Returns
-------
filename : full image filename
"""
return os.path.join(prefix, cid[-2:], cid[-4:-2], cid[-6:-4], cid)
def pil_loader(path):
# open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
with open(path, 'rb') as f:
img = Image.open(f)
return img.convert('RGB')
def accimage_loader(path):
import accimage
try:
return accimage.Image(path)
except IOError:
# Potentially a decoding problem, fall back to PIL.Image
return pil_loader(path)
def default_loader(path):
from torchvision import get_image_backend
if get_image_backend() == 'accimage':
return accimage_loader(path)
else:
return pil_loader(path)
def imresize(img, imsize):
img.thumbnail((imsize, imsize), Image.ANTIALIAS)
return img
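# flip: reverse tensor x along dimension dim via a reversed index
# (equivalent to torch.flip(x, [dim]) in current PyTorch)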
def flip(x, dim):
xsize = x.size()
dim = x.dim() + dim if dim < 0 else dim
x = x.view(-1, *xsize[dim:])
x = x.view(x.size(0), x.size(1), -1)[:, getattr(torch.arange(x.size(1)-1, -1, -1), ('cpu','cuda')[x.is_cuda])().long(), :]
return x.view(xsize)
def collate_tuples(batch):
if len(batch) == 1:
return [batch[0][0]], [batch[0][1]]
return [batch[i][0] for i in range(len(batch))], [batch[i][1] for i in range(len(batch))]
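
For reference, a minimal sketch of how cid2filename() nests an image three directory levels deep by its CID suffix, and of what collate_tuples() returns; the CID string and paths below are made up:

# editor's sketch, not part of the file above; the CID is hypothetical
from cirtorch.datasets.datahelpers import cid2filename, collate_tuples

cid = '27e61a8c6bb5d0bd0767ba1c32e44d4a453afe0e'
print(cid2filename(cid, '/data/ims'))
# -> /data/ims/0e/fe/3a/27e61a8c6bb5d0bd0767ba1c32e44d4a453afe0e

# collate_tuples keeps tuples grouped instead of stacking them into tensors
batch = [(['q1', 'p1', 'n1'], [-1, 1, 0]), (['q2', 'p2', 'n2'], [-1, 1, 0])]
print(collate_tuples(batch))
# -> ([['q1', 'p1', 'n1'], ['q2', 'p2', 'n2']], [[-1, 1, 0], [-1, 1, 0]])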

121
cirtorch/datasets/genericdataset.py Executable file

@@ -0,0 +1,121 @@
import os
import pdb
import torch
import torch.utils.data as data
from cirtorch.datasets.datahelpers import default_loader, imresize
class ImagesFromList(data.Dataset):
"""A generic data loader that loads images from a list
(Based on ImageFolder from pytorch)
Args:
root (string): Root directory path.
images (list): Relative image paths as strings.
imsize (int, Default: None): Defines the maximum size of longer image side
bbxs (list): List of (x1,y1,x2,y2) tuples to crop the query images
transform (callable, optional): A function/transform that takes in a PIL image
and returns a transformed version. E.g., ``transforms.RandomCrop``
loader (callable, optional): A function to load an image given its path.
Attributes:
images_fn (list): List of full image filenames
"""
def __init__(self, root, images, imsize=None, bbxs=None, transform=None, loader=default_loader):
images_fn = [os.path.join(root,images[i]) for i in range(len(images))]
if len(images_fn) == 0:
raise(RuntimeError("Dataset contains 0 images!"))
self.root = root
self.images = images
self.imsize = imsize
self.images_fn = images_fn
self.bbxs = bbxs
self.transform = transform
self.loader = loader
def __getitem__(self, index):
"""
Args:
index (int): Index
Returns:
image (PIL): Loaded image
"""
path = self.images_fn[index]
img = self.loader(path)
imfullsize = max(img.size)
if self.bbxs is not None:
img = img.crop(self.bbxs[index])
if self.imsize is not None:
if self.bbxs is not None:
# scale relative to the full (uncropped) image size
img = imresize(img, self.imsize * max(img.size) / imfullsize)
else:
img = imresize(img, self.imsize)
if self.transform is not None:
img = self.transform(img)
return img, path
def __len__(self):
return len(self.images_fn)
def __repr__(self):
fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
fmt_str += ' Number of images: {}\n'.format(self.__len__())
fmt_str += ' Root Location: {}\n'.format(self.root)
tmp = ' Transforms (if any): '
fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
return fmt_str
class ImagesFromDataList(data.Dataset):
"""A generic data loader that loads images given as an array of pytorch tensors
(Based on ImageFolder from pytorch)
Args:
images (list): Images as tensors.
transform (callable, optional): A function/transform that takes an image tensor
and returns a transformed version. E.g., ``normalize`` with mean and std
"""
def __init__(self, images, transform=None):
if len(images) == 0:
raise(RuntimeError("Dataset contains 0 images!"))
self.images = images
self.transform = transform
def __getitem__(self, index):
"""
Args:
index (int): Index
Returns:
image (Tensor): Loaded image
"""
img = self.images[index]
if self.transform is not None:
img = self.transform(img)
if len(img.size()):
img = img.unsqueeze(0)
return img
def __len__(self):
return len(self.images)
def __repr__(self):
fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
fmt_str += ' Number of images: {}\n'.format(self.__len__())
tmp = ' Transforms (if any): '
fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
return fmt_str
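
A minimal usage sketch for ImagesFromList; the image paths are placeholders, and batch_size=1 is used because images keep their individual sizes:

# editor's sketch; paths are hypothetical
import torch
from torchvision import transforms
from cirtorch.datasets.genericdataset import ImagesFromList

dataset = ImagesFromList(root='', images=['/data/a.jpg', '/data/b.jpg'],
                         imsize=1024, transform=transforms.ToTensor())
loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
for img, path in loader:
    print(path[0], img.shape)  # img is a (1, 3, H, W) tensor, path the source file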

38
cirtorch/datasets/testdataset.py Executable file

@@ -0,0 +1,38 @@
import os
import pickle
DATASETS = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
def configdataset(dataset, dir_main):
dataset = dataset.lower()
if dataset not in DATASETS:
raise ValueError('Unknown dataset: {}!'.format(dataset))
# loading imlist, qimlist, and gnd, in cfg as a dict
gnd_fname = os.path.join(dir_main, dataset, 'gnd_{}.pkl'.format(dataset))
with open(gnd_fname, 'rb') as f:
cfg = pickle.load(f)
cfg['gnd_fname'] = gnd_fname
cfg['ext'] = '.jpg'
cfg['qext'] = '.jpg'
cfg['dir_data'] = os.path.join(dir_main, dataset)
cfg['dir_images'] = os.path.join(cfg['dir_data'], 'jpg')
cfg['n'] = len(cfg['imlist'])
cfg['nq'] = len(cfg['qimlist'])
cfg['im_fname'] = config_imname
cfg['qim_fname'] = config_qimname
cfg['dataset'] = dataset
return cfg
def config_imname(cfg, i):
return os.path.join(cfg['dir_images'], cfg['imlist'][i] + cfg['ext'])
def config_qimname(cfg, i):
return os.path.join(cfg['dir_images'], cfg['qimlist'][i] + cfg['qext'])
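
A usage sketch for configdataset(), assuming the ground-truth pickle for roxford5k has already been downloaded under data/test/:

# editor's sketch; the data directory is an assumption
cfg = configdataset('roxford5k', os.path.join('data', 'test'))
print(cfg['n'], cfg['nq'])       # number of database and query images
print(cfg['im_fname'](cfg, 0))   # full path of the first database image
print(cfg['qim_fname'](cfg, 0))  # full path of the first query image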

247
cirtorch/datasets/traindataset.py Executable file

@@ -0,0 +1,247 @@
import os
import pickle
import pdb
import numpy as np
import torch
import torch.utils.data as data
from cirtorch.datasets.datahelpers import default_loader, imresize, cid2filename
from cirtorch.datasets.genericdataset import ImagesFromList
from cirtorch.utils.general import get_data_root
class TuplesDataset(data.Dataset):
"""Data loader that loads training and validation tuples of
Radenovic et al., ECCV 2016: CNN Image Retrieval Learns from BoW
Args:
name (string): dataset name: 'retrieval-sfm-120k'
mode (string): 'train' or 'val' for training and validation parts of dataset
imsize (int, Default: None): Defines the maximum size of longer image side
transform (callable, optional): A function/transform that takes in a PIL image
and returns a transformed version. E.g., ``transforms.RandomCrop``
loader (callable, optional): A function to load an image given its path.
nnum (int, Default:5): Number of negatives for a query image in a training tuple
qsize (int, Default:2000): Number of query images, i.e. number of (q,p,n1,...nN) tuples, to be processed in one epoch
poolsize (int, Default:20000): Pool size for negative images re-mining
Attributes:
images (list): List of full filenames for each image
clusters (list): List of clusterID per image
qpool (list): List of all query image indexes
ppool (list): List of positive image indexes, each corresponding to query at the same position in qpool
qidxs (list): List of qsize query image indexes to be processed in an epoch
pidxs (list): List of qsize positive image indexes, each corresponding to query at the same position in qidxs
nidxs (list): List of qsize tuples of negative images
Each nidxs tuple contains nnum images corresponding to query image at the same position in qidxs
Lists qidxs, pidxs, nidxs are refreshed by calling the ``create_epoch_tuples()`` method,
i.e. new q-p pairs are picked and negative images are re-mined
"""
def __init__(self, name, mode, imsize=None, nnum=5, qsize=2000, poolsize=20000, transform=None, loader=default_loader):
if not (mode == 'train' or mode == 'val'):
raise(RuntimeError("MODE should be either train or val, passed as string"))
if name.startswith('retrieval-SfM'):
# setting up paths (hard-coded here; originally derived from get_data_root())
# data_root = get_data_root()
# db_root = os.path.join(data_root, 'train', name)
# ims_root = os.path.join(db_root, 'ims')
db_root = '/home/lc/project/Search_By_Image_Upgrade/cirtorch/IamgeRetrieval_dataset'
ims_root = '/home/lc/project/Search_By_Image_Upgrade/cirtorch/IamgeRetrieval_dataset/train'
# loading db
db_fn = os.path.join(db_root, '{}.pkl'.format('train'))
with open(db_fn, 'rb') as f:
db = pickle.load(f)[mode]
# setting full path for images: cids are used directly as filenames under ims_root,
# which replaces the nested cid2filename() layout of the original dataset:
# self.images = [cid2filename(db['cids'][i], ims_root) for i in range(len(db['cids']))]
self.images = [os.path.join(ims_root, db['cids'][i]) for i in range(len(db['cids']))]
# elif name.startswith('gl'):
# TODO: NOT IMPLEMENTED YET PROPERLY (WITH AUTOMATIC DOWNLOAD)
# db_root = '/mnt/fry2/users/datasets/landmarkscvprw18/recognition/'
# ims_root = os.path.join(db_root, 'images', 'train')
# db_fn = os.path.join(db_root, '{}.pkl'.format('train'))
# with open(db_fn, 'rb') as f:
#     db = pickle.load(f)[mode]
else:
raise(RuntimeError("Unknown dataset name!"))
# initializing tuples dataset
self.name = name
self.mode = mode
self.imsize = imsize
self.clusters = db['cluster']
self.qpool = db['qidxs']
self.ppool = db['pidxs']
## If we want to keep only unique q-p pairs
## However, ordering of pairs will change, although that is not important
# qpidxs = list(set([(self.qidxs[i], self.pidxs[i]) for i in range(len(self.qidxs))]))
# self.qidxs = [qpidxs[i][0] for i in range(len(qpidxs))]
# self.pidxs = [qpidxs[i][1] for i in range(len(qpidxs))]
# size of training subset for an epoch
self.nnum = nnum
self.qsize = min(qsize, len(self.qpool))
self.poolsize = min(poolsize, len(self.images))
self.qidxs = None
self.pidxs = None
self.nidxs = None
self.transform = transform
self.loader = loader
self.print_freq = 10
def __getitem__(self, index):
"""
Args:
index (int): Index
Returns:
images tuple (q,p,n1,...,nN): Loaded train/val tuple at index of self.qidxs
"""
if self.__len__() == 0:
raise(RuntimeError("List qidxs is empty. Run ``dataset.create_epoch_tuples(net)`` method to create subset for train/val!"))
output = []
# query image
output.append(self.loader(self.images[self.qidxs[index]]))
# positive image
output.append(self.loader(self.images[self.pidxs[index]]))
# negative images
for i in range(len(self.nidxs[index])):
output.append(self.loader(self.images[self.nidxs[index][i]]))
if self.imsize is not None:
output = [imresize(img, self.imsize) for img in output]
if self.transform is not None:
output = [self.transform(output[i]).unsqueeze_(0) for i in range(len(output))]
target = torch.Tensor([-1, 1] + [0]*len(self.nidxs[index]))
return output, target
def __len__(self):
# if not self.qidxs:
# return 0
# return len(self.qidxs)
return self.qsize
def __repr__(self):
fmt_str = self.__class__.__name__ + '\n'
fmt_str += ' Name and mode: {} {}\n'.format(self.name, self.mode)
fmt_str += ' Number of images: {}\n'.format(len(self.images))
fmt_str += ' Number of training tuples: {}\n'.format(len(self.qpool))
fmt_str += ' Number of negatives per tuple: {}\n'.format(self.nnum)
fmt_str += ' Number of tuples processed in an epoch: {}\n'.format(self.qsize)
fmt_str += ' Pool size for negative remining: {}\n'.format(self.poolsize)
tmp = ' Transforms (if any): '
fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
return fmt_str
def create_epoch_tuples(self, net):
print('>> Creating tuples for an epoch of {}-{}...'.format(self.name, self.mode))
print(">>>> used network: ")
print(net.meta_repr())
## ------------------------
## SELECTING POSITIVE PAIRS
## ------------------------
# draw qsize random queries for tuples
idxs2qpool = torch.randperm(len(self.qpool))[:self.qsize]
self.qidxs = [self.qpool[i] for i in idxs2qpool]
self.pidxs = [self.ppool[i] for i in idxs2qpool]
## ------------------------
## SELECTING NEGATIVE PAIRS
## ------------------------
# if nnum = 0 create dummy nidxs
# useful when only positives used for training
if self.nnum == 0:
self.nidxs = [[] for _ in range(len(self.qidxs))]
return 0
# draw poolsize random images for pool of negatives images
idxs2images = torch.randperm(len(self.images))[:self.poolsize]
# prepare network
net.cuda()
net.eval()
# no gradients computed, to reduce memory and increase speed
with torch.no_grad():
print('>> Extracting descriptors for query images...')
# prepare query loader
loader = torch.utils.data.DataLoader(
ImagesFromList(root='', images=[self.images[i] for i in self.qidxs], imsize=self.imsize, transform=self.transform),
batch_size=1, shuffle=False, num_workers=8, pin_memory=True
)
# extract query vectors
qvecs = torch.zeros(net.meta['outputdim'], len(self.qidxs)).cuda()
for i, input in enumerate(loader):
qvecs[:, i] = net(input[0].cuda()).data.squeeze()
if (i+1) % self.print_freq == 0 or (i+1) == len(self.qidxs):
print('\r>>>> {}/{} done...'.format(i+1, len(self.qidxs)), end='')
print('')
print('>> Extracting descriptors for negative pool...')
# prepare negative pool data loader
loader = torch.utils.data.DataLoader(
ImagesFromList(root='', images=[self.images[i] for i in idxs2images], imsize=self.imsize, transform=self.transform),
batch_size=1, shuffle=False, num_workers=8, pin_memory=True
)
# extract negative pool vectors
poolvecs = torch.zeros(net.meta['outputdim'], len(idxs2images)).cuda()
for i, input in enumerate(loader):
poolvecs[:, i] = net(input[0].cuda()).data.squeeze()
if (i+1) % self.print_freq == 0 or (i+1) == len(idxs2images):
print('\r>>>> {}/{} done...'.format(i+1, len(idxs2images)), end='')
print('')
print('>> Searching for hard negatives...')
# compute dot product scores and ranks on GPU
scores = torch.mm(poolvecs.t(), qvecs)
scores, ranks = torch.sort(scores, dim=0, descending=True)
avg_ndist = torch.tensor(0).float().cuda() # for statistics
n_ndist = torch.tensor(0).float().cuda() # for statistics
# selection of negative examples
self.nidxs = []
for q in range(len(self.qidxs)):
# do not use query cluster,
# those images are potentially positive
qcluster = self.clusters[self.qidxs[q]]
clusters = [qcluster]
nidxs = []
r = 0
while len(nidxs) < self.nnum:
potential = idxs2images[ranks[r, q]]
# take at most one image from the same cluster
if self.clusters[potential] not in clusters:
nidxs.append(potential)
clusters.append(self.clusters[potential])
avg_ndist += torch.pow(qvecs[:,q]-poolvecs[:,ranks[r, q]]+1e-6, 2).sum(dim=0).sqrt()
n_ndist += 1
r += 1
self.nidxs.append(nidxs)
print('>>>> Average negative l2-distance: {:.2f}'.format(avg_ndist/n_ndist))
print('>>>> Done')
return (avg_ndist/n_ndist).item() # return average negative l2-distance
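
The hard-negative selection above, in isolation: walk down each query's similarity ranking and keep at most one image per cluster, skipping the query's own cluster. A standalone sketch with toy data:

# editor's sketch of the cluster-constrained selection, with toy data
import torch

nnum = 2                                     # negatives wanted per query
clusters = [0, 0, 1, 2, 2, 3]                # cluster id of each pool image
ranks_q = torch.tensor([1, 2, 0, 4, 5, 3])   # pool indices sorted by similarity to one query
qcluster = 0                                 # cluster of the query itself

seen, nidxs, r = [qcluster], [], 0
while len(nidxs) < nnum:
    cand = ranks_q[r].item()
    if clusters[cand] not in seen:           # at most one negative per cluster
        nidxs.append(cand)
        seen.append(clusters[cand])
    r += 1
print(nidxs)  # [2, 4]: the top-ranked hits from clusters 1 and 2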

0
cirtorch/examples/__init__.py Executable file

266
cirtorch/examples/test.py Executable file

@@ -0,0 +1,266 @@
import argparse
import os
import time
import pickle
import pdb
import numpy as np
import torch
from torch.utils.model_zoo import load_url
from torchvision import transforms
from cirtorch.networks.imageretrievalnet import init_network, extract_vectors
from cirtorch.datasets.datahelpers import cid2filename
from cirtorch.datasets.testdataset import configdataset
from cirtorch.utils.download import download_train, download_test
from cirtorch.utils.whiten import whitenlearn, whitenapply
from cirtorch.utils.evaluate import compute_map_and_print
from cirtorch.utils.general import get_data_root, htime
PRETRAINED = {
'retrievalSfM120k-vgg16-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/retrievalSfM120k-vgg16-gem-b4dcdc6.pth',
'retrievalSfM120k-resnet101-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/retrievalSfM120k-resnet101-gem-b80fb85.pth',
# new networks with whitening learned end-to-end
'rSfM120k-tl-resnet50-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet50-gem-w-97bf910.pth',
'rSfM120k-tl-resnet101-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet101-gem-w-a155e54.pth',
'rSfM120k-tl-resnet152-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet152-gem-w-f39cada.pth',
'gl18-tl-resnet50-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet50-gem-w-83fdc30.pth',
'gl18-tl-resnet101-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet101-gem-w-a4d43db.pth',
'gl18-tl-resnet152-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet152-gem-w-21278d5.pth',
}
datasets_names = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
whitening_names = ['retrieval-SfM-30k', 'retrieval-SfM-120k']
parser = argparse.ArgumentParser(description='PyTorch CNN Image Retrieval Testing')
# network
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('--network-path', '-npath', metavar='NETWORK',
help="pretrained network or network path (destination where network is saved)")
group.add_argument('--network-offtheshelf', '-noff', metavar='NETWORK',
help="off-the-shelf network, in the format 'ARCHITECTURE-POOLING' or 'ARCHITECTURE-POOLING-{reg-lwhiten-whiten}'," +
" examples: 'resnet101-gem' | 'resnet101-gem-reg' | 'resnet101-gem-whiten' | 'resnet101-gem-lwhiten' | 'resnet101-gem-reg-whiten'")
# test options
parser.add_argument('--datasets', '-d', metavar='DATASETS', default='oxford5k,paris6k',
help="comma separated list of test datasets: " +
" | ".join(datasets_names) +
" (default: 'oxford5k,paris6k')")
parser.add_argument('--image-size', '-imsize', default=1024, type=int, metavar='N',
help="maximum size of longer image side used for testing (default: 1024)")
parser.add_argument('--multiscale', '-ms', metavar='MULTISCALE', default='[1]',
help="use multiscale vectors for testing, " +
" examples: '[1]' | '[1, 1/2**(1/2), 1/2]' | '[1, 2**(1/2), 1/2**(1/2)]' (default: '[1]')")
parser.add_argument('--whitening', '-w', metavar='WHITENING', default=None, choices=whitening_names,
help="dataset used to learn whitening for testing: " +
" | ".join(whitening_names) +
" (default: None)")
# GPU ID
parser.add_argument('--gpu-id', '-g', default='0', metavar='N',
help="gpu id used for testing (default: '0')")
def main():
args = parser.parse_args()
# check if there are unknown datasets
for dataset in args.datasets.split(','):
if dataset not in datasets_names:
raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))
# check if test datasets are downloaded
# and download them if they are not
download_train(get_data_root())
download_test(get_data_root())
# setting up the visible GPU
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
# loading network from path
if args.network_path is not None:
print(">> Loading network:\n>>>> '{}'".format(args.network_path))
if args.network_path in PRETRAINED:
# pretrained networks (downloaded automatically)
state = load_url(PRETRAINED[args.network_path], model_dir=os.path.join(get_data_root(), 'networks'))
else:
# fine-tuned network from path
state = torch.load(args.network_path)
# parsing net params from meta
# architecture, pooling, mean, std required
# the rest has default values, in case they don't exist
net_params = {}
net_params['architecture'] = state['meta']['architecture']
net_params['pooling'] = state['meta']['pooling']
net_params['local_whitening'] = state['meta'].get('local_whitening', False)
net_params['regional'] = state['meta'].get('regional', False)
net_params['whitening'] = state['meta'].get('whitening', False)
net_params['mean'] = state['meta']['mean']
net_params['std'] = state['meta']['std']
net_params['pretrained'] = False
# load network
net = init_network(net_params)
net.load_state_dict(state['state_dict'])
# if whitening is precomputed
if 'Lw' in state['meta']:
net.meta['Lw'] = state['meta']['Lw']
print(">>>> loaded network: ")
print(net.meta_repr())
# loading offtheshelf network
elif args.network_offtheshelf is not None:
# parse off-the-shelf parameters
offtheshelf = args.network_offtheshelf.split('-')
net_params = {}
net_params['architecture'] = offtheshelf[0]
net_params['pooling'] = offtheshelf[1]
net_params['local_whitening'] = 'lwhiten' in offtheshelf[2:]
net_params['regional'] = 'reg' in offtheshelf[2:]
net_params['whitening'] = 'whiten' in offtheshelf[2:]
net_params['pretrained'] = True
# load off-the-shelf network
print(">> Loading off-the-shelf network:\n>>>> '{}'".format(args.network_offtheshelf))
net = init_network(net_params)
print(">>>> loaded network: ")
print(net.meta_repr())
# setting up the multi-scale parameters
ms = list(eval(args.multiscale))
if len(ms)>1 and net.meta['pooling'] == 'gem' and not net.meta['regional'] and not net.meta['whitening']:
msp = net.pool.p.item()
print(">> Set-up multiscale:")
print(">>>> ms: {}".format(ms))
print(">>>> msp: {}".format(msp))
else:
msp = 1
# moving network to gpu and eval mode
net.cuda()
net.eval()
# set up the transform
normalize = transforms.Normalize(
mean=net.meta['mean'],
std=net.meta['std']
)
transform = transforms.Compose([
transforms.ToTensor(),
normalize
])
# compute whitening
if args.whitening is not None:
start = time.time()
if 'Lw' in net.meta and args.whitening in net.meta['Lw']:
print('>> {}: Whitening is precomputed, loading it...'.format(args.whitening))
if len(ms)>1:
Lw = net.meta['Lw'][args.whitening]['ms']
else:
Lw = net.meta['Lw'][args.whitening]['ss']
else:
# if we evaluate networks from path we should save/load whitening
# not to compute it every time
if args.network_path is not None:
whiten_fn = args.network_path + '_{}_whiten'.format(args.whitening)
if len(ms) > 1:
whiten_fn += '_ms'
whiten_fn += '.pth'
else:
whiten_fn = None
if whiten_fn is not None and os.path.isfile(whiten_fn):
print('>> {}: Whitening is precomputed, loading it...'.format(args.whitening))
Lw = torch.load(whiten_fn)
else:
print('>> {}: Learning whitening...'.format(args.whitening))
# loading db
db_root = os.path.join(get_data_root(), 'train', args.whitening)
ims_root = os.path.join(db_root, 'ims')
db_fn = os.path.join(db_root, '{}-whiten.pkl'.format(args.whitening))
with open(db_fn, 'rb') as f:
db = pickle.load(f)
images = [cid2filename(db['cids'][i], ims_root) for i in range(len(db['cids']))]
# extract whitening vectors
print('>> {}: Extracting...'.format(args.whitening))
wvecs = extract_vectors(net, images, args.image_size, transform, ms=ms, msp=msp)
# learning whitening
print('>> {}: Learning...'.format(args.whitening))
wvecs = wvecs.numpy()
m, P = whitenlearn(wvecs, db['qidxs'], db['pidxs'])
Lw = {'m': m, 'P': P}
# saving whitening if whiten_fn exists
if whiten_fn is not None:
print('>> {}: Saving to {}...'.format(args.whitening, whiten_fn))
torch.save(Lw, whiten_fn)
print('>> {}: elapsed time: {}'.format(args.whitening, htime(time.time()-start)))
else:
Lw = None
# evaluate on test datasets
datasets = args.datasets.split(',')
for dataset in datasets:
start = time.time()
print('>> {}: Extracting...'.format(dataset))
# prepare config structure for the test dataset
cfg = configdataset(dataset, os.path.join(get_data_root(), 'test'))
images = [cfg['im_fname'](cfg,i) for i in range(cfg['n'])]
qimages = [cfg['qim_fname'](cfg,i) for i in range(cfg['nq'])]
try:
bbxs = [tuple(cfg['gnd'][i]['bbx']) for i in range(cfg['nq'])]
except:
bbxs = None # for holidaysmanrot and copydays
# extract database and query vectors
print('>> {}: database images...'.format(dataset))
vecs = extract_vectors(net, images, args.image_size, transform, ms=ms, msp=msp)
print('>> {}: query images...'.format(dataset))
qvecs = extract_vectors(net, qimages, args.image_size, transform, bbxs=bbxs, ms=ms, msp=msp)
print('>> {}: Evaluating...'.format(dataset))
# convert to numpy
vecs = vecs.numpy()
qvecs = qvecs.numpy()
# search, rank, and print
scores = np.dot(vecs.T, qvecs)
ranks = np.argsort(-scores, axis=0)
compute_map_and_print(dataset, ranks, cfg['gnd'])
if Lw is not None:
# whiten the vectors
vecs_lw = whitenapply(vecs, Lw['m'], Lw['P'])
qvecs_lw = whitenapply(qvecs, Lw['m'], Lw['P'])
# search, rank, and print
scores = np.dot(vecs_lw.T, qvecs_lw)
ranks = np.argsort(-scores, axis=0)
compute_map_and_print(dataset + ' + whiten', ranks, cfg['gnd'])
print('>> {}: elapsed time: {}'.format(dataset, htime(time.time()-start)))
if __name__ == '__main__':
main()
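
Since the extracted vectors are L2-normalized, the dot products above are cosine similarities, and ranking is one matrix product plus an argsort. A toy sketch:

# editor's sketch of the search/rank step with random unit vectors
import numpy as np

D, n, nq = 4, 5, 2
vecs = np.random.randn(D, n)
vecs /= np.linalg.norm(vecs, axis=0)     # database vectors, one per column
qvecs = np.random.randn(D, nq)
qvecs /= np.linalg.norm(qvecs, axis=0)   # query vectors, one per column

scores = np.dot(vecs.T, qvecs)           # (n, nq) cosine similarities
ranks = np.argsort(-scores, axis=0)      # column q ranks database images for query q
print(ranks[:, 0])                       # best-to-worst database indices for query 0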

145
cirtorch/examples/test_e2e.py Executable file

@@ -0,0 +1,145 @@
import argparse
import os
import time
import pickle
import pdb
import numpy as np
import torch
from torch.utils.model_zoo import load_url
from torchvision import transforms
from cirtorch.networks.imageretrievalnet import init_network, extract_vectors
from cirtorch.datasets.testdataset import configdataset
from cirtorch.utils.download import download_train, download_test
from cirtorch.utils.evaluate import compute_map_and_print
from cirtorch.utils.general import get_data_root, htime
PRETRAINED = {
'rSfM120k-tl-resnet50-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet50-gem-w-97bf910.pth',
'rSfM120k-tl-resnet101-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet101-gem-w-a155e54.pth',
'rSfM120k-tl-resnet152-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet152-gem-w-f39cada.pth',
'gl18-tl-resnet50-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet50-gem-w-83fdc30.pth',
'gl18-tl-resnet101-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet101-gem-w-a4d43db.pth',
'gl18-tl-resnet152-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet152-gem-w-21278d5.pth',
}
datasets_names = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
parser = argparse.ArgumentParser(description='PyTorch CNN Image Retrieval Testing End-to-End')
# test options
parser.add_argument('--network', '-n', metavar='NETWORK',
help="network to be evaluated: " +
" | ".join(PRETRAINED.keys()))
parser.add_argument('--datasets', '-d', metavar='DATASETS', default='roxford5k,rparis6k',
help="comma separated list of test datasets: " +
" | ".join(datasets_names) +
" (default: 'roxford5k,rparis6k')")
parser.add_argument('--image-size', '-imsize', default=1024, type=int, metavar='N',
help="maximum size of longer image side used for testing (default: 1024)")
parser.add_argument('--multiscale', '-ms', metavar='MULTISCALE', default='[1]',
help="use multiscale vectors for testing, " +
" examples: '[1]' | '[1, 1/2**(1/2), 1/2]' | '[1, 2**(1/2), 1/2**(1/2)]' (default: '[1]')")
# GPU ID
parser.add_argument('--gpu-id', '-g', default='0', metavar='N',
help="gpu id used for testing (default: '0')")
def main():
args = parser.parse_args()
# check if there are unknown datasets
for dataset in args.datasets.split(','):
if dataset not in datasets_names:
raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))
# check if test datasets are downloaded
# and download them if they are not
download_train(get_data_root())
download_test(get_data_root())
# setting up the visible GPU
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
# loading network
# pretrained networks (downloaded automatically)
print(">> Loading network:\n>>>> '{}'".format(args.network))
state = load_url(PRETRAINED[args.network], model_dir=os.path.join(get_data_root(), 'networks'))
# state = torch.load(args.network)
# parsing net params from meta
# architecture, pooling, mean, std required
# the rest has default values, in case they don't exist
net_params = {}
net_params['architecture'] = state['meta']['architecture']
net_params['pooling'] = state['meta']['pooling']
net_params['local_whitening'] = state['meta'].get('local_whitening', False)
net_params['regional'] = state['meta'].get('regional', False)
net_params['whitening'] = state['meta'].get('whitening', False)
net_params['mean'] = state['meta']['mean']
net_params['std'] = state['meta']['std']
net_params['pretrained'] = False
# network initialization
net = init_network(net_params)
net.load_state_dict(state['state_dict'])
print(">>>> loaded network: ")
print(net.meta_repr())
# setting up the multi-scale parameters
ms = list(eval(args.multiscale))
print(">>>> Evaluating scales: {}".format(ms))
# moving network to gpu and eval mode
net.cuda()
net.eval()
# set up the transform
normalize = transforms.Normalize(
mean=net.meta['mean'],
std=net.meta['std']
)
transform = transforms.Compose([
transforms.ToTensor(),
normalize
])
# evaluate on test datasets
datasets = args.datasets.split(',')
for dataset in datasets:
start = time.time()
print('>> {}: Extracting...'.format(dataset))
# prepare config structure for the test dataset
cfg = configdataset(dataset, os.path.join(get_data_root(), 'test'))
images = [cfg['im_fname'](cfg,i) for i in range(cfg['n'])]
qimages = [cfg['qim_fname'](cfg,i) for i in range(cfg['nq'])]
try:
bbxs = [tuple(cfg['gnd'][i]['bbx']) for i in range(cfg['nq'])]
except:
bbxs = None # for holidaysmanrot and copydays
# extract database and query vectors
print('>> {}: database images...'.format(dataset))
vecs = extract_vectors(net, images, args.image_size, transform, ms=ms)
print('>> {}: query images...'.format(dataset))
qvecs = extract_vectors(net, qimages, args.image_size, transform, bbxs=bbxs, ms=ms)
print('>> {}: Evaluating...'.format(dataset))
# convert to numpy
vecs = vecs.numpy()
qvecs = qvecs.numpy()
# search, rank, and print
scores = np.dot(vecs.T, qvecs)
ranks = np.argsort(-scores, axis=0)
compute_map_and_print(dataset, ranks, cfg['gnd'])
print('>> {}: elapsed time: {}'.format(dataset, htime(time.time()-start)))
if __name__ == '__main__':
main()

580
cirtorch/examples/train.py Executable file

@@ -0,0 +1,580 @@
import sys
sys.path.append('/home/lc/project/Search_By_Image_Upgrade')
import argparse
import os
import shutil
import time
import math
import pickle
import pdb
import numpy as np
import torch
import torch.nn as nn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.models as models
from cirtorch.networks.imageretrievalnet import init_network, extract_vectors
from cirtorch.layers.loss import ContrastiveLoss, TripletLoss
from cirtorch.datasets.datahelpers import collate_tuples, cid2filename
from cirtorch.datasets.traindataset import TuplesDataset
from cirtorch.datasets.testdataset import configdataset
from cirtorch.utils.download import download_train, download_test
from cirtorch.utils.whiten import whitenlearn, whitenapply
from cirtorch.utils.evaluate import compute_map_and_print
from cirtorch.utils.general import get_data_root, htime
training_dataset_names = ['retrieval-SfM-120k']
test_datasets_names = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
test_whiten_names = ['retrieval-SfM-30k', 'retrieval-SfM-120k']
model_names = sorted(name for name in models.__dict__
if name.islower() and not name.startswith("__")
and callable(models.__dict__[name]))
pool_names = ['mac', 'spoc', 'gem', 'gemmp']
loss_names = ['contrastive', 'triplet']
optimizer_names = ['sgd', 'adam']
parser = argparse.ArgumentParser(description='PyTorch CNN Image Retrieval Training')
# export directory, training and val datasets, test datasets
parser.add_argument('directory', metavar='EXPORT_DIR', nargs='?', default='models',
help='destination where trained network should be saved (default: models)')
parser.add_argument('--training-dataset', '-d', metavar='DATASET', default='retrieval-SfM-120k', choices=training_dataset_names,
help='training dataset: ' +
' | '.join(training_dataset_names) +
' (default: retrieval-SfM-120k)')
parser.add_argument('--no-val', dest='val', action='store_false', default=False,
help='do not run validation (note: validation is disabled by default here)')
parser.add_argument('--test-datasets', '-td', metavar='DATASETS', default='roxford5k,rparis6k',
help='comma separated list of test datasets: ' +
' | '.join(test_datasets_names) +
' (default: roxford5k,rparis6k)')
parser.add_argument('--test-whiten', metavar='DATASET', default='', choices=test_whiten_names,
help='dataset used to learn whitening for testing: ' +
' | '.join(test_whiten_names) +
' (default: None)')
parser.add_argument('--test-freq', default=1, type=int, metavar='N',
help='run test evaluation every N epochs (default: 1)')
# network architecture and initialization options
parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet50', choices=model_names,
help='model architecture: ' +
' | '.join(model_names) +
' (default: resnet50)')
parser.add_argument('--pool', '-p', metavar='POOL', default='gem', choices=pool_names,
help='pooling options: ' +
' | '.join(pool_names) +
' (default: gem)')
parser.add_argument('--local-whitening', '-lw', dest='local_whitening', action='store_true',
help='train model with learnable local whitening (linear layer) before the pooling')
parser.add_argument('--regional', '-r', dest='regional', action='store_true',
help='train model with regional pooling using fixed grid')
parser.add_argument('--whitening', '-w', dest='whitening', action='store_true',
help='train model with learnable whitening (linear layer) after the pooling')
parser.add_argument('--not-pretrained', dest='pretrained', action='store_false',
help='initialize model with random weights (default: pretrained on imagenet)')
parser.add_argument('--loss', '-l', metavar='LOSS', default='contrastive',
choices=loss_names,
help='training loss options: ' +
' | '.join(loss_names) +
' (default: contrastive)')
parser.add_argument('--loss-margin', '-lm', metavar='LM', default=0.7, type=float,
help='loss margin: (default: 0.7)')
# train/val options specific for image retrieval learning
parser.add_argument('--image-size', default=648, type=int, metavar='N',
help='maximum size of longer image side used for training (default: 648)')
parser.add_argument('--neg-num', '-nn', default=5, type=int, metavar='N',
help='number of negative image per train/val tuple (default: 5)')
parser.add_argument('--query-size', '-qs', default=2000, type=int, metavar='N',
help='number of queries randomly drawn per one train epoch (default: 2000)')
parser.add_argument('--pool-size', '-ps', default=20000, type=int, metavar='N',
help='size of the pool for hard negative mining (default: 20000)')
# standard train/val options
parser.add_argument('--gpu-id', '-g', default='0,1', metavar='N',
help="gpu id(s) used for training (default: '0,1')")
parser.add_argument('--workers', '-j', default=8, type=int, metavar='N',
help='number of data loading workers (default: 8)')
parser.add_argument('--epochs', default=100, type=int, metavar='N',
help='number of total epochs to run (default: 100)')
parser.add_argument('--batch-size', '-b', default=32, type=int, metavar='N',
help='number of (q,p,n1,...,nN) tuples in a mini-batch (default: 32)')
parser.add_argument('--update-every', '-u', default=1, type=int, metavar='N',
help='update model weights every N batches, used to handle really large batches, ' +
'batch_size effectively becomes update_every x batch_size (default: 1)')
parser.add_argument('--optimizer', '-o', metavar='OPTIMIZER', default='adam',
choices=optimizer_names,
help='optimizer options: ' +
' | '.join(optimizer_names) +
' (default: adam)')
parser.add_argument('--lr', '--learning-rate', default=1e-6, type=float,
metavar='LR', help='initial learning rate (default: 1e-6)')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-6, type=float,
metavar='W', help='weight decay (default: 1e-6)')
parser.add_argument('--print-freq', default=10, type=int,
metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='FILENAME',
help='name of the latest checkpoint (default: None)')
min_loss = float('inf')
def main():
global args, min_loss
args = parser.parse_args()
# manually check if there are unknown test datasets
for dataset in args.test_datasets.split(','):
if dataset not in test_datasets_names:
raise ValueError('Unsupported or unknown test dataset: {}!'.format(dataset))
# check if test datasets are downloaded
# and download them if they are not
download_train(get_data_root())
download_test(get_data_root())
# create export dir if it doesn't exist
directory = "{}".format(args.training_dataset)
directory += "_{}".format(args.arch)
directory += "_{}".format(args.pool)
if args.local_whitening:
directory += "_lwhiten"
if args.regional:
directory += "_r"
if args.whitening:
directory += "_whiten"
if not args.pretrained:
directory += "_notpretrained"
directory += "_{}_m{:.2f}".format(args.loss, args.loss_margin)
directory += "_{}_lr{:.1e}_wd{:.1e}".format(args.optimizer, args.lr, args.weight_decay)
directory += "_nnum{}_qsize{}_psize{}".format(args.neg_num, args.query_size, args.pool_size)
directory += "_bsize{}_uevery{}_imsize{}".format(args.batch_size, args.update_every, args.image_size)
args.directory = os.path.join(args.directory, directory)
print(">> Creating directory if it does not exist:\n>> '{}'".format(args.directory))
if not os.path.exists(args.directory):
os.makedirs(args.directory)
# set cuda visible device
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
# set random seeds
# TODO: maybe pass as argument in future implementation?
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
np.random.seed(0)
# initialize model
if args.pretrained:
print(">> Using pre-trained model '{}'".format(args.arch))
else:
print(">> Using model from scratch (random weights) '{}'".format(args.arch))
model_params = {}
model_params['architecture'] = args.arch
model_params['pooling'] = args.pool
model_params['local_whitening'] = args.local_whitening
model_params['regional'] = args.regional
model_params['whitening'] = args.whitening
# model_params['mean'] = ... # will use default
# model_params['std'] = ... # will use default
model_params['pretrained'] = args.pretrained
model = init_network(model_params)
# move network to gpu
model.cuda()
# define loss function (criterion) and optimizer
if args.loss == 'contrastive':
criterion = ContrastiveLoss(margin=args.loss_margin).cuda()
elif args.loss == 'triplet':
criterion = TripletLoss(margin=args.loss_margin).cuda()
else:
raise(RuntimeError("Loss {} not available!".format(args.loss)))
# parameters split into features, pool, whitening
# IMPORTANT: no weight decay for pooling parameter p in GeM or regional-GeM
parameters = []
# add feature parameters
parameters.append({'params': model.features.parameters()})
# add local whitening if exists
if model.lwhiten is not None:
parameters.append({'params': model.lwhiten.parameters()})
# add pooling parameters (or regional whitening which is part of the pooling layer!)
if not args.regional:
# global, only pooling parameter p weight decay should be 0
if args.pool == 'gem':
parameters.append({'params': model.pool.parameters(), 'lr': args.lr*10, 'weight_decay': 0})
elif args.pool == 'gemmp':
parameters.append({'params': model.pool.parameters(), 'lr': args.lr*100, 'weight_decay': 0})
else:
# regional, pooling parameter p weight decay should be 0,
# and we want to add regional whitening if it is there
if args.pool == 'gem':
parameters.append({'params': model.pool.rpool.parameters(), 'lr': args.lr*10, 'weight_decay': 0})
elif args.pool == 'gemmp':
parameters.append({'params': model.pool.rpool.parameters(), 'lr': args.lr*100, 'weight_decay': 0})
if model.pool.whiten is not None:
parameters.append({'params': model.pool.whiten.parameters()})
# add final whitening if exists
if model.whiten is not None:
parameters.append({'params': model.whiten.parameters()})
# define optimizer
if args.optimizer == 'sgd':
optimizer = torch.optim.SGD(parameters, args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
elif args.optimizer == 'adam':
optimizer = torch.optim.Adam(parameters, args.lr, weight_decay=args.weight_decay)
# define learning rate decay schedule
# TODO: maybe pass as argument in future implementation?
exp_decay = math.exp(-0.01)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=exp_decay)
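# with gamma = exp(-0.01), the learning rate after k epochs is lr * exp(-0.01*k),
# i.e. roughly a 1% multiplicative decay per epoch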
# optionally resume from a checkpoint
start_epoch = 0
if args.resume:
args.resume = os.path.join(args.directory, args.resume)
if os.path.isfile(args.resume):
# load checkpoint weights and update model and optimizer
print(">> Loading checkpoint:\n>> '{}'".format(args.resume))
checkpoint = torch.load(args.resume)
start_epoch = checkpoint['epoch']
min_loss = checkpoint['min_loss']
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
print(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})"
.format(args.resume, checkpoint['epoch']))
# important not to forget scheduler updating
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=exp_decay, last_epoch=checkpoint['epoch']-1)
else:
print(">> No checkpoint found at '{}'".format(args.resume))
# Data loading code
normalize = transforms.Normalize(mean=model.meta['mean'], std=model.meta['std'])
transform = transforms.Compose([
transforms.ToTensor(),
normalize,
])
train_dataset = TuplesDataset(
name=args.training_dataset,
mode='train',
imsize=args.image_size,
nnum=args.neg_num,
qsize=args.query_size,
poolsize=args.pool_size,
transform=transform
)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size, shuffle=True,
num_workers=args.workers, pin_memory=True, sampler=None,
drop_last=True, collate_fn=collate_tuples
)
if args.val:
val_dataset = TuplesDataset(
name=args.training_dataset,
mode='val',
imsize=args.image_size,
nnum=args.neg_num,
qsize=float('Inf'),
poolsize=float('Inf'),
transform=transform
)
val_loader = torch.utils.data.DataLoader(
val_dataset, batch_size=args.batch_size, shuffle=False,
num_workers=args.workers, pin_memory=True,
drop_last=True, collate_fn=collate_tuples
)
# evaluate the network before starting
# this might not be necessary?
#test(args.test_datasets, model)
for epoch in range(start_epoch, args.epochs):
# set manual seeds per epoch
np.random.seed(epoch)
torch.manual_seed(epoch)
torch.cuda.manual_seed_all(epoch)
# adjust learning rate for each epoch
scheduler.step()
# # debug printing to check if everything ok
# lr_feat = optimizer.param_groups[0]['lr']
# lr_pool = optimizer.param_groups[1]['lr']
# print('>> Features lr: {:.2e}; Pooling lr: {:.2e}'.format(lr_feat, lr_pool))
# train for one epoch on train set
loss = train(train_loader, model, criterion, optimizer, epoch)
# evaluate on validation set
if args.val:
with torch.no_grad():
loss = validate(val_loader, model, criterion, epoch)
# evaluate on test datasets every test_freq epochs
#if (epoch + 1) % args.test_freq == 0:
# with torch.no_grad():
# test(args.test_datasets, model)
# remember best loss and save checkpoint
is_best = loss < min_loss
min_loss = min(loss, min_loss)
if (epoch+1)%10 == 0:
save_checkpoint({
'epoch': epoch + 1,
'meta': model.meta,
'state_dict': model.state_dict(),
'min_loss': min_loss,
'optimizer' : optimizer.state_dict(),
}, is_best, args.directory)
def train(train_loader, model, criterion, optimizer, epoch):
batch_time = AverageMeter()
data_time = AverageMeter()
losses = AverageMeter()
# create tuples for training
avg_neg_distance = train_loader.dataset.create_epoch_tuples(model)
# switch to train mode
model.train()
model.apply(set_batchnorm_eval)
# zero out gradients
optimizer.zero_grad()
end = time.time()
for i, (input, target) in enumerate(train_loader):
# measure data loading time
data_time.update(time.time() - end)
nq = len(input) # number of training tuples
ni = len(input[0]) # number of images per tuple
for q in range(nq):
output = torch.zeros(model.meta['outputdim'], ni).cuda()
for imi in range(ni):
# compute output vector for image imi
output[:, imi] = model(input[q][imi].cuda()).squeeze()
# reducing memory consumption:
# compute loss for this query tuple only
# then, do backward pass for one tuple only
# each backward pass gradients will be accumulated
# the optimization step is performed for the full batch later
loss = criterion(output, target[q].cuda())
losses.update(loss.item())
loss.backward()
if (i + 1) % args.update_every == 0:
# do one step for multiple batches
# accumulated gradients are used
optimizer.step()
# zero out gradients so we can
# accumulate new ones over batches
optimizer.zero_grad()
# print('>> Train: [{0}][{1}/{2}]\t'
# 'Weight update performed'.format(
# epoch+1, i+1, len(train_loader)))
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if (i+1) % args.print_freq == 0 or i == 0 or (i+1) == len(train_loader):
print('>> Train: [{0}][{1}/{2}]\t'
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
epoch+1, i+1, len(train_loader), batch_time=batch_time,
data_time=data_time, loss=losses))
return losses.avg
def validate(val_loader, model, criterion, epoch):
batch_time = AverageMeter()
losses = AverageMeter()
# create tuples for validation
avg_neg_distance = val_loader.dataset.create_epoch_tuples(model)
# switch to evaluate mode
model.eval()
end = time.time()
for i, (input, target) in enumerate(val_loader):
nq = len(input) # number of training tuples
ni = len(input[0]) # number of images per tuple
output = torch.zeros(model.meta['outputdim'], nq*ni).cuda()
for q in range(nq):
for imi in range(ni):
# compute output vector for image imi of query q
output[:, q*ni + imi] = model(input[q][imi].cuda()).squeeze()
# no need to reduce memory consumption (no backward pass):
# compute loss for the full batch
loss = criterion(output, torch.cat(target).cuda())
# record loss
losses.update(loss.item()/nq, nq)
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if (i+1) % args.print_freq == 0 or i == 0 or (i+1) == len(val_loader):
print('>> Val: [{0}][{1}/{2}]\t'
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
epoch+1, i+1, len(val_loader), batch_time=batch_time, loss=losses))
return losses.avg
def test(datasets, net):
print('>> Evaluating network on test datasets...')
# for testing we use image size of max 1024
image_size = 1024
# moving network to gpu and eval mode
net.cuda()
net.eval()
# set up the transform
normalize = transforms.Normalize(
mean=net.meta['mean'],
std=net.meta['std']
)
transform = transforms.Compose([
transforms.ToTensor(),
normalize
])
# compute whitening
if args.test_whiten:
start = time.time()
print('>> {}: Learning whitening...'.format(args.test_whiten))
# loading db
db_root = os.path.join(get_data_root(), 'train', args.test_whiten)
ims_root = os.path.join(db_root, 'ims')
db_fn = os.path.join(db_root, '{}-whiten.pkl'.format(args.test_whiten))
with open(db_fn, 'rb') as f:
db = pickle.load(f)
images = [cid2filename(db['cids'][i], ims_root) for i in range(len(db['cids']))]
# extract whitening vectors
print('>> {}: Extracting...'.format(args.test_whiten))
wvecs = extract_vectors(net, images, image_size, transform) # implemented with torch.no_grad
# learning whitening
print('>> {}: Learning...'.format(args.test_whiten))
wvecs = wvecs.numpy()
m, P = whitenlearn(wvecs, db['qidxs'], db['pidxs'])
Lw = {'m': m, 'P': P}
print('>> {}: elapsed time: {}'.format(args.test_whiten, htime(time.time()-start)))
else:
Lw = None
# evaluate on test datasets
datasets = args.test_datasets.split(',')
for dataset in datasets:
start = time.time()
print('>> {}: Extracting...'.format(dataset))
# prepare config structure for the test dataset
cfg = configdataset(dataset, os.path.join(get_data_root(), 'test'))
images = [cfg['im_fname'](cfg,i) for i in range(cfg['n'])]
qimages = [cfg['qim_fname'](cfg,i) for i in range(cfg['nq'])]
bbxs = [tuple(cfg['gnd'][i]['bbx']) for i in range(cfg['nq'])]
# extract database and query vectors
print('>> {}: database images...'.format(dataset))
vecs = extract_vectors(net, images, image_size, transform) # implemented with torch.no_grad
print('>> {}: query images...'.format(dataset))
qvecs = extract_vectors(net, qimages, image_size, transform, bbxs) # implemented with torch.no_grad
print('>> {}: Evaluating...'.format(dataset))
# convert to numpy
vecs = vecs.numpy()
qvecs = qvecs.numpy()
# search, rank, and print
scores = np.dot(vecs.T, qvecs)
ranks = np.argsort(-scores, axis=0)
compute_map_and_print(dataset, ranks, cfg['gnd'])
if Lw is not None:
# whiten the vectors
vecs_lw = whitenapply(vecs, Lw['m'], Lw['P'])
qvecs_lw = whitenapply(qvecs, Lw['m'], Lw['P'])
# search, rank, and print
scores = np.dot(vecs_lw.T, qvecs_lw)
ranks = np.argsort(-scores, axis=0)
compute_map_and_print(dataset + ' + whiten', ranks, cfg['gnd'])
print('>> {}: elapsed time: {}'.format(dataset, htime(time.time()-start)))
def save_checkpoint(state, is_best, directory):
filename = os.path.join(directory, 'model_epoch%d.pth.tar' % state['epoch'])
torch.save(state, filename)
if is_best:
filename_best = os.path.join(directory, 'model_best.pth.tar')
shutil.copyfile(filename, filename_best)
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def set_batchnorm_eval(m):
classname = m.__class__.__name__
if classname.find('BatchNorm') != -1:
# freeze running mean and std:
# we do training one image at a time
# so the statistics would not be per batch
# hence we choose freezing (ie using imagenet statistics)
m.eval()
# # freeze parameters:
# # in fact no need to freeze scale and bias
# # they can be learned
# # that is why next two lines are commented
# for p in m.parameters():
# p.requires_grad = False
if __name__ == '__main__':
main()
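
What model.apply(set_batchnorm_eval) does in train(), in miniature: apply() visits every submodule, and the hook switches only BatchNorm layers back to eval mode so their running statistics stay frozen. A sketch, using a torchvision model as a stand-in for the retrieval backbone:

# editor's sketch; resnet18 is an arbitrary stand-in
import torchvision.models as models

model = models.resnet18()
model.train()                           # every submodule in train mode
model.apply(set_batchnorm_eval)         # only BatchNorm layers go back to eval
print(model.bn1.training)               # False: frozen batch statistics
print(model.layer1[0].conv1.training)   # True: still training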

0
cirtorch/layers/__init__.py Executable file

172
cirtorch/layers/functional.py Executable file

@@ -0,0 +1,172 @@
import math
import pdb
import torch
import torch.nn.functional as F
# --------------------------------------
# pooling
# --------------------------------------
def mac(x):
return F.max_pool2d(x, (x.size(-2), x.size(-1)))
# return F.adaptive_max_pool2d(x, (1,1)) # alternative
def spoc(x):
return F.avg_pool2d(x, (x.size(-2), x.size(-1)))
# return F.adaptive_avg_pool2d(x, (1,1)) # alternative
def gem(x, p=3, eps=1e-6):
return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)
# return F.lp_pool2d(F.threshold(x, eps, eps), p, (x.size(-2), x.size(-1))) # alternative
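# rmac below: R-MAC pooling, i.e. max-pool a multi-scale grid of regions,
# L2-normalize each regional vector, and sum them together with the full-image vector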
def rmac(x, L=3, eps=1e-6):
ovr = 0.4 # desired overlap of neighboring regions
steps = torch.Tensor([2, 3, 4, 5, 6, 7]) # possible regions for the long dimension
W = x.size(3)
H = x.size(2)
w = min(W, H)
w2 = math.floor(w/2.0 - 1)
b = (max(H, W)-w)/(steps-1)
(tmp, idx) = torch.min(torch.abs(((w**2 - w*b)/w**2)-ovr), 0) # steps(idx) regions for long dimension
# region overplus per dimension
Wd = 0
Hd = 0
if H < W:
Wd = idx.item() + 1
elif H > W:
Hd = idx.item() + 1
v = F.max_pool2d(x, (x.size(-2), x.size(-1)))
v = v / (torch.norm(v, p=2, dim=1, keepdim=True) + eps).expand_as(v)
for l in range(1, L+1):
wl = math.floor(2*w/(l+1))
wl2 = math.floor(wl/2 - 1)
if l+Wd == 1:
b = 0
else:
b = (W-wl)/(l+Wd-1)
cenW = torch.floor(wl2 + torch.Tensor(range(l-1+Wd+1))*b) - wl2 # center coordinates
if l+Hd == 1:
b = 0
else:
b = (H-wl)/(l+Hd-1)
cenH = torch.floor(wl2 + torch.Tensor(range(l-1+Hd+1))*b) - wl2 # center coordinates
for i_ in cenH.tolist():
for j_ in cenW.tolist():
if wl == 0:
continue
R = x[:,:,(int(i_)+torch.Tensor(range(wl)).long()).tolist(),:]
R = R[:,:,:,(int(j_)+torch.Tensor(range(wl)).long()).tolist()]
vt = F.max_pool2d(R, (R.size(-2), R.size(-1)))
vt = vt / (torch.norm(vt, p=2, dim=1, keepdim=True) + eps).expand_as(vt)
v += vt
return v
def roipool(x, rpool, L=3, eps=1e-6):
ovr = 0.4 # desired overlap of neighboring regions
steps = torch.Tensor([2, 3, 4, 5, 6, 7]) # possible regions for the long dimension
W = x.size(3)
H = x.size(2)
w = min(W, H)
w2 = math.floor(w/2.0 - 1)
b = (max(H, W)-w)/(steps-1)
_, idx = torch.min(torch.abs(((w**2 - w*b)/w**2)-ovr), 0) # steps(idx) regions for long dimension
# region overplus per dimension
Wd = 0
Hd = 0
if H < W:
Wd = idx.item() + 1
elif H > W:
Hd = idx.item() + 1
vecs = []
vecs.append(rpool(x).unsqueeze(1))
for l in range(1, L+1):
wl = math.floor(2*w/(l+1))
wl2 = math.floor(wl/2 - 1)
if l+Wd == 1:
b = 0
else:
b = (W-wl)/(l+Wd-1)
cenW = torch.floor(wl2 + torch.Tensor(range(l-1+Wd+1))*b).int() - wl2 # center coordinates
if l+Hd == 1:
b = 0
else:
b = (H-wl)/(l+Hd-1)
cenH = torch.floor(wl2 + torch.Tensor(range(l-1+Hd+1))*b).int() - wl2 # center coordinates
for i_ in cenH.tolist():
for j_ in cenW.tolist():
if wl == 0:
continue
vecs.append(rpool(x.narrow(2,i_,wl).narrow(3,j_,wl)).unsqueeze(1))
return torch.cat(vecs, dim=1)
# --------------------------------------
# normalization
# --------------------------------------
def l2n(x, eps=1e-6):
return x / (torch.norm(x, p=2, dim=1, keepdim=True) + eps).expand_as(x)
def powerlaw(x, eps=1e-6):
x = x + eps
return x.abs().sqrt().mul(x.sign())
# --------------------------------------
# loss
# --------------------------------------
def contrastive_loss(x, label, margin=0.7, eps=1e-6):
# x is D x N
dim = x.size(0) # D
nq = torch.sum(label.data==-1) # number of tuples
S = x.size(1) // nq # number of images per tuple including query: 1+1+n
x1 = x[:, ::S].permute(1,0).repeat(1,S-1).view((S-1)*nq,dim).permute(1,0)
idx = [i for i in range(len(label)) if label.data[i] != -1]
x2 = x[:, idx]
lbl = label[label!=-1]
dif = x1 - x2
D = torch.pow(dif+eps, 2).sum(dim=0).sqrt()
y = 0.5*lbl*torch.pow(D,2) + 0.5*(1-lbl)*torch.pow(torch.clamp(margin-D, min=0),2)
y = torch.sum(y)
return y
def triplet_loss(x, label, margin=0.1):
# x is D x N
dim = x.size(0) # D
nq = torch.sum(label.data==-1).item() # number of tuples
S = x.size(1) // nq # number of images per tuple including query: 1+1+n
xa = x[:, label.data==-1].permute(1,0).repeat(1,S-2).view((S-2)*nq,dim).permute(1,0)
xp = x[:, label.data==1].permute(1,0).repeat(1,S-2).view((S-2)*nq,dim).permute(1,0)
xn = x[:, label.data==0]
dist_pos = torch.sum(torch.pow(xa - xp, 2), dim=0)
dist_neg = torch.sum(torch.pow(xa - xn, 2), dim=0)
return torch.sum(torch.clamp(dist_pos - dist_neg + margin, min=0))
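
GeM pooling is a parametric middle ground between the poolings above: p=1 reduces to average pooling (spoc) and large p approaches max pooling (mac). A quick numeric sanity check:

# editor's sketch: checking the limiting behaviour of gem() on a random map
import torch

torch.manual_seed(0)
x = torch.rand(1, 3, 8, 8)
print(torch.allclose(gem(x, p=1), spoc(x), atol=1e-4))   # True: p=1 is average pooling
print(torch.allclose(gem(x, p=100), mac(x), atol=5e-2))  # True: large p approaches max pooling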

48
cirtorch/layers/loss.py Executable file

@@ -0,0 +1,48 @@
import torch
import torch.nn as nn
import cirtorch.layers.functional as LF
# --------------------------------------
# Loss/Error layers
# --------------------------------------
class ContrastiveLoss(nn.Module):
r"""CONTRASTIVELOSS layer that computes contrastive loss for a batch of images:
Q query tuples, each packed in the form of (q,p,n1,...,nN)
Args:
x: tuples arranged in columns as [q,p,n1,...,nN, ...]
label: -1 for query, 1 for corresponding positive, 0 for corresponding negative
margin: contrastive loss margin. Default: 0.7
>>> contrastive_loss = ContrastiveLoss(margin=0.7)
>>> input = torch.randn(128, 35, requires_grad=True)
>>> label = torch.Tensor([-1, 1, 0, 0, 0, 0, 0] * 5)
>>> output = contrastive_loss(input, label)
>>> output.backward()
"""
def __init__(self, margin=0.7, eps=1e-6):
super(ContrastiveLoss, self).__init__()
self.margin = margin
self.eps = eps
def forward(self, x, label):
return LF.contrastive_loss(x, label, margin=self.margin, eps=self.eps)
def __repr__(self):
return self.__class__.__name__ + '(' + 'margin=' + '{:.4f}'.format(self.margin) + ')'
class TripletLoss(nn.Module):
def __init__(self, margin=0.1):
super(TripletLoss, self).__init__()
self.margin = margin
def forward(self, x, label):
return LF.triplet_loss(x, label, margin=self.margin)
def __repr__(self):
return self.__class__.__name__ + '(' + 'margin=' + '{:.4f}'.format(self.margin) + ')'

33
cirtorch/layers/normalization.py Executable file

@@ -0,0 +1,33 @@
import torch
import torch.nn as nn
import cirtorch.layers.functional as LF
# --------------------------------------
# Normalization layers
# --------------------------------------
class L2N(nn.Module):
def __init__(self, eps=1e-6):
super(L2N,self).__init__()
self.eps = eps
def forward(self, x):
return LF.l2n(x, eps=self.eps)
def __repr__(self):
return self.__class__.__name__ + '(' + 'eps=' + str(self.eps) + ')'
class PowerLaw(nn.Module):
def __init__(self, eps=1e-6):
super(PowerLaw, self).__init__()
self.eps = eps
def forward(self, x):
return LF.powerlaw(x, eps=self.eps)
def __repr__(self):
return self.__class__.__name__ + '(' + 'eps=' + str(self.eps) + ')'

113
cirtorch/layers/pooling.py Executable file

@ -0,0 +1,113 @@
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter
import cirtorch.layers.functional as LF
from cirtorch.layers.normalization import L2N
# --------------------------------------
# Pooling layers
# --------------------------------------
class MAC(nn.Module):
def __init__(self):
super(MAC,self).__init__()
def forward(self, x):
return LF.mac(x)
def __repr__(self):
return self.__class__.__name__ + '()'
class SPoC(nn.Module):
def __init__(self):
super(SPoC,self).__init__()
def forward(self, x):
return LF.spoc(x)
def __repr__(self):
return self.__class__.__name__ + '()'
class GeM(nn.Module):
def __init__(self, p=3, eps=1e-6):
super(GeM,self).__init__()
self.p = Parameter(torch.ones(1)*p)
self.eps = eps
def forward(self, x):
return LF.gem(x, p=self.p, eps=self.eps)
def __repr__(self):
return self.__class__.__name__ + '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + ', ' + 'eps=' + str(self.eps) + ')'
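# For reference, LF.gem is assumed to implement generalized-mean pooling over
# the spatial dimensions:
#   gem(x, p) = avg_pool2d(x.clamp(min=eps) ** p) ** (1 / p)
# p = 1 recovers SPoC (average pooling) and p -> inf approaches MAC (max
# pooling); making p a Parameter lets the network learn it end-to-end.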
class GeMmp(nn.Module):
def __init__(self, p=3, mp=1, eps=1e-6):
super(GeMmp,self).__init__()
self.p = Parameter(torch.ones(mp)*p)
self.mp = mp
self.eps = eps
def forward(self, x):
return LF.gem(x, p=self.p.unsqueeze(-1).unsqueeze(-1), eps=self.eps)
def __repr__(self):
return self.__class__.__name__ + '(' + 'p=' + '[{}]'.format(self.mp) + ', ' + 'eps=' + str(self.eps) + ')'
class RMAC(nn.Module):
def __init__(self, L=3, eps=1e-6):
super(RMAC,self).__init__()
self.L = L
self.eps = eps
def forward(self, x):
return LF.rmac(x, L=self.L, eps=self.eps)
def __repr__(self):
return self.__class__.__name__ + '(' + 'L=' + '{}'.format(self.L) + ')'
class Rpool(nn.Module):
def __init__(self, rpool, whiten=None, L=3, eps=1e-6):
super(Rpool,self).__init__()
self.rpool = rpool
self.L = L
self.whiten = whiten
self.norm = L2N()
self.eps = eps
def forward(self, x, aggregate=True):
# features -> roipool
o = LF.roipool(x, self.rpool, self.L, self.eps) # size: #im, #reg, D, 1, 1
# concatenate regions from all images in the batch
s = o.size()
o = o.view(s[0]*s[1], s[2], s[3], s[4]) # size: #im x #reg, D, 1, 1
# rvecs -> norm
o = self.norm(o)
# rvecs -> whiten -> norm
if self.whiten is not None:
o = self.norm(self.whiten(o.squeeze(-1).squeeze(-1)))
# reshape back to regions per image
o = o.view(s[0], s[1], s[2], s[3], s[4]) # size: #im, #reg, D, 1, 1
# aggregate regions into a single global vector per image
if aggregate:
# rvecs -> sumpool -> norm
o = self.norm(o.sum(1, keepdim=False)) # size: #im, D, 1, 1
return o
def __repr__(self):
return super(Rpool, self).__repr__() + '(' + 'L=' + '{}'.format(self.L) + ')'
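# Usage sketch (hypothetical wiring, not from the original file): regional GeM
# with a learned whitening applied to each region vector before aggregation.
# >>> pool = Rpool(GeM(p=3), whiten=nn.Linear(2048, 2048), L=3)
# >>> feats = torch.randn(2, 2048, 24, 32)   # batch of CNN feature maps
# >>> pool(feats).shape                      # one global vector per image
# torch.Size([2, 2048, 1, 1])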

0
cirtorch/networks/__init__.py Executable file

427
cirtorch/networks/imageretrievalnet.py Executable file

@ -0,0 +1,427 @@
import os
import pdb
import numpy as np
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torchvision
from cirtorch.layers.pooling import MAC, SPoC, GeM, GeMmp, RMAC, Rpool
from cirtorch.layers.normalization import L2N, PowerLaw
from cirtorch.datasets.genericdataset import ImagesFromList
from cirtorch.utils.general import get_data_root
from cirtorch.datasets.datahelpers import default_loader, imresize
from PIL import Image
#from ModelHelper.Common.CommonUtils.ImageAugmentation import Padding
import cv2
# for some models, we have imported features (convolutions) from caffe because the image retrieval performance is higher for them
FEATURES = {
'vgg16': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-vgg16-features-d369c8e.pth',
'resnet50': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet50-features-ac468af.pth',
'resnet101': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet101-features-10a101d.pth',
'resnet152': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet152-features-1011020.pth',
}
# TODO: pre-compute for more architectures and properly test variations (pre l2norm, post l2norm)
# pre-computed local pca whitening that can be applied before the pooling layer
L_WHITENING = {
'resnet101': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-lwhiten-9f830ef.pth',
# no pre l2 norm
# 'resnet101' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-lwhiten-da5c935.pth', # with pre l2 norm
}
# possible global pooling layers, each one of these can be made regional
POOLING = {
'mac': MAC,
'spoc': SPoC,
'gem': GeM,
'gemmp': GeMmp,
'rmac': RMAC,
}
# TODO: pre-compute for: resnet50-gem-r, resnet50-mac-r, vgg16-mac-r, alexnet-mac-r
# pre-computed regional whitening, for most commonly used architectures and pooling methods
R_WHITENING = {
'alexnet-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-r-rwhiten-c8cf7e2.pth',
'vgg16-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-r-rwhiten-19b204e.pth',
'resnet101-mac-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-mac-r-rwhiten-7f1ed8c.pth',
'resnet101-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-r-rwhiten-adace84.pth',
}
# TODO: pre-compute for more architectures
# pre-computed final (global) whitening, for most commonly used architectures and pooling methods
WHITENING = {
'alexnet-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-whiten-454ad53.pth',
'alexnet-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-r-whiten-4c9126b.pth',
'vgg16-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-whiten-eaa6695.pth',
'vgg16-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-r-whiten-83582df.pth',
'resnet50-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet50-gem-whiten-f15da7b.pth',
'resnet101-mac-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-mac-r-whiten-9df41d3.pth',
'resnet101-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-whiten-22ab0c1.pth',
'resnet101-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-r-whiten-b379c0a.pth',
'resnet101-gemmp': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gemmp-whiten-770f53c.pth',
'resnet152-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet152-gem-whiten-abe7b93.pth',
'densenet121-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet121-gem-whiten-79e3eea.pth',
'densenet169-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet169-gem-whiten-6b2a76a.pth',
'densenet201-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet201-gem-whiten-22ea45c.pth',
}
# output dimensionality for supported architectures
OUTPUT_DIM = {
'alexnet': 256,
'vgg11': 512,
'vgg13': 512,
'vgg16': 512,
'vgg19': 512,
'resnet18': 512,
'resnet34': 512,
'resnet50': 2048,
'resnet101': 2048,
'resnet152': 2048,
'densenet121': 1024,
'densenet169': 1664,
'densenet201': 1920,
'densenet161': 2208, # largest densenet
'squeezenet1_0': 512,
'squeezenet1_1': 512,
}
class ImageRetrievalNet(nn.Module):
def __init__(self, features, lwhiten, pool, whiten, meta):
super(ImageRetrievalNet, self).__init__()
self.features = nn.Sequential(*features)
self.lwhiten = lwhiten
self.pool = pool
self.whiten = whiten
self.norm = L2N()
self.meta = meta
def forward(self, x):
# x -> features
o = self.features(x)
# TODO: properly test (with pre-l2norm and/or post-l2norm)
# if lwhiten exist: features -> local whiten
if self.lwhiten is not None:
# o = self.norm(o)
s = o.size()
o = o.permute(0, 2, 3, 1).contiguous().view(-1, s[1])
o = self.lwhiten(o)
o = o.view(s[0], s[2], s[3], self.lwhiten.out_features).permute(0, 3, 1, 2)
# o = self.norm(o)
# features -> pool -> norm
o = self.norm(self.pool(o)).squeeze(-1).squeeze(-1)
# if whiten exist: pooled features -> whiten -> norm
if self.whiten is not None:
o = self.norm(self.whiten(o))
# permute so that it is Dx1 column vector per image (DxN if many images)
return o.permute(1, 0)
def __repr__(self):
tmpstr = super(ImageRetrievalNet, self).__repr__()[:-1]
tmpstr += self.meta_repr()
tmpstr = tmpstr + ')'
return tmpstr
def meta_repr(self):
tmpstr = ' (' + 'meta' + '): dict( \n' # + self.meta.__repr__() + '\n'
tmpstr += ' architecture: {}\n'.format(self.meta['architecture'])
tmpstr += ' local_whitening: {}\n'.format(self.meta['local_whitening'])
tmpstr += ' pooling: {}\n'.format(self.meta['pooling'])
tmpstr += ' regional: {}\n'.format(self.meta['regional'])
tmpstr += ' whitening: {}\n'.format(self.meta['whitening'])
tmpstr += ' outputdim: {}\n'.format(self.meta['outputdim'])
tmpstr += ' mean: {}\n'.format(self.meta['mean'])
tmpstr += ' std: {}\n'.format(self.meta['std'])
tmpstr = tmpstr + ' )\n'
return tmpstr
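# Shape note (assuming a 2048-d backbone such as resnet101): forward() maps a
# batch of N images to a 2048 x N matrix of L2-normalized column descriptors,
# so cosine similarities reduce to a plain matrix product, e.g. with
# hypothetical names:
# >>> scores = vecs_db.t() @ vecs_query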
def init_network(params):
# parse params with default values
architecture = params.get('architecture', 'resnet101')
local_whitening = params.get('local_whitening', False)
pooling = params.get('pooling', 'gem')
regional = params.get('regional', False)
whitening = params.get('whitening', False)
mean = params.get('mean', [0.485, 0.456, 0.406])
std = params.get('std', [0.229, 0.224, 0.225])
pretrained = params.get('pretrained', True)
# get output dimensionality size
dim = OUTPUT_DIM[architecture]
# loading network from torchvision
if pretrained:
if architecture not in FEATURES:
# initialize with network pretrained on imagenet in pytorch
net_in = getattr(torchvision.models, architecture)(pretrained=True)
else:
# initialize with random weights, later on we will fill features with custom pretrained network
net_in = getattr(torchvision.models, architecture)(pretrained=False)
else:
# initialize with random weights
net_in = getattr(torchvision.models, architecture)(pretrained=False)
# initialize features
# take only convolutions for features,
# always ends with ReLU to make last activations non-negative
if architecture.startswith('alexnet'):
features = list(net_in.features.children())[:-1]
elif architecture.startswith('vgg'):
features = list(net_in.features.children())[:-1]
elif architecture.startswith('resnet'):
features = list(net_in.children())[:-2]
elif architecture.startswith('densenet'):
features = list(net_in.features.children())
features.append(nn.ReLU(inplace=True))
elif architecture.startswith('squeezenet'):
features = list(net_in.features.children())
else:
raise ValueError('Unsupported or unknown architecture: {}!'.format(architecture))
# initialize local whitening
if local_whitening:
lwhiten = nn.Linear(dim, dim, bias=True)
# TODO: lwhiten with possible dimensionality reduce
if pretrained:
lw = architecture
if lw in L_WHITENING:
print(">> {}: for '{}' custom computed local whitening '{}' is used"
.format(os.path.basename(__file__), lw, os.path.basename(L_WHITENING[lw])))
whiten_dir = os.path.join(get_data_root(), 'whiten')
lwhiten.load_state_dict(model_zoo.load_url(L_WHITENING[lw], model_dir=whiten_dir))
else:
print(">> {}: for '{}' there is no local whitening computed, random weights are used"
.format(os.path.basename(__file__), lw))
else:
lwhiten = None
# initialize pooling
if pooling == 'gemmp':
pool = POOLING[pooling](mp=dim)
else:
pool = POOLING[pooling]()
# initialize regional pooling
if regional:
rpool = pool
rwhiten = nn.Linear(dim, dim, bias=True)
# TODO: rwhiten with possible dimensionality reduce
if pretrained:
rw = '{}-{}-r'.format(architecture, pooling)
if rw in R_WHITENING:
print(">> {}: for '{}' custom computed regional whitening '{}' is used"
.format(os.path.basename(__file__), rw, os.path.basename(R_WHITENING[rw])))
whiten_dir = os.path.join(get_data_root(), 'whiten')
rwhiten.load_state_dict(model_zoo.load_url(R_WHITENING[rw], model_dir=whiten_dir))
else:
print(">> {}: for '{}' there is no regional whitening computed, random weights are used"
.format(os.path.basename(__file__), rw))
pool = Rpool(rpool, rwhiten)
# initialize whitening
if whitening:
whiten = nn.Linear(dim, dim, bias=True)
# TODO: whiten with possible dimensionality reduce
if pretrained:
w = architecture
if local_whitening:
w += '-lw'
w += '-' + pooling
if regional:
w += '-r'
if w in WHITENING:
print(">> {}: for '{}' custom computed whitening '{}' is used"
.format(os.path.basename(__file__), w, os.path.basename(WHITENING[w])))
whiten_dir = os.path.join(get_data_root(), 'whiten')
whiten.load_state_dict(model_zoo.load_url(WHITENING[w], model_dir=whiten_dir))
else:
print(">> {}: for '{}' there is no whitening computed, random weights are used"
.format(os.path.basename(__file__), w))
else:
whiten = None
# create meta information to be stored in the network
meta = {
'architecture': architecture,
'local_whitening': local_whitening,
'pooling': pooling,
'regional': regional,
'whitening': whitening,
'mean': mean,
'std': std,
'outputdim': dim,
}
# create a generic image retrieval network
net = ImageRetrievalNet(features, lwhiten, pool, whiten, meta)
# initialize features with custom pretrained network if needed
if pretrained and architecture in FEATURES:
print(">> {}: for '{}' custom pretrained features '{}' are used"
.format(os.path.basename(__file__), architecture, os.path.basename(FEATURES[architecture])))
model_dir = os.path.join(get_data_root(), 'networks')
net.features.load_state_dict(model_zoo.load_url(FEATURES[architecture], model_dir=model_dir))
return net
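# Example configuration (a sketch using only the keys parsed above):
# >>> net = init_network({'architecture': 'resnet101', 'pooling': 'gem',
# ...                     'whitening': True, 'pretrained': True})
# >>> net.meta['outputdim']
# 2048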
def extract_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
# moving network to gpu and eval mode
if torch.cuda.is_available():
net.cuda()
net.eval()
# creating dataset loader
loader = torch.utils.data.DataLoader(
ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
batch_size=1, shuffle=False, num_workers=1, pin_memory=True
)
# extracting vectors
with torch.no_grad():
vecs = torch.zeros(net.meta['outputdim'], len(images))
img_paths = list()
for i, (input, path) in enumerate(loader):
if torch.cuda.is_available():
input = input.cuda()
if len(ms) == 1 and ms[0] == 1:
vecs[:, i] = extract_ss(net, input)
else:
vecs[:, i] = extract_ms(net, input, ms, msp)
img_paths.append(path)
if (i + 1) % print_freq == 0 or (i + 1) == len(images):
                print('\r>>>> {}/{} done...'.format((i + 1), len(images)), end='')
        print('')
imgs = list()
for one in img_paths:
imgs += one
return vecs, imgs
def extract_vectors_o(net, image, size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
if torch.cuda.is_available():
net.cuda()
net.eval()
#image = cv2.resize(image, (size, size))
    if isinstance(image, np.ndarray):
image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
image = imresize(image, size)
    image = transform(image)
image = image.unsqueeze(0)
with torch.no_grad():
#vecs = torch.zeros(net.meta['outputdim'], len(image))
if torch.cuda.is_available():
image = image.cuda()
if len(ms) == 1 and ms[0] == 1:
vecs = extract_ss(net, image)
else:
vecs = extract_ms(net, image, ms, msp)
return vecs
def extract_ss(net, input):
    return net(input).cpu().data.squeeze()
def extract_ms(net, input, ms, msp):
v = torch.zeros(net.meta['outputdim'])
for s in ms:
if s == 1:
input_t = input.clone()
else:
input_t = nn.functional.interpolate(input, scale_factor=s, mode='bilinear', align_corners=False)
v += net(input_t).pow(msp).cpu().data.squeeze()
v /= len(ms)
v = v.pow(1. / msp)
v /= v.norm()
return v
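# Note: extract_ms accumulates descriptors over scales in the msp-power domain
# (a generalized mean over scales) and re-normalizes at the end. A typical
# multi-scale call, as an assumption consistent with the defaults above:
# >>> v = extract_ms(net, input, ms=[1, 2**0.5, 1/2**0.5], msp=1)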
def extract_regional_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
# moving network to gpu and eval mode
net.cuda()
net.eval()
# creating dataset loader
loader = torch.utils.data.DataLoader(
ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
batch_size=1, shuffle=False, num_workers=8, pin_memory=True
)
# extracting vectors
with torch.no_grad():
vecs = []
for i, input in enumerate(loader):
input = input.cuda()
if len(ms) == 1:
vecs.append(extract_ssr(net, input))
else:
# TODO: not implemented yet
# vecs.append(extract_msr(net, input, ms, msp))
raise NotImplementedError
if (i + 1) % print_freq == 0 or (i + 1) == len(images):
print('\r>>>> {}/{} done...'.format((i + 1), len(images)), end='')
print('')
return vecs
def extract_ssr(net, input):
return net.pool(net.features(input), aggregate=False).squeeze(0).squeeze(-1).squeeze(-1).permute(1, 0).cpu().data
def extract_local_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
# moving network to gpu and eval mode
net.cuda()
net.eval()
# creating dataset loader
loader = torch.utils.data.DataLoader(
ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
batch_size=1, shuffle=False, num_workers=8, pin_memory=True
)
# extracting vectors
with torch.no_grad():
vecs = []
for i, input in enumerate(loader):
input = input.cuda()
if len(ms) == 1:
vecs.append(extract_ssl(net, input))
else:
# TODO: not implemented yet
# vecs.append(extract_msl(net, input, ms, msp))
raise NotImplementedError
if (i + 1) % print_freq == 0 or (i + 1) == len(images):
print('\r>>>> {}/{} done...'.format((i + 1), len(images)), end='')
print('')
return vecs
def extract_ssl(net, input):
return net.norm(net.features(input)).squeeze(0).view(net.meta['outputdim'], -1).cpu().data


@ -0,0 +1,392 @@
import os
import pdb
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torchvision
from cirtorch.layers.pooling import MAC, SPoC, GeM, GeMmp, RMAC, Rpool
from cirtorch.layers.normalization import L2N, PowerLaw
from cirtorch.datasets.genericdataset import ImagesFromList
from cirtorch.utils.general import get_data_root
# for some models, we have imported features (convolutions) from caffe because the image retrieval performance is higher for them
FEATURES = {
'vgg16' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-vgg16-features-d369c8e.pth',
'resnet50' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet50-features-ac468af.pth',
'resnet101' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet101-features-10a101d.pth',
'resnet152' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet152-features-1011020.pth',
}
# TODO: pre-compute for more architectures and properly test variations (pre l2norm, post l2norm)
# pre-computed local pca whitening that can be applied before the pooling layer
L_WHITENING = {
'resnet101' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-lwhiten-9f830ef.pth', # no pre l2 norm
# 'resnet101' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-lwhiten-da5c935.pth', # with pre l2 norm
}
# possible global pooling layers, each one of these can be made regional
POOLING = {
'mac' : MAC,
'spoc' : SPoC,
'gem' : GeM,
'gemmp' : GeMmp,
'rmac' : RMAC,
}
# TODO: pre-compute for: resnet50-gem-r, resnet50-mac-r, vgg16-mac-r, alexnet-mac-r
# pre-computed regional whitening, for most commonly used architectures and pooling methods
R_WHITENING = {
'alexnet-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-r-rwhiten-c8cf7e2.pth',
'vgg16-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-r-rwhiten-19b204e.pth',
'resnet101-mac-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-mac-r-rwhiten-7f1ed8c.pth',
'resnet101-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-r-rwhiten-adace84.pth',
}
# TODO: pre-compute for more architectures
# pre-computed final (global) whitening, for most commonly used architectures and pooling methods
WHITENING = {
'alexnet-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-whiten-454ad53.pth',
'alexnet-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-r-whiten-4c9126b.pth',
'vgg16-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-whiten-eaa6695.pth',
'vgg16-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-r-whiten-83582df.pth',
'resnet50-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet50-gem-whiten-f15da7b.pth',
'resnet101-mac-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-mac-r-whiten-9df41d3.pth',
'resnet101-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-whiten-22ab0c1.pth',
'resnet101-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-r-whiten-b379c0a.pth',
'resnet101-gemmp' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gemmp-whiten-770f53c.pth',
'resnet152-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet152-gem-whiten-abe7b93.pth',
'densenet121-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet121-gem-whiten-79e3eea.pth',
'densenet169-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet169-gem-whiten-6b2a76a.pth',
'densenet201-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet201-gem-whiten-22ea45c.pth',
}
# output dimensionality for supported architectures
OUTPUT_DIM = {
'alexnet' : 256,
'vgg11' : 512,
'vgg13' : 512,
'vgg16' : 512,
'vgg19' : 512,
'resnet18' : 512,
'resnet34' : 512,
'resnet50' : 2048,
'resnet101' : 2048,
'resnet152' : 2048,
'densenet121' : 1024,
'densenet169' : 1664,
'densenet201' : 1920,
'densenet161' : 2208, # largest densenet
'squeezenet1_0' : 512,
'squeezenet1_1' : 512,
}
class ImageRetrievalNet(nn.Module):
def __init__(self, features, lwhiten, pool, whiten, meta):
super(ImageRetrievalNet, self).__init__()
self.features = nn.Sequential(*features)
self.lwhiten = lwhiten
self.pool = pool
self.whiten = whiten
self.norm = L2N()
self.meta = meta
def forward(self, x):
# x -> features
o = self.features(x)
# TODO: properly test (with pre-l2norm and/or post-l2norm)
# if lwhiten exist: features -> local whiten
if self.lwhiten is not None:
# o = self.norm(o)
s = o.size()
o = o.permute(0,2,3,1).contiguous().view(-1, s[1])
o = self.lwhiten(o)
o = o.view(s[0],s[2],s[3],self.lwhiten.out_features).permute(0,3,1,2)
# o = self.norm(o)
# features -> pool -> norm
o = self.norm(self.pool(o)).squeeze(-1).squeeze(-1)
# if whiten exist: pooled features -> whiten -> norm
if self.whiten is not None:
o = self.norm(self.whiten(o))
# permute so that it is Dx1 column vector per image (DxN if many images)
return o.permute(1,0)
def __repr__(self):
tmpstr = super(ImageRetrievalNet, self).__repr__()[:-1]
tmpstr += self.meta_repr()
tmpstr = tmpstr + ')'
return tmpstr
def meta_repr(self):
tmpstr = ' (' + 'meta' + '): dict( \n' # + self.meta.__repr__() + '\n'
tmpstr += ' architecture: {}\n'.format(self.meta['architecture'])
tmpstr += ' local_whitening: {}\n'.format(self.meta['local_whitening'])
tmpstr += ' pooling: {}\n'.format(self.meta['pooling'])
tmpstr += ' regional: {}\n'.format(self.meta['regional'])
tmpstr += ' whitening: {}\n'.format(self.meta['whitening'])
tmpstr += ' outputdim: {}\n'.format(self.meta['outputdim'])
tmpstr += ' mean: {}\n'.format(self.meta['mean'])
tmpstr += ' std: {}\n'.format(self.meta['std'])
tmpstr = tmpstr + ' )\n'
return tmpstr
def init_network(params):
# parse params with default values
architecture = params.get('architecture', 'resnet101')
local_whitening = params.get('local_whitening', False)
pooling = params.get('pooling', 'gem')
regional = params.get('regional', False)
whitening = params.get('whitening', False)
mean = params.get('mean', [0.485, 0.456, 0.406])
std = params.get('std', [0.229, 0.224, 0.225])
pretrained = params.get('pretrained', True)
# get output dimensionality size
dim = OUTPUT_DIM[architecture]
# loading network from torchvision
if pretrained:
if architecture not in FEATURES:
# initialize with network pretrained on imagenet in pytorch
net_in = getattr(torchvision.models, architecture)(pretrained=True)
else:
# initialize with random weights, later on we will fill features with custom pretrained network
net_in = getattr(torchvision.models, architecture)(pretrained=False)
else:
# initialize with random weights
net_in = getattr(torchvision.models, architecture)(pretrained=False)
# initialize features
# take only convolutions for features,
# always ends with ReLU to make last activations non-negative
if architecture.startswith('alexnet'):
features = list(net_in.features.children())[:-1]
elif architecture.startswith('vgg'):
features = list(net_in.features.children())[:-1]
elif architecture.startswith('resnet'):
features = list(net_in.children())[:-2]
elif architecture.startswith('densenet'):
features = list(net_in.features.children())
features.append(nn.ReLU(inplace=True))
elif architecture.startswith('squeezenet'):
features = list(net_in.features.children())
else:
raise ValueError('Unsupported or unknown architecture: {}!'.format(architecture))
# initialize local whitening
if local_whitening:
lwhiten = nn.Linear(dim, dim, bias=True)
# TODO: lwhiten with possible dimensionality reduce
if pretrained:
lw = architecture
if lw in L_WHITENING:
print(">> {}: for '{}' custom computed local whitening '{}' is used"
.format(os.path.basename(__file__), lw, os.path.basename(L_WHITENING[lw])))
whiten_dir = os.path.join(get_data_root(), 'whiten')
lwhiten.load_state_dict(model_zoo.load_url(L_WHITENING[lw], model_dir=whiten_dir))
else:
print(">> {}: for '{}' there is no local whitening computed, random weights are used"
.format(os.path.basename(__file__), lw))
else:
lwhiten = None
# initialize pooling
if pooling == 'gemmp':
pool = POOLING[pooling](mp=dim)
else:
pool = POOLING[pooling]()
# initialize regional pooling
if regional:
rpool = pool
rwhiten = nn.Linear(dim, dim, bias=True)
# TODO: rwhiten with possible dimensionality reduce
if pretrained:
rw = '{}-{}-r'.format(architecture, pooling)
if rw in R_WHITENING:
print(">> {}: for '{}' custom computed regional whitening '{}' is used"
.format(os.path.basename(__file__), rw, os.path.basename(R_WHITENING[rw])))
whiten_dir = os.path.join(get_data_root(), 'whiten')
rwhiten.load_state_dict(model_zoo.load_url(R_WHITENING[rw], model_dir=whiten_dir))
else:
print(">> {}: for '{}' there is no regional whitening computed, random weights are used"
.format(os.path.basename(__file__), rw))
pool = Rpool(rpool, rwhiten)
# initialize whitening
if whitening:
whiten = nn.Linear(dim, dim, bias=True)
# TODO: whiten with possible dimensionality reduce
if pretrained:
w = architecture
if local_whitening:
w += '-lw'
w += '-' + pooling
if regional:
w += '-r'
if w in WHITENING:
print(">> {}: for '{}' custom computed whitening '{}' is used"
.format(os.path.basename(__file__), w, os.path.basename(WHITENING[w])))
whiten_dir = os.path.join(get_data_root(), 'whiten')
whiten.load_state_dict(model_zoo.load_url(WHITENING[w], model_dir=whiten_dir))
else:
print(">> {}: for '{}' there is no whitening computed, random weights are used"
.format(os.path.basename(__file__), w))
else:
whiten = None
# create meta information to be stored in the network
meta = {
'architecture' : architecture,
'local_whitening' : local_whitening,
'pooling' : pooling,
'regional' : regional,
'whitening' : whitening,
'mean' : mean,
'std' : std,
'outputdim' : dim,
}
# create a generic image retrieval network
net = ImageRetrievalNet(features, lwhiten, pool, whiten, meta)
# initialize features with custom pretrained network if needed
if pretrained and architecture in FEATURES:
print(">> {}: for '{}' custom pretrained features '{}' are used"
.format(os.path.basename(__file__), architecture, os.path.basename(FEATURES[architecture])))
model_dir = os.path.join(get_data_root(), 'networks')
net.features.load_state_dict(model_zoo.load_url(FEATURES[architecture], model_dir=model_dir))
return net
def extract_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
# moving network to gpu and eval mode
net.cuda()
net.eval()
# creating dataset loader
loader = torch.utils.data.DataLoader(
ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
batch_size=1, shuffle=False, num_workers=8, pin_memory=True
)
# extracting vectors
with torch.no_grad():
vecs = torch.zeros(net.meta['outputdim'], len(images))
for i, input in enumerate(loader):
input = input.cuda()
if len(ms) == 1 and ms[0] == 1:
vecs[:, i] = extract_ss(net, input)
else:
vecs[:, i] = extract_ms(net, input, ms, msp)
if (i+1) % print_freq == 0 or (i+1) == len(images):
print('\r>>>> {}/{} done...'.format((i+1), len(images)), end='')
print('')
return vecs
def extract_ss(net, input):
return net(input).cpu().data.squeeze()
def extract_ms(net, input, ms, msp):
v = torch.zeros(net.meta['outputdim'])
for s in ms:
if s == 1:
input_t = input.clone()
else:
input_t = nn.functional.interpolate(input, scale_factor=s, mode='bilinear', align_corners=False)
v += net(input_t).pow(msp).cpu().data.squeeze()
v /= len(ms)
v = v.pow(1./msp)
v /= v.norm()
return v
def extract_regional_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
# moving network to gpu and eval mode
net.cuda()
net.eval()
# creating dataset loader
loader = torch.utils.data.DataLoader(
ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
batch_size=1, shuffle=False, num_workers=8, pin_memory=True
)
# extracting vectors
with torch.no_grad():
vecs = []
for i, input in enumerate(loader):
input = input.cuda()
if len(ms) == 1:
vecs.append(extract_ssr(net, input))
else:
# TODO: not implemented yet
# vecs.append(extract_msr(net, input, ms, msp))
raise NotImplementedError
if (i+1) % print_freq == 0 or (i+1) == len(images):
print('\r>>>> {}/{} done...'.format((i+1), len(images)), end='')
print('')
return vecs
def extract_ssr(net, input):
return net.pool(net.features(input), aggregate=False).squeeze(0).squeeze(-1).squeeze(-1).permute(1,0).cpu().data
def extract_local_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
# moving network to gpu and eval mode
net.cuda()
net.eval()
# creating dataset loader
loader = torch.utils.data.DataLoader(
ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
batch_size=1, shuffle=False, num_workers=8, pin_memory=True
)
# extracting vectors
with torch.no_grad():
vecs = []
for i, input in enumerate(loader):
input = input.cuda()
if len(ms) == 1:
vecs.append(extract_ssl(net, input))
else:
# TODO: not implemented yet
# vecs.append(extract_msl(net, input, ms, msp))
raise NotImplementedError
if (i+1) % print_freq == 0 or (i+1) == len(images):
print('\r>>>> {}/{} done...'.format((i+1), len(images)), end='')
print('')
return vecs
def extract_ssl(net, input):
return net.norm(net.features(input)).squeeze(0).view(net.meta['outputdim'], -1).cpu().data

0
cirtorch/utils/__init__.py Executable file

154
cirtorch/utils/download.py Executable file

@ -0,0 +1,154 @@
import os
def download_test(data_dir):
"""
    DOWNLOAD_TEST Checks and, if required, downloads the necessary datasets for testing.
    download_test(DATA_ROOT) checks if the data necessary for running the example script exist.
    If not, it downloads them into the folder structure:
DATA_ROOT/test/oxford5k/ : folder with Oxford images and ground truth file
DATA_ROOT/test/paris6k/ : folder with Paris images and ground truth file
DATA_ROOT/test/roxford5k/ : folder with Oxford images and revisited ground truth file
DATA_ROOT/test/rparis6k/ : folder with Paris images and revisited ground truth file
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'test')
if not os.path.exists(datasets_dir):
os.mkdir(datasets_dir)
# Download datasets folders test/DATASETNAME/
datasets = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
for di in range(len(datasets)):
dataset = datasets[di]
if dataset == 'oxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'paris6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
elif dataset == 'roxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'rparis6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
else:
raise ValueError('Unknown dataset: {}!'.format(dataset))
dst_dir = os.path.join(datasets_dir, dataset, 'jpg')
if not os.path.exists(dst_dir):
# for oxford and paris download images
if dataset == 'oxford5k' or dataset == 'paris6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
os.makedirs(dst_dir)
for dli in range(len(dl_files)):
dl_file = dl_files[dli]
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Downloading dataset {} archive {}...'.format(dataset, dl_file))
os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting dataset {} archive {}...'.format(dataset, dl_file))
# create tmp folder
dst_dir_tmp = os.path.join(dst_dir, 'tmp')
os.system('mkdir {}'.format(dst_dir_tmp))
# extract in tmp folder
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir_tmp))
# remove all (possible) subfolders by moving only files in dst_dir
os.system('find {} -type f -exec mv -i {{}} {} \\;'.format(dst_dir_tmp, dst_dir))
# remove tmp folder
os.system('rm -rf {}'.format(dst_dir_tmp))
print('>> Extracted, deleting dataset {} archive {}...'.format(dataset, dl_file))
os.system('rm {}'.format(dst_file))
# for roxford and rparis just make sym links
elif dataset == 'roxford5k' or dataset == 'rparis6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
dataset_old = dataset[1:]
dst_dir_old = os.path.join(datasets_dir, dataset_old, 'jpg')
os.mkdir(os.path.join(datasets_dir, dataset))
os.system('ln -s {} {}'.format(dst_dir_old, dst_dir))
print('>> Created symbolic link from {} jpg to {} jpg'.format(dataset_old, dataset))
gnd_src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'test', dataset)
gnd_dst_dir = os.path.join(datasets_dir, dataset)
gnd_dl_file = 'gnd_{}.pkl'.format(dataset)
gnd_src_file = os.path.join(gnd_src_dir, gnd_dl_file)
gnd_dst_file = os.path.join(gnd_dst_dir, gnd_dl_file)
if not os.path.exists(gnd_dst_file):
print('>> Downloading dataset {} ground truth file...'.format(dataset))
os.system('wget {} -O {}'.format(gnd_src_file, gnd_dst_file))
def download_train(data_dir):
"""
    DOWNLOAD_TRAIN Checks and, if required, downloads the necessary datasets for training.
    download_train(DATA_ROOT) checks if the data necessary for running the example script exist.
    If not, it downloads them into the folder structure:
DATA_ROOT/train/retrieval-SfM-120k/ : folder with rsfm120k images and db files
DATA_ROOT/train/retrieval-SfM-30k/ : folder with rsfm30k images and db files
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'train')
if not os.path.isdir(datasets_dir):
os.mkdir(datasets_dir)
# Download folder train/retrieval-SfM-120k/
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dl_file = 'ims.tar.gz'
if not os.path.isdir(dst_dir):
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Image directory does not exist. Creating: {}'.format(dst_dir))
os.makedirs(dst_dir)
print('>> Downloading ims.tar.gz...')
os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting {}...'.format(dst_file))
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir))
print('>> Extracted, deleting {}...'.format(dst_file))
os.system('rm {}'.format(dst_file))
# Create symlink for train/retrieval-SfM-30k/
dst_dir_old = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-30k', 'ims')
if not os.path.exists(dst_dir):
os.makedirs(os.path.join(datasets_dir, 'retrieval-SfM-30k'))
os.system('ln -s {} {}'.format(dst_dir_old, dst_dir))
print('>> Created symbolic link from retrieval-SfM-120k/ims to retrieval-SfM-30k/ims')
# Download db files
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'dbs')
datasets = ['retrieval-SfM-120k', 'retrieval-SfM-30k']
for dataset in datasets:
dst_dir = os.path.join(datasets_dir, dataset)
if dataset == 'retrieval-SfM-120k':
dl_files = ['{}.pkl'.format(dataset), '{}-whiten.pkl'.format(dataset)]
elif dataset == 'retrieval-SfM-30k':
dl_files = ['{}-whiten.pkl'.format(dataset)]
if not os.path.isdir(dst_dir):
print('>> Dataset directory does not exist. Creating: {}'.format(dst_dir))
os.mkdir(dst_dir)
for i in range(len(dl_files)):
src_file = os.path.join(src_dir, dl_files[i])
dst_file = os.path.join(dst_dir, dl_files[i])
if not os.path.isfile(dst_file):
print('>> DB file {} does not exist. Downloading...'.format(dl_files[i]))
os.system('wget {} -O {}'.format(src_file, dst_file))
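# Usage sketch (data root assumed from cirtorch.utils.general):
# >>> from cirtorch.utils.general import get_data_root
# >>> download_test(get_data_root())    # oxford5k/paris6k images + gnd files
# >>> download_train(get_data_root())   # retrieval-SfM-120k images + db files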

152
cirtorch/utils/download_win.py Executable file

@ -0,0 +1,152 @@
import os
def download_test(data_dir):
"""
    DOWNLOAD_TEST Checks and, if required, downloads the necessary datasets for testing.
    download_test(DATA_ROOT) checks if the data necessary for running the example script exist.
    If not, it downloads them into the folder structure:
DATA_ROOT/test/oxford5k/ : folder with Oxford images and ground truth file
DATA_ROOT/test/paris6k/ : folder with Paris images and ground truth file
DATA_ROOT/test/roxford5k/ : folder with Oxford images and revisited ground truth file
DATA_ROOT/test/rparis6k/ : folder with Paris images and revisited ground truth file
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'test')
if not os.path.isdir(datasets_dir):
os.mkdir(datasets_dir)
# Download datasets folders test/DATASETNAME/
datasets = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
for di in range(len(datasets)):
dataset = datasets[di]
if dataset == 'oxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'paris6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
elif dataset == 'roxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'rparis6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
else:
raise ValueError('Unknown dataset: {}!'.format(dataset))
dst_dir = os.path.join(datasets_dir, dataset, 'jpg')
if not os.path.isdir(dst_dir):
# for oxford and paris download images
if dataset == 'oxford5k' or dataset == 'paris6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
os.makedirs(dst_dir)
for dli in range(len(dl_files)):
dl_file = dl_files[dli]
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Downloading dataset {} archive {}...'.format(dataset, dl_file))
os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting dataset {} archive {}...'.format(dataset, dl_file))
# create tmp folder
dst_dir_tmp = os.path.join(dst_dir, 'tmp')
os.system('mkdir {}'.format(dst_dir_tmp))
# extract in tmp folder
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir_tmp))
# remove all (possible) subfolders by moving only files in dst_dir
os.system('find {} -type f -exec mv -i {{}} {} \\;'.format(dst_dir_tmp, dst_dir))
# remove tmp folder
os.system('rd {}'.format(dst_dir_tmp))
print('>> Extracted, deleting dataset {} archive {}...'.format(dataset, dl_file))
os.system('del {}'.format(dst_file))
# for roxford and rparis just make sym links
elif dataset == 'roxford5k' or dataset == 'rparis6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
dataset_old = dataset[1:]
dst_dir_old = os.path.join(datasets_dir, dataset_old, 'jpg')
os.mkdir(os.path.join(datasets_dir, dataset))
                os.system('cmd /c mklink /d {} {}'.format(dst_dir, dst_dir_old))
print('>> Created symbolic link from {} jpg to {} jpg'.format(dataset_old, dataset))
gnd_src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'test', dataset)
gnd_dst_dir = os.path.join(datasets_dir, dataset)
gnd_dl_file = 'gnd_{}.pkl'.format(dataset)
gnd_src_file = os.path.join(gnd_src_dir, gnd_dl_file)
gnd_dst_file = os.path.join(gnd_dst_dir, gnd_dl_file)
if not os.path.exists(gnd_dst_file):
print('>> Downloading dataset {} ground truth file...'.format(dataset))
os.system('wget {} -O {}'.format(gnd_src_file, gnd_dst_file))
def download_train(data_dir):
"""
    DOWNLOAD_TRAIN Checks and, if required, downloads the necessary datasets for training.
    download_train(DATA_ROOT) checks if the data necessary for running the example script exist.
    If not, it downloads them into the folder structure:
DATA_ROOT/train/retrieval-SfM-120k/ : folder with rsfm120k images and db files
DATA_ROOT/train/retrieval-SfM-30k/ : folder with rsfm30k images and db files
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'train')
if not os.path.isdir(datasets_dir):
os.mkdir(datasets_dir)
# Download folder train/retrieval-SfM-120k/
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dl_file = 'ims.tar.gz'
if not os.path.isdir(dst_dir):
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Image directory does not exist. Creating: {}'.format(dst_dir))
os.makedirs(dst_dir)
print('>> Downloading ims.tar.gz...')
        os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting {}...'.format(dst_file))
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir))
print('>> Extracted, deleting {}...'.format(dst_file))
os.system('del {}'.format(dst_file))
# Create symlink for train/retrieval-SfM-30k/
dst_dir_old = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-30k', 'ims')
    if not os.path.isdir(dst_dir):
        os.makedirs(os.path.join(datasets_dir, 'retrieval-SfM-30k'))
        os.system('cmd /c mklink /d {} {}'.format(dst_dir, dst_dir_old))
print('>> Created symbolic link from retrieval-SfM-120k/ims to retrieval-SfM-30k/ims')
# Download db files
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'dbs')
datasets = ['retrieval-SfM-120k', 'retrieval-SfM-30k']
for dataset in datasets:
dst_dir = os.path.join(datasets_dir, dataset)
if dataset == 'retrieval-SfM-120k':
dl_files = ['{}.pkl'.format(dataset), '{}-whiten.pkl'.format(dataset)]
elif dataset == 'retrieval-SfM-30k':
dl_files = ['{}-whiten.pkl'.format(dataset)]
if not os.path.isdir(dst_dir):
print('>> Dataset directory does not exist. Creating: {}'.format(dst_dir))
os.mkdir(dst_dir)
for i in range(len(dl_files)):
src_file = os.path.join(src_dir, dl_files[i])
dst_file = os.path.join(dst_dir, dl_files[i])
if not os.path.isfile(dst_file):
print('>> DB file {} does not exist. Downloading...'.format(dl_files[i]))
os.system('wget {} -O {}'.format(src_file, dst_file))

149
cirtorch/utils/evaluate.py Executable file

@ -0,0 +1,149 @@
import numpy as np
def compute_ap(ranks, nres):
"""
Computes average precision for given ranked indexes.
Arguments
---------
    ranks : zero-based ranks of positive images
nres : number of positive images
Returns
-------
ap : average precision
"""
# number of images ranked by the system
nimgranks = len(ranks)
# accumulate trapezoids in PR-plot
ap = 0
recall_step = 1. / nres
for j in np.arange(nimgranks):
rank = ranks[j]
if rank == 0:
precision_0 = 1.
else:
precision_0 = float(j) / rank
precision_1 = float(j + 1) / (rank + 1)
ap += (precision_0 + precision_1) * recall_step / 2.
return ap
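# Worked example (hand-checked): two positives ranked at positions 0 and 2.
# j=0, rank=0: precision_0 = 1,   precision_1 = 1/1 -> ap += (1 + 1)/2 * 0.5
# j=1, rank=2: precision_0 = 1/2, precision_1 = 2/3 -> ap += (1/2 + 2/3)/2 * 0.5
# >>> compute_ap(np.array([0, 2]), 2)   # ~0.7917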
def compute_map(ranks, gnd, kappas=[]):
"""
Computes the mAP for a given set of returned results.
Usage:
map = compute_map (ranks, gnd)
        computes mean average precision (map) only
map, aps, pr, prs = compute_map (ranks, gnd, kappas)
computes mean average precision (map), average precision (aps) for each query
computes mean precision at kappas (pr), precision at kappas (prs) for each query
Notes:
1) ranks starts from 0, ranks.shape = db_size X #queries
    2) The junk results (e.g., the query itself) should be declared in the gnd struct array
3) If there are no positive images for some query, that query is excluded from the evaluation
"""
map = 0.
nq = len(gnd) # number of queries
aps = np.zeros(nq)
pr = np.zeros(len(kappas))
prs = np.zeros((nq, len(kappas)))
nempty = 0
for i in np.arange(nq):
qgnd = np.array(gnd[i]['ok'])
# no positive images, skip from the average
if qgnd.shape[0] == 0:
aps[i] = float('nan')
prs[i, :] = float('nan')
nempty += 1
continue
        try:
            qgndj = np.array(gnd[i]['junk'])
        except KeyError:
            qgndj = np.empty(0)
# sorted positions of positive and junk images (0 based)
pos = np.arange(ranks.shape[0])[np.in1d(ranks[:,i], qgnd)]
junk = np.arange(ranks.shape[0])[np.in1d(ranks[:,i], qgndj)]
        k = 0
        ij = 0
if len(junk):
# decrease positions of positives based on the number of
# junk images appearing before them
ip = 0
while (ip < len(pos)):
while (ij < len(junk) and pos[ip] > junk[ij]):
k += 1
ij += 1
pos[ip] = pos[ip] - k
ip += 1
# compute ap
ap = compute_ap(pos, len(qgnd))
map = map + ap
aps[i] = ap
# compute precision @ k
pos += 1 # get it to 1-based
for j in np.arange(len(kappas)):
            kq = min(max(pos), kappas[j])
prs[i, j] = (pos <= kq).sum() / kq
pr = pr + prs[i, :]
map = map / (nq - nempty)
pr = pr / (nq - nempty)
return map, aps, pr, prs
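# Usage sketch (assumed shapes): ranks is db_size x #queries holding 0-based
# database indices sorted by decreasing similarity; each gnd[i] provides 'ok'
# and optionally 'junk' index arrays.
# >>> ranks = np.argsort(-scores, axis=0)   # scores: db_size x #queries
# >>> mAP, aps, pr, prs = compute_map(ranks, gnd, kappas=[1, 5, 10])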
def compute_map_and_print(dataset, ranks, gnd, kappas=[1, 5, 10]):
# old evaluation protocol
if dataset.startswith('oxford5k') or dataset.startswith('paris6k'):
map, aps, _, _ = compute_map(ranks, gnd)
print('>> {}: mAP {:.2f}'.format(dataset, np.around(map*100, decimals=2)))
# new evaluation protocol
elif dataset.startswith('roxford5k') or dataset.startswith('rparis6k'):
gnd_t = []
for i in range(len(gnd)):
g = {}
g['ok'] = np.concatenate([gnd[i]['easy']])
g['junk'] = np.concatenate([gnd[i]['junk'], gnd[i]['hard']])
gnd_t.append(g)
mapE, apsE, mprE, prsE = compute_map(ranks, gnd_t, kappas)
gnd_t = []
for i in range(len(gnd)):
g = {}
g['ok'] = np.concatenate([gnd[i]['easy'], gnd[i]['hard']])
g['junk'] = np.concatenate([gnd[i]['junk']])
gnd_t.append(g)
mapM, apsM, mprM, prsM = compute_map(ranks, gnd_t, kappas)
gnd_t = []
for i in range(len(gnd)):
g = {}
g['ok'] = np.concatenate([gnd[i]['hard']])
g['junk'] = np.concatenate([gnd[i]['junk'], gnd[i]['easy']])
gnd_t.append(g)
mapH, apsH, mprH, prsH = compute_map(ranks, gnd_t, kappas)
print('>> {}: mAP E: {}, M: {}, H: {}'.format(dataset, np.around(mapE*100, decimals=2), np.around(mapM*100, decimals=2), np.around(mapH*100, decimals=2)))
print('>> {}: mP@k{} E: {}, M: {}, H: {}'.format(dataset, kappas, np.around(mprE*100, decimals=2), np.around(mprM*100, decimals=2), np.around(mprH*100, decimals=2)))

34
cirtorch/utils/general.py Executable file

@ -0,0 +1,34 @@
import os
import hashlib
def get_root():
return os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))
def get_data_root():
return os.path.join(get_root(), 'data')
def htime(c):
c = round(c)
days = c // 86400
hours = c // 3600 % 24
minutes = c // 60 % 60
seconds = c % 60
if days > 0:
return '{:d}d {:d}h {:d}m {:d}s'.format(days, hours, minutes, seconds)
if hours > 0:
return '{:d}h {:d}m {:d}s'.format(hours, minutes, seconds)
if minutes > 0:
return '{:d}m {:d}s'.format(minutes, seconds)
return '{:d}s'.format(seconds)
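# Examples: htime(3723) -> '1h 2m 3s', htime(90061) -> '1d 1h 1m 1s'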
def sha256_hash(filename, block_size=65536, length=8):
sha256 = hashlib.sha256()
with open(filename, 'rb') as f:
for block in iter(lambda: f.read(block_size), b''):
sha256.update(block)
return sha256.hexdigest()[:length-1]
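# Note: with the default length=8 this returns the first 7 hex characters, the
# same git-style short-hash length used in the pretrained-model filenames
# elsewhere in this repo (e.g. '...-gem-whiten-9df41d3.pth').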

65
cirtorch/utils/whiten.py Executable file

@ -0,0 +1,65 @@
import os
import numpy as np
def whitenapply(X, m, P, dimensions=None):
if not dimensions:
dimensions = P.shape[0]
X = np.dot(P[:dimensions, :], X-m)
X = X / (np.linalg.norm(X, ord=2, axis=0, keepdims=True) + 1e-6)
return X
def pcawhitenlearn(X):
N = X.shape[1]
# Learning PCA w/o annotations
m = X.mean(axis=1, keepdims=True)
Xc = X - m
Xcov = np.dot(Xc, Xc.T)
Xcov = (Xcov + Xcov.T) / (2*N)
eigval, eigvec = np.linalg.eig(Xcov)
order = eigval.argsort()[::-1]
eigval = eigval[order]
eigvec = eigvec[:, order]
P = np.dot(np.linalg.inv(np.sqrt(np.diag(eigval))), eigvec.T)
return m, P
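# Pipeline sketch (an assumption from the signatures: X holds one descriptor
# per column, i.e. D x N):
# >>> m, P = pcawhitenlearn(X)
# >>> Xw = whitenapply(X, m, P, dimensions=256)   # optional dim. reduction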
def whitenlearn(X, qidxs, pidxs):
# Learning Lw w annotations
m = X[:, qidxs].mean(axis=1, keepdims=True)
df = X[:, qidxs] - X[:, pidxs]
S = np.dot(df, df.T) / df.shape[1]
P = np.linalg.inv(cholesky(S))
df = np.dot(P, X-m)
D = np.dot(df, df.T)
eigval, eigvec = np.linalg.eig(D)
order = eigval.argsort()[::-1]
eigval = eigval[order]
eigvec = eigvec[:, order]
P = np.dot(eigvec.T, P)
return m, P
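# Supervised variant (sketch): qidxs/pidxs are aligned query/positive column
# indices into X, so df collects matching-pair difference vectors.
# >>> m, P = whitenlearn(X, qidxs, pidxs)
# >>> Xw = whitenapply(X, m, P)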
def cholesky(S):
# Cholesky decomposition
# with adding a small value on the diagonal
# until matrix is positive definite
    alpha = 0
    while True:
        try:
            L = np.linalg.cholesky(S + alpha*np.eye(*S.shape))
            return L
        except np.linalg.LinAlgError:
            if alpha == 0:
                alpha = 1e-10
            else:
                alpha *= 10
print(">>>> {}::cholesky: Matrix is not positive definite, adding {:.0e} on the diagonal"
.format(os.path.basename(__file__), alpha))