first push
BIN  cirtorch/.DS_Store  vendored  Normal file
Binary file not shown.
BIN  cirtorch/IamgeRetrieval_dataset/train.pkl  Normal file
Binary file not shown.
6  cirtorch/__init__.py  Executable file
@@ -0,0 +1,6 @@
from . import datasets, examples, layers, networks, utils

from .datasets import datahelpers, genericdataset, testdataset, traindataset
from .layers import functional, loss, normalization, pooling
from .networks import imageretrievalnet
from .utils import general, download, evaluate, whiten
0  cirtorch/datasets/__init__.py  Executable file
56  cirtorch/datasets/datahelpers.py  Executable file
@@ -0,0 +1,56 @@
import os
from PIL import Image

import torch


def cid2filename(cid, prefix):
    """
    Creates a training image path out of its CID name

    Arguments
    ---------
    cid    : name of the image
    prefix : root directory where images are saved

    Returns
    -------
    filename : full image filename
    """
    return os.path.join(prefix, cid[-2:], cid[-4:-2], cid[-6:-4], cid)


def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


def accimage_loader(path):
    import accimage
    try:
        return accimage.Image(path)
    except IOError:
        # Potentially a decoding problem, fall back to PIL.Image
        return pil_loader(path)


def default_loader(path):
    from torchvision import get_image_backend
    if get_image_backend() == 'accimage':
        return accimage_loader(path)
    else:
        return pil_loader(path)


def imresize(img, imsize):
    # resize in place so that the longer image side is at most imsize, keeping aspect ratio
    img.thumbnail((imsize, imsize), Image.ANTIALIAS)
    return img


def flip(x, dim):
    xsize = x.size()
    dim = x.dim() + dim if dim < 0 else dim
    x = x.view(-1, *xsize[dim:])
    x = x.view(x.size(0), x.size(1), -1)[:, getattr(torch.arange(x.size(1)-1, -1, -1), ('cpu', 'cuda')[x.is_cuda])().long(), :]
    return x.view(xsize)


def collate_tuples(batch):
    if len(batch) == 1:
        return [batch[0][0]], [batch[0][1]]
    return [batch[i][0] for i in range(len(batch))], [batch[i][1] for i in range(len(batch))]
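For orientation, cid2filename shards images into three directory levels keyed by the trailing characters of the CID. A quick sketch with a made-up CID and prefix (both hypothetical, for illustration only):

    from cirtorch.datasets.datahelpers import cid2filename

    # hypothetical CID and prefix
    cid2filename('0123456789abcdef', '/data/ims')
    # -> '/data/ims/ef/cd/ab/0123456789abcdef'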
121  cirtorch/datasets/genericdataset.py  Executable file
@@ -0,0 +1,121 @@
import os
import pdb

import torch
import torch.utils.data as data

from cirtorch.datasets.datahelpers import default_loader, imresize


class ImagesFromList(data.Dataset):
    """A generic data loader that loads images from a list
        (Based on ImageFolder from pytorch)

    Args:
        root (string): Root directory path.
        images (list): Relative image paths as strings.
        imsize (int, Default: None): Defines the maximum size of longer image side
        bbxs (list): List of (x1,y1,x2,y2) tuples to crop the query images
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., ``transforms.RandomCrop``
        loader (callable, optional): A function to load an image given its path.

    Attributes:
        images_fn (list): List of full image filenames
    """

    def __init__(self, root, images, imsize=None, bbxs=None, transform=None, loader=default_loader):

        images_fn = [os.path.join(root, images[i]) for i in range(len(images))]

        if len(images_fn) == 0:
            raise(RuntimeError("Dataset contains 0 images!"))

        self.root = root
        self.images = images
        self.imsize = imsize
        self.images_fn = images_fn
        self.bbxs = bbxs
        self.transform = transform
        self.loader = loader

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            image (PIL): Loaded image
        """
        path = self.images_fn[index]
        img = self.loader(path)
        imfullsize = max(img.size)

        if self.bbxs is not None:
            print('self.bbxs>>>ok')
            img = img.crop(self.bbxs[index])

        if self.imsize is not None:
            if self.bbxs is not None:
                print('self.bbxs and self.imsize>>>ok')
                img = imresize(img, self.imsize * max(img.size) / imfullsize)
            else:
                print('not self.bbxs and self.imsize>>>ok')
                img = imresize(img, self.imsize)

        if self.transform is not None:
            print('self.transform>>>>>ok')
            img = self.transform(img)

        return img, path

    def __len__(self):
        return len(self.images_fn)

    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of images: {}\n'.format(self.__len__())
        fmt_str += '    Root Location: {}\n'.format(self.root)
        tmp = '    Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        return fmt_str


class ImagesFromDataList(data.Dataset):
    """A generic data loader that loads images given as an array of pytorch tensors
        (Based on ImageFolder from pytorch)

    Args:
        images (list): Images as tensors.
        transform (callable, optional): A function/transform that takes an image as a tensor
            and returns a transformed version. E.g., ``normalize`` with mean and std
    """

    def __init__(self, images, transform=None):

        if len(images) == 0:
            raise(RuntimeError("Dataset contains 0 images!"))

        self.images = images
        self.transform = transform

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            image (Tensor): Loaded image
        """
        img = self.images[index]
        if self.transform is not None:
            img = self.transform(img)

        if len(img.size()):
            # add a leading batch dimension
            img = img.unsqueeze(0)

        return img

    def __len__(self):
        return len(self.images)

    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of images: {}\n'.format(self.__len__())
        tmp = '    Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        return fmt_str
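A minimal usage sketch for ImagesFromList, wrapped in a DataLoader the way create_epoch_tuples does further below; the root, file names, and transform here are made up:

    import torch
    from torchvision import transforms
    from cirtorch.datasets.genericdataset import ImagesFromList

    # hypothetical root and image list
    dataset = ImagesFromList(root='/data/jpg', images=['a.jpg', 'b.jpg'], imsize=1024,
                             transform=transforms.ToTensor())
    loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
    for img, path in loader:
        pass  # img: 1 x 3 x H x W tensor, longer side resized to at most 1024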
38  cirtorch/datasets/testdataset.py  Executable file
@@ -0,0 +1,38 @@
import os
import pickle

DATASETS = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']

def configdataset(dataset, dir_main):

    dataset = dataset.lower()

    if dataset not in DATASETS:
        raise ValueError('Unknown dataset: {}!'.format(dataset))

    # loading imlist, qimlist, and gnd, in cfg as a dict
    gnd_fname = os.path.join(dir_main, dataset, 'gnd_{}.pkl'.format(dataset))
    with open(gnd_fname, 'rb') as f:
        cfg = pickle.load(f)
    cfg['gnd_fname'] = gnd_fname

    cfg['ext'] = '.jpg'
    cfg['qext'] = '.jpg'
    cfg['dir_data'] = os.path.join(dir_main, dataset)
    cfg['dir_images'] = os.path.join(cfg['dir_data'], 'jpg')

    cfg['n'] = len(cfg['imlist'])
    cfg['nq'] = len(cfg['qimlist'])

    cfg['im_fname'] = config_imname
    cfg['qim_fname'] = config_qimname

    cfg['dataset'] = dataset

    return cfg

def config_imname(cfg, i):
    return os.path.join(cfg['dir_images'], cfg['imlist'][i] + cfg['ext'])

def config_qimname(cfg, i):
    return os.path.join(cfg['dir_images'], cfg['qimlist'][i] + cfg['qext'])
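Assuming the ground-truth pickle is already in place under the test root, configdataset usage looks roughly like this (the data root is hypothetical):

    from cirtorch.datasets.testdataset import configdataset

    cfg = configdataset('roxford5k', '/data/test')  # expects /data/test/roxford5k/gnd_roxford5k.pkl
    print(cfg['n'], cfg['nq'])                      # number of database and query images
    print(cfg['im_fname'](cfg, 0))                  # full path of the first database image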
247  cirtorch/datasets/traindataset.py  Executable file
@@ -0,0 +1,247 @@
import os
import pickle
import pdb
import numpy as np
import torch
import torch.utils.data as data

from cirtorch.datasets.datahelpers import default_loader, imresize, cid2filename
from cirtorch.datasets.genericdataset import ImagesFromList
from cirtorch.utils.general import get_data_root

class TuplesDataset(data.Dataset):
    """Data loader that loads training and validation tuples of
        Radenovic et al., ECCV16: CNN image retrieval learns from BoW

    Args:
        name (string): dataset name: 'retrieval-sfm-120k'
        mode (string): 'train' or 'val' for training and validation parts of dataset
        imsize (int, Default: None): Defines the maximum size of longer image side
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., ``transforms.RandomCrop``
        loader (callable, optional): A function to load an image given its path.
        nnum (int, Default: 5): Number of negatives for a query image in a training tuple
        qsize (int, Default: 1000): Number of query images, i.e. number of (q,p,n1,...,nN) tuples, to be processed in one epoch
        poolsize (int, Default: 10000): Pool size for negative images re-mining

    Attributes:
        images (list): List of full filenames for each image
        clusters (list): List of clusterID per image
        qpool (list): List of all query image indexes
        ppool (list): List of positive image indexes, each corresponding to query at the same position in qpool

        qidxs (list): List of qsize query image indexes to be processed in an epoch
        pidxs (list): List of qsize positive image indexes, each corresponding to query at the same position in qidxs
        nidxs (list): List of qsize tuples of negative images
            Each nidxs tuple contains nnum images corresponding to query image at the same position in qidxs

        Lists qidxs, pidxs, nidxs are refreshed by calling the ``create_epoch_tuples()`` method,
            i.e. new q-p pairs are picked and negative images are remined
    """

    def __init__(self, name, mode, imsize=None, nnum=5, qsize=2000, poolsize=20000, transform=None, loader=default_loader):

        if not (mode == 'train' or mode == 'val'):
            raise(RuntimeError("MODE should be either train or val, passed as string"))

        if name.startswith('retrieval-SfM'):
            # setting up paths
            # data_root = get_data_root()
            # db_root = os.path.join(data_root, 'train', name)
            # ims_root = os.path.join(db_root, 'ims')
            db_root = '/home/lc/project/Search_By_Image_Upgrade/cirtorch/IamgeRetrieval_dataset'
            ims_root = '/home/lc/project/Search_By_Image_Upgrade/cirtorch/IamgeRetrieval_dataset/train'

            # loading db
            db_fn = os.path.join(db_root, '{}.pkl'.format('train'))
            with open(db_fn, 'rb') as f:
                db = pickle.load(f)[mode]

            # setting fullpath for images
            self.images = [cid2filename(db['cids'][i], ims_root) for i in range(len(db['cids']))]

            # elif name.startswith('gl'):
            ## TODO: NOT IMPLEMENTED YET PROPERLY (WITH AUTOMATIC DOWNLOAD)
            # setting up paths
            # db_root = '/mnt/fry2/users/datasets/landmarkscvprw18/recognition/'
            # ims_root = os.path.join(db_root, 'images', 'train')
            # loading db
            # db_fn = os.path.join(db_root, '{}.pkl'.format('train'))
            # with open(db_fn, 'rb') as f:
            #     db = pickle.load(f)[mode]

            # setting fullpath for images
            # (NOTE: this overrides the cid2filename paths above; images are read flat from ims_root)
            self.images = [os.path.join(ims_root, db['cids'][i]) for i in range(len(db['cids']))]
        else:
            raise(RuntimeError("Unknown dataset name!"))

        # initializing tuples dataset
        self.name = name
        self.mode = mode
        self.imsize = imsize
        self.clusters = db['cluster']
        self.qpool = db['qidxs']
        self.ppool = db['pidxs']

        ## If we want to keep only unique q-p pairs
        ## However, ordering of pairs will change, although that is not important
        # qpidxs = list(set([(self.qidxs[i], self.pidxs[i]) for i in range(len(self.qidxs))]))
        # self.qidxs = [qpidxs[i][0] for i in range(len(qpidxs))]
        # self.pidxs = [qpidxs[i][1] for i in range(len(qpidxs))]

        # size of training subset for an epoch
        self.nnum = nnum
        self.qsize = min(qsize, len(self.qpool))
        self.poolsize = min(poolsize, len(self.images))
        self.qidxs = None
        self.pidxs = None
        self.nidxs = None

        self.transform = transform
        self.loader = loader

        self.print_freq = 10

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            images tuple (q,p,n1,...,nN): Loaded train/val tuple at index of self.qidxs
        """
        if self.__len__() == 0:
            raise(RuntimeError("List qidxs is empty. Run ``dataset.create_epoch_tuples(net)`` method to create subset for train/val!"))

        output = []
        # query image
        output.append(self.loader(self.images[self.qidxs[index]]))
        # positive image
        output.append(self.loader(self.images[self.pidxs[index]]))
        # negative images
        for i in range(len(self.nidxs[index])):
            output.append(self.loader(self.images[self.nidxs[index][i]]))

        if self.imsize is not None:
            output = [imresize(img, self.imsize) for img in output]

        if self.transform is not None:
            output = [self.transform(output[i]).unsqueeze_(0) for i in range(len(output))]

        target = torch.Tensor([-1, 1] + [0]*len(self.nidxs[index]))

        return output, target

    def __len__(self):
        # if not self.qidxs:
        #     return 0
        # return len(self.qidxs)
        return self.qsize

    def __repr__(self):
        fmt_str = self.__class__.__name__ + '\n'
        fmt_str += '    Name and mode: {} {}\n'.format(self.name, self.mode)
        fmt_str += '    Number of images: {}\n'.format(len(self.images))
        fmt_str += '    Number of training tuples: {}\n'.format(len(self.qpool))
        fmt_str += '    Number of negatives per tuple: {}\n'.format(self.nnum)
        fmt_str += '    Number of tuples processed in an epoch: {}\n'.format(self.qsize)
        fmt_str += '    Pool size for negative remining: {}\n'.format(self.poolsize)
        tmp = '    Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        return fmt_str

    def create_epoch_tuples(self, net):

        print('>> Creating tuples for an epoch of {}-{}...'.format(self.name, self.mode))
        print(">>>> used network: ")
        print(net.meta_repr())

        ## ------------------------
        ## SELECTING POSITIVE PAIRS
        ## ------------------------

        # draw qsize random queries for tuples
        idxs2qpool = torch.randperm(len(self.qpool))[:self.qsize]
        self.qidxs = [self.qpool[i] for i in idxs2qpool]
        self.pidxs = [self.ppool[i] for i in idxs2qpool]

        ## ------------------------
        ## SELECTING NEGATIVE PAIRS
        ## ------------------------

        # if nnum = 0 create dummy nidxs
        # useful when only positives used for training
        if self.nnum == 0:
            self.nidxs = [[] for _ in range(len(self.qidxs))]
            return 0

        # draw poolsize random images for pool of negatives images
        idxs2images = torch.randperm(len(self.images))[:self.poolsize]

        # prepare network
        net.cuda()
        net.eval()

        # no gradients computed, to reduce memory and increase speed
        with torch.no_grad():

            print('>> Extracting descriptors for query images...')
            # prepare query loader
            loader = torch.utils.data.DataLoader(
                ImagesFromList(root='', images=[self.images[i] for i in self.qidxs], imsize=self.imsize, transform=self.transform),
                batch_size=1, shuffle=False, num_workers=8, pin_memory=True
            )
            # extract query vectors
            qvecs = torch.zeros(net.meta['outputdim'], len(self.qidxs)).cuda()
            for i, input in enumerate(loader):
                # print('*********************', input, type(input))
                # print('#######################', type(input))
                qvecs[:, i] = net(input[0].cuda()).data.squeeze()
                if (i+1) % self.print_freq == 0 or (i+1) == len(self.qidxs):
                    print('\r>>>> {}/{} done...'.format(i+1, len(self.qidxs)), end='')
            print('')

            print('>> Extracting descriptors for negative pool...')
            # prepare negative pool data loader
            loader = torch.utils.data.DataLoader(
                ImagesFromList(root='', images=[self.images[i] for i in idxs2images], imsize=self.imsize, transform=self.transform),
                batch_size=1, shuffle=False, num_workers=8, pin_memory=True
            )
            # extract negative pool vectors
            poolvecs = torch.zeros(net.meta['outputdim'], len(idxs2images)).cuda()
            for i, input in enumerate(loader):
                poolvecs[:, i] = net(input[0].cuda()).data.squeeze()
                if (i+1) % self.print_freq == 0 or (i+1) == len(idxs2images):
                    print('\r>>>> {}/{} done...'.format(i+1, len(idxs2images)), end='')
            print('')

            print('>> Searching for hard negatives...')
            # compute dot product scores and ranks on GPU
            scores = torch.mm(poolvecs.t(), qvecs)
            scores, ranks = torch.sort(scores, dim=0, descending=True)
            avg_ndist = torch.tensor(0).float().cuda()  # for statistics
            n_ndist = torch.tensor(0).float().cuda()  # for statistics
            # selection of negative examples
            self.nidxs = []
            for q in range(len(self.qidxs)):
                # do not use query cluster,
                # those images are potentially positive
                qcluster = self.clusters[self.qidxs[q]]
                clusters = [qcluster]
                nidxs = []
                r = 0
                while len(nidxs) < self.nnum:
                    potential = idxs2images[ranks[r, q]]
                    # take at most one image from the same cluster
                    if not self.clusters[potential] in clusters:
                        nidxs.append(potential)
                        clusters.append(self.clusters[potential])
                        avg_ndist += torch.pow(qvecs[:,q]-poolvecs[:,ranks[r, q]]+1e-6, 2).sum(dim=0).sqrt()
                        n_ndist += 1
                    r += 1
                self.nidxs.append(nidxs)
            print('>>>> Average negative l2-distance: {:.2f}'.format(avg_ndist/n_ndist))
            print('>>>> Done')

        return (avg_ndist/n_ndist).item()  # return average negative l2-distance
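Worth spelling out: the target tensor built in __getitem__ encodes the role of each image in the tuple, which is the labeling the contrastive loss in this codebase keys on: -1 marks the query, 1 the positive, 0 each negative. For the default nnum=5 this gives:

    import torch

    # tuple layout: (query, positive, n1, ..., n5)
    target = torch.Tensor([-1, 1, 0, 0, 0, 0, 0])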
0  cirtorch/examples/__init__.py  Executable file
266  cirtorch/examples/test.py  Executable file
@@ -0,0 +1,266 @@
import argparse
import os
import time
import pickle
import pdb

import numpy as np

import torch
from torch.utils.model_zoo import load_url
from torchvision import transforms

from cirtorch.networks.imageretrievalnet import init_network, extract_vectors
from cirtorch.datasets.datahelpers import cid2filename
from cirtorch.datasets.testdataset import configdataset
from cirtorch.utils.download import download_train, download_test
from cirtorch.utils.whiten import whitenlearn, whitenapply
from cirtorch.utils.evaluate import compute_map_and_print
from cirtorch.utils.general import get_data_root, htime

PRETRAINED = {
    'retrievalSfM120k-vgg16-gem'     : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/retrievalSfM120k-vgg16-gem-b4dcdc6.pth',
    'retrievalSfM120k-resnet101-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/retrievalSfM120k-resnet101-gem-b80fb85.pth',
    # new networks with whitening learned end-to-end
    'rSfM120k-tl-resnet50-gem-w'     : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet50-gem-w-97bf910.pth',
    'rSfM120k-tl-resnet101-gem-w'    : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet101-gem-w-a155e54.pth',
    'rSfM120k-tl-resnet152-gem-w'    : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet152-gem-w-f39cada.pth',
    'gl18-tl-resnet50-gem-w'         : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet50-gem-w-83fdc30.pth',
    'gl18-tl-resnet101-gem-w'        : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet101-gem-w-a4d43db.pth',
    'gl18-tl-resnet152-gem-w'        : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet152-gem-w-21278d5.pth',
}

datasets_names = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
whitening_names = ['retrieval-SfM-30k', 'retrieval-SfM-120k']

parser = argparse.ArgumentParser(description='PyTorch CNN Image Retrieval Testing')

# network
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('--network-path', '-npath', metavar='NETWORK',
                   help="pretrained network or network path (destination where network is saved)")
group.add_argument('--network-offtheshelf', '-noff', metavar='NETWORK',
                   help="off-the-shelf network, in the format 'ARCHITECTURE-POOLING' or 'ARCHITECTURE-POOLING-{reg-lwhiten-whiten}'," +
                        " examples: 'resnet101-gem' | 'resnet101-gem-reg' | 'resnet101-gem-whiten' | 'resnet101-gem-lwhiten' | 'resnet101-gem-reg-whiten'")

# test options
parser.add_argument('--datasets', '-d', metavar='DATASETS', default='oxford5k,paris6k',
                    help="comma separated list of test datasets: " +
                         " | ".join(datasets_names) +
                         " (default: 'oxford5k,paris6k')")
parser.add_argument('--image-size', '-imsize', default=1024, type=int, metavar='N',
                    help="maximum size of longer image side used for testing (default: 1024)")
parser.add_argument('--multiscale', '-ms', metavar='MULTISCALE', default='[1]',
                    help="use multiscale vectors for testing, " +
                         " examples: '[1]' | '[1, 1/2**(1/2), 1/2]' | '[1, 2**(1/2), 1/2**(1/2)]' (default: '[1]')")
parser.add_argument('--whitening', '-w', metavar='WHITENING', default=None, choices=whitening_names,
                    help="dataset used to learn whitening for testing: " +
                         " | ".join(whitening_names) +
                         " (default: None)")

# GPU ID
parser.add_argument('--gpu-id', '-g', default='0', metavar='N',
                    help="gpu id used for testing (default: '0')")


def main():
    args = parser.parse_args()

    # check if there are unknown datasets
    for dataset in args.datasets.split(','):
        if dataset not in datasets_names:
            raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))

    # check if test datasets are downloaded
    # and download if they are not
    download_train(get_data_root())
    download_test(get_data_root())

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    # loading network from path
    if args.network_path is not None:

        print(">> Loading network:\n>>>> '{}'".format(args.network_path))
        if args.network_path in PRETRAINED:
            # pretrained networks (downloaded automatically)
            state = load_url(PRETRAINED[args.network_path], model_dir=os.path.join(get_data_root(), 'networks'))
        else:
            # fine-tuned network from path
            state = torch.load(args.network_path)

        # parsing net params from meta
        # architecture, pooling, mean, std required
        # the rest has default values, in case they don't exist
        net_params = {}
        net_params['architecture'] = state['meta']['architecture']
        net_params['pooling'] = state['meta']['pooling']
        net_params['local_whitening'] = state['meta'].get('local_whitening', False)
        net_params['regional'] = state['meta'].get('regional', False)
        net_params['whitening'] = state['meta'].get('whitening', False)
        net_params['mean'] = state['meta']['mean']
        net_params['std'] = state['meta']['std']
        net_params['pretrained'] = False

        # load network
        net = init_network(net_params)
        net.load_state_dict(state['state_dict'])

        # if whitening is precomputed
        if 'Lw' in state['meta']:
            net.meta['Lw'] = state['meta']['Lw']

        print(">>>> loaded network: ")
        print(net.meta_repr())

    # loading offtheshelf network
    elif args.network_offtheshelf is not None:

        # parse off-the-shelf parameters
        offtheshelf = args.network_offtheshelf.split('-')
        net_params = {}
        net_params['architecture'] = offtheshelf[0]
        net_params['pooling'] = offtheshelf[1]
        net_params['local_whitening'] = 'lwhiten' in offtheshelf[2:]
        net_params['regional'] = 'reg' in offtheshelf[2:]
        net_params['whitening'] = 'whiten' in offtheshelf[2:]
        net_params['pretrained'] = True

        # load off-the-shelf network
        print(">> Loading off-the-shelf network:\n>>>> '{}'".format(args.network_offtheshelf))
        net = init_network(net_params)
        print(">>>> loaded network: ")
        print(net.meta_repr())

    # setting up the multi-scale parameters
    ms = list(eval(args.multiscale))
    if len(ms) > 1 and net.meta['pooling'] == 'gem' and not net.meta['regional'] and not net.meta['whitening']:
        msp = net.pool.p.item()
        print(">> Set-up multiscale:")
        print(">>>> ms: {}".format(ms))
        print(">>>> msp: {}".format(msp))
    else:
        msp = 1

    # moving network to gpu and eval mode
    net.cuda()
    net.eval()

    # set up the transform
    normalize = transforms.Normalize(
        mean=net.meta['mean'],
        std=net.meta['std']
    )
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])

    # compute whitening
    if args.whitening is not None:
        start = time.time()

        if 'Lw' in net.meta and args.whitening in net.meta['Lw']:

            print('>> {}: Whitening is precomputed, loading it...'.format(args.whitening))

            if len(ms) > 1:
                Lw = net.meta['Lw'][args.whitening]['ms']
            else:
                Lw = net.meta['Lw'][args.whitening]['ss']

        else:

            # if we evaluate networks from path we should save/load whitening
            # not to compute it every time
            if args.network_path is not None:
                whiten_fn = args.network_path + '_{}_whiten'.format(args.whitening)
                if len(ms) > 1:
                    whiten_fn += '_ms'
                whiten_fn += '.pth'
            else:
                whiten_fn = None

            if whiten_fn is not None and os.path.isfile(whiten_fn):
                print('>> {}: Whitening is precomputed, loading it...'.format(args.whitening))
                Lw = torch.load(whiten_fn)

            else:
                print('>> {}: Learning whitening...'.format(args.whitening))

                # loading db
                db_root = os.path.join(get_data_root(), 'train', args.whitening)
                ims_root = os.path.join(db_root, 'ims')
                db_fn = os.path.join(db_root, '{}-whiten.pkl'.format(args.whitening))
                with open(db_fn, 'rb') as f:
                    db = pickle.load(f)
                images = [cid2filename(db['cids'][i], ims_root) for i in range(len(db['cids']))]

                # extract whitening vectors
                print('>> {}: Extracting...'.format(args.whitening))
                wvecs = extract_vectors(net, images, args.image_size, transform, ms=ms, msp=msp)

                # learning whitening
                print('>> {}: Learning...'.format(args.whitening))
                wvecs = wvecs.numpy()
                m, P = whitenlearn(wvecs, db['qidxs'], db['pidxs'])
                Lw = {'m': m, 'P': P}

                # saving whitening if whiten_fn exists
                if whiten_fn is not None:
                    print('>> {}: Saving to {}...'.format(args.whitening, whiten_fn))
                    torch.save(Lw, whiten_fn)

        print('>> {}: elapsed time: {}'.format(args.whitening, htime(time.time()-start)))

    else:
        Lw = None

    # evaluate on test datasets
    datasets = args.datasets.split(',')
    for dataset in datasets:
        start = time.time()

        print('>> {}: Extracting...'.format(dataset))

        # prepare config structure for the test dataset
        cfg = configdataset(dataset, os.path.join(get_data_root(), 'test'))
        images = [cfg['im_fname'](cfg, i) for i in range(cfg['n'])]
        qimages = [cfg['qim_fname'](cfg, i) for i in range(cfg['nq'])]
        try:
            bbxs = [tuple(cfg['gnd'][i]['bbx']) for i in range(cfg['nq'])]
        except:
            bbxs = None  # for holidaysmanrot and copydays

        # extract database and query vectors
        print('>> {}: database images...'.format(dataset))
        vecs = extract_vectors(net, images, args.image_size, transform, ms=ms, msp=msp)
        print('>> {}: query images...'.format(dataset))
        qvecs = extract_vectors(net, qimages, args.image_size, transform, bbxs=bbxs, ms=ms, msp=msp)

        print('>> {}: Evaluating...'.format(dataset))

        # convert to numpy
        vecs = vecs.numpy()
        qvecs = qvecs.numpy()

        # search, rank, and print
        scores = np.dot(vecs.T, qvecs)
        ranks = np.argsort(-scores, axis=0)
        compute_map_and_print(dataset, ranks, cfg['gnd'])

        if Lw is not None:
            # whiten the vectors
            vecs_lw = whitenapply(vecs, Lw['m'], Lw['P'])
            qvecs_lw = whitenapply(qvecs, Lw['m'], Lw['P'])

            # search, rank, and print
            scores = np.dot(vecs_lw.T, qvecs_lw)
            ranks = np.argsort(-scores, axis=0)
            compute_map_and_print(dataset + ' + whiten', ranks, cfg['gnd'])

        print('>> {}: elapsed time: {}'.format(dataset, htime(time.time()-start)))


if __name__ == '__main__':
    main()
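The search step above is just a matrix product: the network L2-normalizes its descriptors, so the dot product is cosine similarity. A self-contained sketch of the shapes involved (random vectors stand in for real descriptors):

    import numpy as np

    D, N, Q = 4, 5, 2                                                      # hypothetical sizes
    vecs = np.random.randn(D, N);  vecs /= np.linalg.norm(vecs, axis=0)    # D x N database
    qvecs = np.random.randn(D, Q); qvecs /= np.linalg.norm(qvecs, axis=0)  # D x Q queries

    scores = np.dot(vecs.T, qvecs)       # N x Q similarity matrix
    ranks = np.argsort(-scores, axis=0)  # ranks[:, q] = database indices, best match first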
145  cirtorch/examples/test_e2e.py  Executable file
@@ -0,0 +1,145 @@
import argparse
import os
import time
import pickle
import pdb

import numpy as np

import torch
from torch.utils.model_zoo import load_url
from torchvision import transforms

from cirtorch.networks.imageretrievalnet import init_network, extract_vectors
from cirtorch.datasets.testdataset import configdataset
from cirtorch.utils.download import download_train, download_test
from cirtorch.utils.evaluate import compute_map_and_print
from cirtorch.utils.general import get_data_root, htime

PRETRAINED = {
    'rSfM120k-tl-resnet50-gem-w'  : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet50-gem-w-97bf910.pth',
    'rSfM120k-tl-resnet101-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet101-gem-w-a155e54.pth',
    'rSfM120k-tl-resnet152-gem-w' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/retrieval-SfM-120k/rSfM120k-tl-resnet152-gem-w-f39cada.pth',
    'gl18-tl-resnet50-gem-w'      : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet50-gem-w-83fdc30.pth',
    'gl18-tl-resnet101-gem-w'     : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet101-gem-w-a4d43db.pth',
    'gl18-tl-resnet152-gem-w'     : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/gl18-tl-resnet152-gem-w-21278d5.pth',
}

datasets_names = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']

parser = argparse.ArgumentParser(description='PyTorch CNN Image Retrieval Testing End-to-End')

# test options
parser.add_argument('--network', '-n', metavar='NETWORK',
                    help="network to be evaluated: " +
                         " | ".join(PRETRAINED.keys()))
parser.add_argument('--datasets', '-d', metavar='DATASETS', default='roxford5k,rparis6k',
                    help="comma separated list of test datasets: " +
                         " | ".join(datasets_names) +
                         " (default: 'roxford5k,rparis6k')")
parser.add_argument('--image-size', '-imsize', default=1024, type=int, metavar='N',
                    help="maximum size of longer image side used for testing (default: 1024)")
parser.add_argument('--multiscale', '-ms', metavar='MULTISCALE', default='[1]',
                    help="use multiscale vectors for testing, " +
                         " examples: '[1]' | '[1, 1/2**(1/2), 1/2]' | '[1, 2**(1/2), 1/2**(1/2)]' (default: '[1]')")

# GPU ID
parser.add_argument('--gpu-id', '-g', default='0', metavar='N',
                    help="gpu id used for testing (default: '0')")


def main():
    args = parser.parse_args()

    # check if there are unknown datasets
    for dataset in args.datasets.split(','):
        if dataset not in datasets_names:
            raise ValueError('Unsupported or unknown dataset: {}!'.format(dataset))

    # check if test datasets are downloaded
    # and download if they are not
    download_train(get_data_root())
    download_test(get_data_root())

    # setting up the visible GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    # loading network
    # pretrained networks (downloaded automatically)
    print(">> Loading network:\n>>>> '{}'".format(args.network))
    state = load_url(PRETRAINED[args.network], model_dir=os.path.join(get_data_root(), 'networks'))
    # state = torch.load(args.network)
    # parsing net params from meta
    # architecture, pooling, mean, std required
    # the rest has default values, in case they don't exist
    net_params = {}
    net_params['architecture'] = state['meta']['architecture']
    net_params['pooling'] = state['meta']['pooling']
    net_params['local_whitening'] = state['meta'].get('local_whitening', False)
    net_params['regional'] = state['meta'].get('regional', False)
    net_params['whitening'] = state['meta'].get('whitening', False)
    net_params['mean'] = state['meta']['mean']
    net_params['std'] = state['meta']['std']
    net_params['pretrained'] = False
    # network initialization
    net = init_network(net_params)
    net.load_state_dict(state['state_dict'])

    print(">>>> loaded network: ")
    print(net.meta_repr())

    # setting up the multi-scale parameters
    ms = list(eval(args.multiscale))
    print(">>>> Evaluating scales: {}".format(ms))

    # moving network to gpu and eval mode
    net.cuda()
    net.eval()

    # set up the transform
    normalize = transforms.Normalize(
        mean=net.meta['mean'],
        std=net.meta['std']
    )
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])

    # evaluate on test datasets
    datasets = args.datasets.split(',')
    for dataset in datasets:
        start = time.time()

        print('>> {}: Extracting...'.format(dataset))

        # prepare config structure for the test dataset
        cfg = configdataset(dataset, os.path.join(get_data_root(), 'test'))
        images = [cfg['im_fname'](cfg, i) for i in range(cfg['n'])]
        qimages = [cfg['qim_fname'](cfg, i) for i in range(cfg['nq'])]
        try:
            bbxs = [tuple(cfg['gnd'][i]['bbx']) for i in range(cfg['nq'])]
        except:
            bbxs = None  # for holidaysmanrot and copydays

        # extract database and query vectors
        print('>> {}: database images...'.format(dataset))
        vecs = extract_vectors(net, images, args.image_size, transform, ms=ms)
        print('>> {}: query images...'.format(dataset))
        qvecs = extract_vectors(net, qimages, args.image_size, transform, bbxs=bbxs, ms=ms)

        print('>> {}: Evaluating...'.format(dataset))

        # convert to numpy
        vecs = vecs.numpy()
        qvecs = qvecs.numpy()

        # search, rank, and print
        scores = np.dot(vecs.T, qvecs)
        ranks = np.argsort(-scores, axis=0)
        compute_map_and_print(dataset, ranks, cfg['gnd'])

        print('>> {}: elapsed time: {}'.format(dataset, htime(time.time()-start)))


if __name__ == '__main__':
    main()
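Note that --multiscale is parsed with eval, so the argument must be a valid Python list expression; the documented examples expand like this:

    ms = list(eval('[1, 1/2**(1/2), 1/2]'))
    # -> [1, 0.7071067811865475, 0.5], i.e. full, 1/sqrt(2), and half resolution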
580  cirtorch/examples/train.py  Executable file
@ -0,0 +1,580 @@
|
||||
import sys
|
||||
sys.path.append('/home/lc/project/Search_By_Image_Upgrade')
|
||||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
import math
|
||||
import pickle
|
||||
import pdb
|
||||
|
||||
import numpy as np
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim
|
||||
import torch.utils.data
|
||||
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.models as models
|
||||
|
||||
from cirtorch.networks.imageretrievalnet import init_network, extract_vectors
|
||||
from cirtorch.layers.loss import ContrastiveLoss, TripletLoss
|
||||
from cirtorch.datasets.datahelpers import collate_tuples, cid2filename
|
||||
from cirtorch.datasets.traindataset import TuplesDataset
|
||||
from cirtorch.datasets.testdataset import configdataset
|
||||
from cirtorch.utils.download import download_train, download_test
|
||||
from cirtorch.utils.whiten import whitenlearn, whitenapply
|
||||
from cirtorch.utils.evaluate import compute_map_and_print
|
||||
from cirtorch.utils.general import get_data_root, htime
|
||||
|
||||
training_dataset_names = ['retrieval-SfM-120k']
|
||||
test_datasets_names = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
|
||||
test_whiten_names = ['retrieval-SfM-30k', 'retrieval-SfM-120k']
|
||||
|
||||
model_names = sorted(name for name in models.__dict__
|
||||
if name.islower() and not name.startswith("__")
|
||||
and callable(models.__dict__[name]))
|
||||
pool_names = ['mac', 'spoc', 'gem', 'gemmp']
|
||||
loss_names = ['contrastive', 'triplet']
|
||||
optimizer_names = ['sgd', 'adam']
|
||||
|
||||
parser = argparse.ArgumentParser(description='PyTorch CNN Image Retrieval Training')
|
||||
|
||||
# export directory, training and val datasets, test datasets
|
||||
parser.add_argument('directory', metavar='EXPORT_DIR',default='models',
|
||||
help='destination where trained network should be saved')
|
||||
parser.add_argument('--training-dataset', '-d', metavar='DATASET', default='retrieval-SfM-120k', choices=training_dataset_names,
|
||||
help='training dataset: ' +
|
||||
' | '.join(training_dataset_names) +
|
||||
' (default: retrieval-SfM-120k)')
|
||||
parser.add_argument('--no-val', dest='val', action='store_false',default = False,
|
||||
help='do not run validation')
|
||||
parser.add_argument('--test-datasets', '-td', metavar='DATASETS', default='roxford5k,rparis6k',
|
||||
help='comma separated list of test datasets: ' +
|
||||
' | '.join(test_datasets_names) +
|
||||
' (default: roxford5k,rparis6k)')
|
||||
parser.add_argument('--test-whiten', metavar='DATASET', default='', choices=test_whiten_names,
|
||||
help='dataset used to learn whitening for testing: ' +
|
||||
' | '.join(test_whiten_names) +
|
||||
' (default: None)')
|
||||
parser.add_argument('--test-freq', default=1, type=int, metavar='N',
|
||||
help='run test evaluation every N epochs (default: 1)')
|
||||
|
||||
# network architecture and initialization options
|
||||
parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet50', choices=model_names,
|
||||
help='model architecture: ' +
|
||||
' | '.join(model_names) +
|
||||
' (default: resnet101)')
|
||||
parser.add_argument('--pool', '-p', metavar='POOL', default='gem', choices=pool_names,
|
||||
help='pooling options: ' +
|
||||
' | '.join(pool_names) +
|
||||
' (default: gem)')
|
||||
parser.add_argument('--local-whitening', '-lw', dest='local_whitening', action='store_true',
|
||||
help='train model with learnable local whitening (linear layer) before the pooling')
|
||||
parser.add_argument('--regional', '-r', dest='regional', action='store_true',
|
||||
help='train model with regional pooling using fixed grid')
|
||||
parser.add_argument('--whitening', '-w', dest='whitening', action='store_true',
|
||||
help='train model with learnable whitening (linear layer) after the pooling')
|
||||
parser.add_argument('--not-pretrained', dest='pretrained', action='store_false',
|
||||
help='initialize model with random weights (default: pretrained on imagenet)')
|
||||
parser.add_argument('--loss', '-l', metavar='LOSS', default='contrastive',
|
||||
choices=loss_names,
|
||||
help='training loss options: ' +
|
||||
' | '.join(loss_names) +
|
||||
' (default: contrastive)')
|
||||
parser.add_argument('--loss-margin', '-lm', metavar='LM', default=0.7, type=float,
|
||||
help='loss margin: (default: 0.7)')
|
||||
|
||||
# train/val options specific for image retrieval learning
|
||||
parser.add_argument('--image-size', default=648, type=int, metavar='N', # 1024
|
||||
help='maximum size of longer image side used for training (default: 1024)')
|
||||
parser.add_argument('--neg-num', '-nn', default=5, type=int, metavar='N',
|
||||
help='number of negative image per train/val tuple (default: 5)')
|
||||
parser.add_argument('--query-size', '-qs', default=2000, type=int, metavar='N',
|
||||
help='number of queries randomly drawn per one train epoch (default: 2000)')
|
||||
parser.add_argument('--pool-size', '-ps', default=20000, type=int, metavar='N',
|
||||
help='size of the pool for hard negative mining (default: 20000)')
|
||||
|
||||
# standard train/val options
|
||||
parser.add_argument('--gpu-id', '-g', default='0,1', metavar='N',
|
||||
help='gpu id used for training (default: 0)')
|
||||
parser.add_argument('--workers', '-j', default=8, type=int, metavar='N',
|
||||
help='number of data loading workers (default: 8)')
|
||||
parser.add_argument('--epochs', default=100, type=int, metavar='N',
|
||||
help='number of total epochs to run (default: 100)')
|
||||
parser.add_argument('--batch-size', '-b', default=32, type=int, metavar='N',
|
||||
help='number of (q,p,n1,...,nN) tuples in a mini-batch (default: 5)')
|
||||
parser.add_argument('--update-every', '-u', default=1, type=int, metavar='N',
|
||||
help='update model weights every N batches, used to handle really large batches, ' +
|
||||
'batch_size effectively becomes update_every x batch_size (default: 1)')
|
||||
parser.add_argument('--optimizer', '-o', metavar='OPTIMIZER', default='adam',
|
||||
choices=optimizer_names,
|
||||
help='optimizer options: ' +
|
||||
' | '.join(optimizer_names) +
|
||||
' (default: adam)')
|
||||
parser.add_argument('--lr', '--learning-rate', default=1e-6, type=float,
|
||||
metavar='LR', help='initial learning rate (default: 1e-6)')
|
||||
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
|
||||
help='momentum')
|
||||
parser.add_argument('--weight-decay', '--wd', default=1e-6, type=float,
|
||||
metavar='W', help='weight decay (default: 1e-6)')
|
||||
parser.add_argument('--print-freq', default=10, type=int,
|
||||
metavar='N', help='print frequency (default: 10)')
|
||||
parser.add_argument('--resume', default='', type=str, metavar='FILENAME',
|
||||
help='name of the latest checkpoint (default: None)')
|
||||
|
||||
min_loss = float('inf')
|
||||
|
||||
def main():
|
||||
global args, min_loss
|
||||
args = parser.parse_args()
|
||||
|
||||
# manually check if there are unknown test datasets
|
||||
for dataset in args.test_datasets.split(','):
|
||||
if dataset not in test_datasets_names:
|
||||
raise ValueError('Unsupported or unknown test dataset: {}!'.format(dataset))
|
||||
|
||||
# check if test dataset are downloaded
|
||||
# and download if they are not
|
||||
download_train(get_data_root())
|
||||
download_test(get_data_root())
|
||||
|
||||
# create export dir if it doesnt exist
|
||||
directory = "{}".format(args.training_dataset)
|
||||
directory += "_{}".format(args.arch)
|
||||
directory += "_{}".format(args.pool)
|
||||
if args.local_whitening:
|
||||
directory += "_lwhiten"
|
||||
if args.regional:
|
||||
directory += "_r"
|
||||
if args.whitening:
|
||||
directory += "_whiten"
|
||||
if not args.pretrained:
|
||||
directory += "_notpretrained"
|
||||
directory += "_{}_m{:.2f}".format(args.loss, args.loss_margin)
|
||||
directory += "_{}_lr{:.1e}_wd{:.1e}".format(args.optimizer, args.lr, args.weight_decay)
|
||||
directory += "_nnum{}_qsize{}_psize{}".format(args.neg_num, args.query_size, args.pool_size)
|
||||
directory += "_bsize{}_uevery{}_imsize{}".format(args.batch_size, args.update_every, args.image_size)
|
||||
|
||||
args.directory = os.path.join(args.directory, directory)
|
||||
print(">> Creating directory if it does not exist:\n>> '{}'".format(args.directory))
|
||||
if not os.path.exists(args.directory):
|
||||
os.makedirs(args.directory)
|
||||
|
||||
# set cuda visible device
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
|
||||
|
||||
# set random seeds
|
||||
# TODO: maybe pass as argument in future implementation?
|
||||
torch.manual_seed(0)
|
||||
torch.cuda.manual_seed_all(0)
|
||||
np.random.seed(0)
|
||||
|
||||
# initialize model
|
||||
if args.pretrained:
|
||||
print(">> Using pre-trained model '{}'".format(args.arch))
|
||||
else:
|
||||
print(">> Using model from scratch (random weights) '{}'".format(args.arch))
|
||||
model_params = {}
|
||||
model_params['architecture'] = args.arch
|
||||
model_params['pooling'] = args.pool
|
||||
model_params['local_whitening'] = args.local_whitening
|
||||
model_params['regional'] = args.regional
|
||||
model_params['whitening'] = args.whitening
|
||||
# model_params['mean'] = ... # will use default
|
||||
# model_params['std'] = ... # will use default
|
||||
model_params['pretrained'] = args.pretrained
|
||||
model = init_network(model_params)
|
||||
|
||||
# move network to gpu
|
||||
model.cuda()
|
||||
|
||||
# define loss function (criterion) and optimizer
|
||||
if args.loss == 'contrastive':
|
||||
criterion = ContrastiveLoss(margin=args.loss_margin).cuda()
|
||||
elif args.loss == 'triplet':
|
||||
criterion = TripletLoss(margin=args.loss_margin).cuda()
|
||||
else:
|
||||
raise(RuntimeError("Loss {} not available!".format(args.loss)))
|
||||
|
||||
# parameters split into features, pool, whitening
|
||||
# IMPORTANT: no weight decay for pooling parameter p in GeM or regional-GeM
|
||||
parameters = []
|
||||
# add feature parameters
|
||||
parameters.append({'params': model.features.parameters()})
|
||||
# add local whitening if exists
|
||||
if model.lwhiten is not None:
|
||||
parameters.append({'params': model.lwhiten.parameters()})
|
||||
# add pooling parameters (or regional whitening which is part of the pooling layer!)
|
||||
if not args.regional:
|
||||
# global, only pooling parameter p weight decay should be 0
|
||||
if args.pool == 'gem':
|
||||
parameters.append({'params': model.pool.parameters(), 'lr': args.lr*10, 'weight_decay': 0})
|
||||
elif args.pool == 'gemmp':
|
||||
parameters.append({'params': model.pool.parameters(), 'lr': args.lr*100, 'weight_decay': 0})
|
||||
else:
|
||||
# regional, pooling parameter p weight decay should be 0,
|
||||
# and we want to add regional whitening if it is there
|
||||
if args.pool == 'gem':
|
||||
parameters.append({'params': model.pool.rpool.parameters(), 'lr': args.lr*10, 'weight_decay': 0})
|
||||
elif args.pool == 'gemmp':
|
||||
parameters.append({'params': model.pool.rpool.parameters(), 'lr': args.lr*100, 'weight_decay': 0})
|
||||
if model.pool.whiten is not None:
|
||||
parameters.append({'params': model.pool.whiten.parameters()})
|
||||
# add final whitening if exists
|
||||
if model.whiten is not None:
|
||||
parameters.append({'params': model.whiten.parameters()})
|
||||
|
||||
# define optimizer
|
||||
if args.optimizer == 'sgd':
|
||||
optimizer = torch.optim.SGD(parameters, args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
|
||||
elif args.optimizer == 'adam':
|
||||
optimizer = torch.optim.Adam(parameters, args.lr, weight_decay=args.weight_decay)
|
||||
|
||||
# define learning rate decay schedule
|
||||
# TODO: maybe pass as argument in future implementation?
|
||||
exp_decay = math.exp(-0.01)
|
||||
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=exp_decay)
|
||||
|
||||
# optionally resume from a checkpoint
|
||||
start_epoch = 0
|
||||
if args.resume:
|
||||
args.resume = os.path.join(args.directory, args.resume)
|
||||
if os.path.isfile(args.resume):
|
||||
# load checkpoint weights and update model and optimizer
|
||||
print(">> Loading checkpoint:\n>> '{}'".format(args.resume))
|
||||
checkpoint = torch.load(args.resume)
|
||||
start_epoch = checkpoint['epoch']
|
||||
min_loss = checkpoint['min_loss']
|
||||
model.load_state_dict(checkpoint['state_dict'])
|
||||
optimizer.load_state_dict(checkpoint['optimizer'])
|
||||
print(">>>> loaded checkpoint:\n>>>> '{}' (epoch {})"
|
||||
.format(args.resume, checkpoint['epoch']))
|
||||
# important not to forget scheduler updating
|
||||
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=exp_decay, last_epoch=checkpoint['epoch']-1)
|
||||
else:
|
||||
print(">> No checkpoint found at '{}'".format(args.resume))
|
||||
|
||||
# Data loading code
|
||||
normalize = transforms.Normalize(mean=model.meta['mean'], std=model.meta['std'])
|
||||
transform = transforms.Compose([
|
||||
transforms.ToTensor(),
|
||||
normalize,
|
||||
])
|
||||
train_dataset = TuplesDataset(
|
||||
name=args.training_dataset,
|
||||
mode='train',
|
||||
imsize=args.image_size,
|
||||
nnum=args.neg_num,
|
||||
qsize=args.query_size,
|
||||
poolsize=args.pool_size,
|
||||
transform=transform
|
||||
)
|
||||
train_loader = torch.utils.data.DataLoader(
|
||||
train_dataset, batch_size=args.batch_size, shuffle=True,
|
||||
num_workers=args.workers, pin_memory=True, sampler=None,
|
||||
drop_last=True, collate_fn=collate_tuples
|
||||
)
|
||||
if args.val:
|
||||
val_dataset = TuplesDataset(
|
||||
name=args.training_dataset,
|
||||
mode='val',
|
||||
imsize=args.image_size,
|
||||
nnum=args.neg_num,
|
||||
qsize=float('Inf'),
|
||||
poolsize=float('Inf'),
|
||||
transform=transform
|
||||
)
|
||||
val_loader = torch.utils.data.DataLoader(
|
||||
val_dataset, batch_size=args.batch_size, shuffle=False,
|
||||
num_workers=args.workers, pin_memory=True,
|
||||
drop_last=True, collate_fn=collate_tuples
|
||||
)
|
||||
|
||||
# evaluate the network before starting
|
||||
# this might not be necessary?
|
||||
#test(args.test_datasets, model)
|
||||
|
||||
for epoch in range(start_epoch, args.epochs):
|
||||
|
||||
# set manual seeds per epoch
|
||||
np.random.seed(epoch)
|
||||
torch.manual_seed(epoch)
|
||||
torch.cuda.manual_seed_all(epoch)
|
||||
|
||||
# adjust learning rate for each epoch
|
||||
scheduler.step()
|
||||
# # debug printing to check if everything ok
|
||||
# lr_feat = optimizer.param_groups[0]['lr']
|
||||
# lr_pool = optimizer.param_groups[1]['lr']
|
||||
# print('>> Features lr: {:.2e}; Pooling lr: {:.2e}'.format(lr_feat, lr_pool))
|
||||
|
||||
# train for one epoch on train set
|
||||
loss = train(train_loader, model, criterion, optimizer, epoch)
|
||||
|
||||
# evaluate on validation set
|
||||
if args.val:
|
||||
with torch.no_grad():
|
||||
loss = validate(val_loader, model, criterion, epoch)
|
||||
|
||||
# evaluate on test datasets every test_freq epochs
|
||||
#if (epoch + 1) % args.test_freq == 0:
|
||||
# with torch.no_grad():
|
||||
# test(args.test_datasets, model)
|
||||
|
||||
# remember best loss and save checkpoint
|
||||
is_best = loss < min_loss
|
||||
min_loss = min(loss, min_loss)
|
||||
if (epoch+1)%10 == 0:
|
||||
save_checkpoint({
|
||||
'epoch': epoch + 1,
|
||||
'meta': model.meta,
|
||||
'state_dict': model.state_dict(),
|
||||
'min_loss': min_loss,
|
||||
'optimizer' : optimizer.state_dict(),
|
||||
}, is_best, args.directory)
|
||||
|
||||
def train(train_loader, model, criterion, optimizer, epoch):
|
||||
batch_time = AverageMeter()
|
||||
data_time = AverageMeter()
|
||||
losses = AverageMeter()
|
||||
|
||||
# create tuples for training
|
||||
avg_neg_distance = train_loader.dataset.create_epoch_tuples(model)
|
||||
|
||||
# switch to train mode
|
||||
model.train()
|
||||
model.apply(set_batchnorm_eval)
|
||||
|
||||
# zero out gradients
|
||||
optimizer.zero_grad()
|
||||
|
||||
end = time.time()
|
||||
for i, (input, target) in enumerate(train_loader):
|
||||
# measure data loading time
|
||||
data_time.update(time.time() - end)
|
||||
|
||||
|
||||
nq = len(input) # number of training tuples
|
||||
ni = len(input[0]) # number of images per tuple
|
||||
|
||||
for q in range(nq):
|
||||
output = torch.zeros(model.meta['outputdim'], ni).cuda()
|
||||
for imi in range(ni):
|
||||
|
||||
# compute output vector for image imi
|
||||
output[:, imi] = model(input[q][imi].cuda()).squeeze()
|
||||
|
||||
# reducing memory consumption:
|
||||
# compute loss for this query tuple only
|
||||
# then, do backward pass for one tuple only
|
||||
# each backward pass gradients will be accumulated
|
||||
# the optimization step is performed for the full batch later
|
||||
loss = criterion(output, target[q].cuda())
|
||||
losses.update(loss.item())
|
||||
loss.backward()
|
||||
|
||||
if (i + 1) % args.update_every == 0:
|
||||
# do one step for multiple batches
|
||||
# accumulated gradients are used
|
||||
optimizer.step()
|
||||
# zero out gradients so we can
|
||||
# accumulate new ones over batches
|
||||
optimizer.zero_grad()
|
||||
# print('>> Train: [{0}][{1}/{2}]\t'
|
||||
# 'Weight update performed'.format(
|
||||
# epoch+1, i+1, len(train_loader)))
|
||||
|
||||
# measure elapsed time
|
||||
batch_time.update(time.time() - end)
|
||||
end = time.time()
|
||||
|
||||
if (i+1) % args.print_freq == 0 or i == 0 or (i+1) == len(train_loader):
|
||||
print('>> Train: [{0}][{1}/{2}]\t'
|
||||
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
|
||||
'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
|
||||
'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
|
||||
epoch+1, i+1, len(train_loader), batch_time=batch_time,
|
||||
data_time=data_time, loss=losses))
|
||||
|
||||
return losses.avg
|
||||
|
||||
|
||||
def validate(val_loader, model, criterion, epoch):
|
||||
batch_time = AverageMeter()
|
||||
losses = AverageMeter()
|
||||
|
||||
# create tuples for validation
|
||||
avg_neg_distance = val_loader.dataset.create_epoch_tuples(model)
|
||||
|
||||
# switch to evaluate mode
|
||||
model.eval()
|
||||
|
||||
end = time.time()
|
||||
for i, (input, target) in enumerate(val_loader):
|
||||
|
||||
nq = len(input) # number of training tuples
|
||||
ni = len(input[0]) # number of images per tuple
|
||||
output = torch.zeros(model.meta['outputdim'], nq*ni).cuda()
|
||||
|
||||
for q in range(nq):
|
||||
for imi in range(ni):
|
||||
|
||||
# compute output vector for image imi of query q
|
||||
output[:, q*ni + imi] = model(input[q][imi].cuda()).squeeze()
|
||||
|
||||
# no need to reduce memory consumption (no backward pass):
|
||||
# compute loss for the full batch
|
||||
loss = criterion(output, torch.cat(target).cuda())
|
||||
|
||||
# record loss
|
||||
losses.update(loss.item()/nq, nq)
|
||||
|
||||
# measure elapsed time
|
||||
batch_time.update(time.time() - end)
|
||||
end = time.time()
|
||||
|
||||
if (i+1) % args.print_freq == 0 or i == 0 or (i+1) == len(val_loader):
|
||||
print('>> Val: [{0}][{1}/{2}]\t'
|
||||
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
|
||||
'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
|
||||
epoch+1, i+1, len(val_loader), batch_time=batch_time, loss=losses))
|
||||
|
||||
return losses.avg
|
||||
|
||||
def test(datasets, net):

    print('>> Evaluating network on test datasets...')

    # for testing we use image size of max 1024
    image_size = 1024

    # moving network to gpu and eval mode
    net.cuda()
    net.eval()
    # set up the transform
    normalize = transforms.Normalize(
        mean=net.meta['mean'],
        std=net.meta['std']
    )
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])

    # compute whitening
    if args.test_whiten:
        start = time.time()

        print('>> {}: Learning whitening...'.format(args.test_whiten))

        # loading db
        db_root = os.path.join(get_data_root(), 'train', args.test_whiten)
        ims_root = os.path.join(db_root, 'ims')
        db_fn = os.path.join(db_root, '{}-whiten.pkl'.format(args.test_whiten))
        with open(db_fn, 'rb') as f:
            db = pickle.load(f)
        images = [cid2filename(db['cids'][i], ims_root) for i in range(len(db['cids']))]

        # extract whitening vectors
        print('>> {}: Extracting...'.format(args.test_whiten))
        wvecs = extract_vectors(net, images, image_size, transform)  # implemented with torch.no_grad

        # learning whitening
        print('>> {}: Learning...'.format(args.test_whiten))
        wvecs = wvecs.numpy()
        m, P = whitenlearn(wvecs, db['qidxs'], db['pidxs'])
        Lw = {'m': m, 'P': P}

        print('>> {}: elapsed time: {}'.format(args.test_whiten, htime(time.time()-start)))
    else:
        Lw = None

    # evaluate on test datasets
    datasets = args.test_datasets.split(',')
    for dataset in datasets:
        start = time.time()

        print('>> {}: Extracting...'.format(dataset))

        # prepare config structure for the test dataset
        cfg = configdataset(dataset, os.path.join(get_data_root(), 'test'))
        images = [cfg['im_fname'](cfg,i) for i in range(cfg['n'])]
        qimages = [cfg['qim_fname'](cfg,i) for i in range(cfg['nq'])]
        bbxs = [tuple(cfg['gnd'][i]['bbx']) for i in range(cfg['nq'])]

        # extract database and query vectors
        print('>> {}: database images...'.format(dataset))
        vecs = extract_vectors(net, images, image_size, transform)  # implemented with torch.no_grad
        print('>> {}: query images...'.format(dataset))
        qvecs = extract_vectors(net, qimages, image_size, transform, bbxs)  # implemented with torch.no_grad

        print('>> {}: Evaluating...'.format(dataset))

        # convert to numpy
        vecs = vecs.numpy()
        qvecs = qvecs.numpy()

        # search, rank, and print
        scores = np.dot(vecs.T, qvecs)
        ranks = np.argsort(-scores, axis=0)
        compute_map_and_print(dataset, ranks, cfg['gnd'])

        if Lw is not None:
            # whiten the vectors
            vecs_lw = whitenapply(vecs, Lw['m'], Lw['P'])
            qvecs_lw = whitenapply(qvecs, Lw['m'], Lw['P'])

            # search, rank, and print
            scores = np.dot(vecs_lw.T, qvecs_lw)
            ranks = np.argsort(-scores, axis=0)
            compute_map_and_print(dataset + ' + whiten', ranks, cfg['gnd'])

        print('>> {}: elapsed time: {}'.format(dataset, htime(time.time()-start)))

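Because the extracted descriptors are L2-normalized, the dot products in the search step above are cosine similarities, and sorting each score column in descending order gives the ranking. A self-contained sketch of that step with random stand-in vectors:

import numpy as np

D, n_db, n_q = 2048, 100, 5
vecs = np.random.randn(D, n_db)
vecs /= np.linalg.norm(vecs, axis=0, keepdims=True)
qvecs = np.random.randn(D, n_q)
qvecs /= np.linalg.norm(qvecs, axis=0, keepdims=True)

scores = np.dot(vecs.T, qvecs)           # (n_db, n_q) similarity matrix
ranks = np.argsort(-scores, axis=0)      # column q: db indices, best match first
print(ranks[:3, 0])                      # top-3 database images for the first query
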
def save_checkpoint(state, is_best, directory):
    filename = os.path.join(directory, 'model_epoch%d.pth.tar' % state['epoch'])
    torch.save(state, filename)
    if is_best:
        filename_best = os.path.join(directory, 'model_best.pth.tar')
        shutil.copyfile(filename, filename_best)

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def set_batchnorm_eval(m):
    classname = m.__class__.__name__
    if classname.find('BatchNorm') != -1:
        # freeze running mean and std:
        # we do training one image at a time,
        # so the statistics would not be per batch;
        # hence we choose freezing (i.e., using the ImageNet statistics)
        m.eval()
        # # freeze parameters:
        # # in fact there is no need to freeze scale and bias,
        # # they can be learned;
        # # that is why the next two lines are commented out
        # for p in m.parameters():
        #     p.requires_grad = False

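set_batchnorm_eval is written to be passed to Module.apply, which visits every submodule recursively; this is how frozen BatchNorm statistics survive a later model.train() call. A short usage sketch (the torchvision model is only a stand-in):

import torchvision

model = torchvision.models.resnet18(pretrained=False)  # stand-in model
model.train()                    # puts BatchNorm back in batch-statistics mode
model.apply(set_batchnorm_eval)  # re-freezes every BatchNorm layer's running stats
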
if __name__ == '__main__':
    main()

0
cirtorch/layers/__init__.py
Executable file
172
cirtorch/layers/functional.py
Executable file
@ -0,0 +1,172 @@
import math
import pdb

import torch
import torch.nn.functional as F

# --------------------------------------
# pooling
# --------------------------------------

def mac(x):
    return F.max_pool2d(x, (x.size(-2), x.size(-1)))
    # return F.adaptive_max_pool2d(x, (1,1)) # alternative


def spoc(x):
    return F.avg_pool2d(x, (x.size(-2), x.size(-1)))
    # return F.adaptive_avg_pool2d(x, (1,1)) # alternative


def gem(x, p=3, eps=1e-6):
    return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)
    # return F.lp_pool2d(F.threshold(x, eps, eps), p, (x.size(-2), x.size(-1))) # alternative


def rmac(x, L=3, eps=1e-6):
    ovr = 0.4 # desired overlap of neighboring regions
    steps = torch.Tensor([2, 3, 4, 5, 6, 7]) # possible regions for the long dimension

    W = x.size(3)
    H = x.size(2)

    w = min(W, H)
    w2 = math.floor(w/2.0 - 1)

    b = (max(H, W)-w)/(steps-1)
    (tmp, idx) = torch.min(torch.abs(((w**2 - w*b)/w**2)-ovr), 0) # steps(idx) regions for long dimension

    # region overplus per dimension
    Wd = 0
    Hd = 0
    if H < W:
        Wd = idx.item() + 1
    elif H > W:
        Hd = idx.item() + 1

    v = F.max_pool2d(x, (x.size(-2), x.size(-1)))
    v = v / (torch.norm(v, p=2, dim=1, keepdim=True) + eps).expand_as(v)

    for l in range(1, L+1):
        wl = math.floor(2*w/(l+1))
        wl2 = math.floor(wl/2 - 1)

        if l+Wd == 1:
            b = 0
        else:
            b = (W-wl)/(l+Wd-1)
        cenW = torch.floor(wl2 + torch.Tensor(range(l-1+Wd+1))*b) - wl2 # center coordinates
        if l+Hd == 1:
            b = 0
        else:
            b = (H-wl)/(l+Hd-1)
        cenH = torch.floor(wl2 + torch.Tensor(range(l-1+Hd+1))*b) - wl2 # center coordinates

        for i_ in cenH.tolist():
            for j_ in cenW.tolist():
                if wl == 0:
                    continue
                R = x[:,:,(int(i_)+torch.Tensor(range(wl)).long()).tolist(),:]
                R = R[:,:,:,(int(j_)+torch.Tensor(range(wl)).long()).tolist()]
                vt = F.max_pool2d(R, (R.size(-2), R.size(-1)))
                vt = vt / (torch.norm(vt, p=2, dim=1, keepdim=True) + eps).expand_as(vt)
                v += vt

    return v


def roipool(x, rpool, L=3, eps=1e-6):
    ovr = 0.4 # desired overlap of neighboring regions
    steps = torch.Tensor([2, 3, 4, 5, 6, 7]) # possible regions for the long dimension

    W = x.size(3)
    H = x.size(2)

    w = min(W, H)
    w2 = math.floor(w/2.0 - 1)

    b = (max(H, W)-w)/(steps-1)
    _, idx = torch.min(torch.abs(((w**2 - w*b)/w**2)-ovr), 0) # steps(idx) regions for long dimension

    # region overplus per dimension
    Wd = 0
    Hd = 0
    if H < W:
        Wd = idx.item() + 1
    elif H > W:
        Hd = idx.item() + 1

    vecs = []
    vecs.append(rpool(x).unsqueeze(1))

    for l in range(1, L+1):
        wl = math.floor(2*w/(l+1))
        wl2 = math.floor(wl/2 - 1)

        if l+Wd == 1:
            b = 0
        else:
            b = (W-wl)/(l+Wd-1)
        cenW = torch.floor(wl2 + torch.Tensor(range(l-1+Wd+1))*b).int() - wl2 # center coordinates
        if l+Hd == 1:
            b = 0
        else:
            b = (H-wl)/(l+Hd-1)
        cenH = torch.floor(wl2 + torch.Tensor(range(l-1+Hd+1))*b).int() - wl2 # center coordinates

        for i_ in cenH.tolist():
            for j_ in cenW.tolist():
                if wl == 0:
                    continue
                vecs.append(rpool(x.narrow(2,i_,wl).narrow(3,j_,wl)).unsqueeze(1))

    return torch.cat(vecs, dim=1)


# --------------------------------------
# normalization
# --------------------------------------

def l2n(x, eps=1e-6):
    return x / (torch.norm(x, p=2, dim=1, keepdim=True) + eps).expand_as(x)

def powerlaw(x, eps=1e-6):
    x = x + eps  # fixed: was `self.eps`, which is undefined in a module-level function
    return x.abs().sqrt().mul(x.sign())

# --------------------------------------
# loss
# --------------------------------------

def contrastive_loss(x, label, margin=0.7, eps=1e-6):
    # x is D x N
    dim = x.size(0) # D
    nq = torch.sum(label.data==-1) # number of tuples
    S = x.size(1) // nq # number of images per tuple including query: 1+1+n

    x1 = x[:, ::S].permute(1,0).repeat(1,S-1).view((S-1)*nq,dim).permute(1,0)
    idx = [i for i in range(len(label)) if label.data[i] != -1]
    x2 = x[:, idx]
    lbl = label[label!=-1]

    dif = x1 - x2
    D = torch.pow(dif+eps, 2).sum(dim=0).sqrt()

    y = 0.5*lbl*torch.pow(D,2) + 0.5*(1-lbl)*torch.pow(torch.clamp(margin-D, min=0),2)
    y = torch.sum(y)
    return y

def triplet_loss(x, label, margin=0.1):
    # x is D x N
    dim = x.size(0) # D
    nq = torch.sum(label.data==-1).item() # number of tuples
    S = x.size(1) // nq # number of images per tuple including query: 1+1+n

    xa = x[:, label.data==-1].permute(1,0).repeat(1,S-2).view((S-2)*nq,dim).permute(1,0)
    xp = x[:, label.data==1].permute(1,0).repeat(1,S-2).view((S-2)*nq,dim).permute(1,0)
    xn = x[:, label.data==0]

    dist_pos = torch.sum(torch.pow(xa - xp, 2), dim=0)
    dist_neg = torch.sum(torch.pow(xa - xn, 2), dim=0)

    return torch.sum(torch.clamp(dist_pos - dist_neg + margin, min=0))

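A note on gem above: it computes generalized-mean (GeM) pooling, which interpolates between the two other poolings in this file: p=1 reduces to average pooling (spoc) and large p approaches max pooling (mac). A quick numerical check using only the functions defined here (the input range is restricted to keep large powers finite):

import torch

x = torch.rand(1, 4, 7, 7) * 0.5 + 0.5    # fake feature map in [0.5, 1], N x C x H x W
print(torch.allclose(gem(x, p=1), spoc(x), atol=1e-5))     # p = 1  -> average pooling
print(torch.allclose(gem(x, p=1000), mac(x), atol=1e-2))   # large p -> ~max pooling
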
48
cirtorch/layers/loss.py
Executable file
@ -0,0 +1,48 @@
import torch
import torch.nn as nn

import cirtorch.layers.functional as LF

# --------------------------------------
# Loss/Error layers
# --------------------------------------

class ContrastiveLoss(nn.Module):
    r"""CONTRASTIVELOSS layer that computes contrastive loss for a batch of images:
    Q query tuples, each packed in the form of (q, p, n1, ..., nN)

    Args:
        x: tuples arranged in columns as [q, p, n1, ..., nN, ...]
        label: -1 for query, 1 for corresponding positive, 0 for corresponding negative
        margin: contrastive loss margin. Default: 0.7

    >>> contrastive_loss = ContrastiveLoss(margin=0.7)
    >>> input = torch.randn(128, 35, requires_grad=True)
    >>> label = torch.Tensor([-1, 1, 0, 0, 0, 0, 0] * 5)
    >>> output = contrastive_loss(input, label)
    >>> output.backward()
    """

    def __init__(self, margin=0.7, eps=1e-6):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin
        self.eps = eps

    def forward(self, x, label):
        return LF.contrastive_loss(x, label, margin=self.margin, eps=self.eps)

    def __repr__(self):
        return self.__class__.__name__ + '(' + 'margin=' + '{:.4f}'.format(self.margin) + ')'


class TripletLoss(nn.Module):

    def __init__(self, margin=0.1):
        super(TripletLoss, self).__init__()
        self.margin = margin

    def forward(self, x, label):
        return LF.triplet_loss(x, label, margin=self.margin)

    def __repr__(self):
        return self.__class__.__name__ + '(' + 'margin=' + '{:.4f}'.format(self.margin) + ')'

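TripletLoss expects the same column layout as ContrastiveLoss: descriptors as a D x N matrix and a label of -1 (query), 1 (positive), 0 (negative) per column. A minimal sketch for one tuple with five negatives, assuming the cirtorch package is importable:

import torch

criterion = TripletLoss(margin=0.1)
x = torch.randn(128, 7, requires_grad=True)      # columns: q, p, n1..n5
label = torch.Tensor([-1, 1, 0, 0, 0, 0, 0])
loss = criterion(x, label)
loss.backward()
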
33
cirtorch/layers/normalization.py
Executable file
@ -0,0 +1,33 @@
import torch
import torch.nn as nn

import cirtorch.layers.functional as LF

# --------------------------------------
# Normalization layers
# --------------------------------------

class L2N(nn.Module):

    def __init__(self, eps=1e-6):
        super(L2N,self).__init__()
        self.eps = eps

    def forward(self, x):
        return LF.l2n(x, eps=self.eps)

    def __repr__(self):
        return self.__class__.__name__ + '(' + 'eps=' + str(self.eps) + ')'


class PowerLaw(nn.Module):

    def __init__(self, eps=1e-6):
        super(PowerLaw, self).__init__()
        self.eps = eps

    def forward(self, x):
        return LF.powerlaw(x, eps=self.eps)

    def __repr__(self):
        return self.__class__.__name__ + '(' + 'eps=' + str(self.eps) + ')'

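Both layers are thin nn.Module wrappers over the functional forms, so they can sit inside an nn.Sequential and show up in repr output. A quick check that L2N produces unit-norm vectors (up to eps):

import torch

l2n = L2N()
x = torch.randn(4, 128)
print(torch.norm(l2n(x), dim=1))   # ~1.0 for every vector
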
113
cirtorch/layers/pooling.py
Executable file
@ -0,0 +1,113 @@
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter

import cirtorch.layers.functional as LF
from cirtorch.layers.normalization import L2N

# --------------------------------------
# Pooling layers
# --------------------------------------

class MAC(nn.Module):

    def __init__(self):
        super(MAC,self).__init__()

    def forward(self, x):
        return LF.mac(x)

    def __repr__(self):
        return self.__class__.__name__ + '()'


class SPoC(nn.Module):

    def __init__(self):
        super(SPoC,self).__init__()

    def forward(self, x):
        return LF.spoc(x)

    def __repr__(self):
        return self.__class__.__name__ + '()'


class GeM(nn.Module):

    def __init__(self, p=3, eps=1e-6):
        super(GeM,self).__init__()
        self.p = Parameter(torch.ones(1)*p)
        self.eps = eps

    def forward(self, x):
        return LF.gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        return self.__class__.__name__ + '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + ', ' + 'eps=' + str(self.eps) + ')'


class GeMmp(nn.Module):

    def __init__(self, p=3, mp=1, eps=1e-6):
        super(GeMmp,self).__init__()
        self.p = Parameter(torch.ones(mp)*p)
        self.mp = mp
        self.eps = eps

    def forward(self, x):
        return LF.gem(x, p=self.p.unsqueeze(-1).unsqueeze(-1), eps=self.eps)

    def __repr__(self):
        return self.__class__.__name__ + '(' + 'p=' + '[{}]'.format(self.mp) + ', ' + 'eps=' + str(self.eps) + ')'


class RMAC(nn.Module):

    def __init__(self, L=3, eps=1e-6):
        super(RMAC,self).__init__()
        self.L = L
        self.eps = eps

    def forward(self, x):
        return LF.rmac(x, L=self.L, eps=self.eps)

    def __repr__(self):
        return self.__class__.__name__ + '(' + 'L=' + '{}'.format(self.L) + ')'


class Rpool(nn.Module):

    def __init__(self, rpool, whiten=None, L=3, eps=1e-6):
        super(Rpool,self).__init__()
        self.rpool = rpool
        self.L = L
        self.whiten = whiten
        self.norm = L2N()
        self.eps = eps

    def forward(self, x, aggregate=True):
        # features -> roipool
        o = LF.roipool(x, self.rpool, self.L, self.eps) # size: #im, #reg, D, 1, 1

        # concatenate regions from all images in the batch
        s = o.size()
        o = o.view(s[0]*s[1], s[2], s[3], s[4]) # size: #im x #reg, D, 1, 1

        # rvecs -> norm
        o = self.norm(o)

        # rvecs -> whiten -> norm
        if self.whiten is not None:
            o = self.norm(self.whiten(o.squeeze(-1).squeeze(-1)))

        # reshape back to regions per image
        o = o.view(s[0], s[1], s[2], s[3], s[4]) # size: #im, #reg, D, 1, 1

        # aggregate regions into a single global vector per image
        if aggregate:
            # rvecs -> sumpool -> norm
            o = self.norm(o.sum(1, keepdim=False)) # size: #im, D, 1, 1

        return o

    def __repr__(self):
        return super(Rpool, self).__repr__() + '(' + 'L=' + '{}'.format(self.L) + ')'

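Unlike the functional gem, the GeM module stores the exponent p as an nn.Parameter, so the pooling is learned jointly with the network weights. A minimal sketch:

import torch

pool = GeM(p=3)
x = torch.rand(2, 512, 7, 7)            # fake convolutional feature maps
v = pool(x)                             # 2 x 512 x 1 x 1 global descriptors
print(v.shape, pool.p.requires_grad)    # p receives gradients like any other weight
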
0
cirtorch/networks/__init__.py
Executable file
427
cirtorch/networks/imageretrievalnet.py
Executable file
@ -0,0 +1,427 @@
import os
import pdb
import numpy as np

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

import torchvision

from cirtorch.layers.pooling import MAC, SPoC, GeM, GeMmp, RMAC, Rpool
from cirtorch.layers.normalization import L2N, PowerLaw
from cirtorch.datasets.genericdataset import ImagesFromList
from cirtorch.utils.general import get_data_root
from cirtorch.datasets.datahelpers import default_loader, imresize
from PIL import Image
#from ModelHelper.Common.CommonUtils.ImageAugmentation import Padding
import cv2

# for some models, we have imported features (convolutions) from caffe because the image retrieval performance is higher for them
FEATURES = {
    'vgg16': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-vgg16-features-d369c8e.pth',
    'resnet50': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet50-features-ac468af.pth',
    'resnet101': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet101-features-10a101d.pth',
    'resnet152': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet152-features-1011020.pth',
}

# TODO: pre-compute for more architectures and properly test variations (pre l2norm, post l2norm)
# pre-computed local pca whitening that can be applied before the pooling layer
L_WHITENING = {
    'resnet101': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-lwhiten-9f830ef.pth', # no pre l2 norm
    # 'resnet101': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-lwhiten-da5c935.pth', # with pre l2 norm
}

# possible global pooling layers, each one of these can be made regional
POOLING = {
    'mac': MAC,
    'spoc': SPoC,
    'gem': GeM,
    'gemmp': GeMmp,
    'rmac': RMAC,
}

# TODO: pre-compute for: resnet50-gem-r, resnet50-mac-r, vgg16-mac-r, alexnet-mac-r
# pre-computed regional whitening, for most commonly used architectures and pooling methods
R_WHITENING = {
    'alexnet-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-r-rwhiten-c8cf7e2.pth',
    'vgg16-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-r-rwhiten-19b204e.pth',
    'resnet101-mac-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-mac-r-rwhiten-7f1ed8c.pth',
    'resnet101-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-r-rwhiten-adace84.pth',
}

# TODO: pre-compute for more architectures
# pre-computed final (global) whitening, for most commonly used architectures and pooling methods
WHITENING = {
    'alexnet-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-whiten-454ad53.pth',
    'alexnet-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-r-whiten-4c9126b.pth',
    'vgg16-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-whiten-eaa6695.pth',
    'vgg16-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-r-whiten-83582df.pth',
    'resnet50-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet50-gem-whiten-f15da7b.pth',
    'resnet101-mac-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-mac-r-whiten-9df41d3.pth',
    'resnet101-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-whiten-22ab0c1.pth',
    'resnet101-gem-r': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-r-whiten-b379c0a.pth',
    'resnet101-gemmp': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gemmp-whiten-770f53c.pth',
    'resnet152-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet152-gem-whiten-abe7b93.pth',
    'densenet121-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet121-gem-whiten-79e3eea.pth',
    'densenet169-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet169-gem-whiten-6b2a76a.pth',
    'densenet201-gem': 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet201-gem-whiten-22ea45c.pth',
}

# output dimensionality for supported architectures
OUTPUT_DIM = {
    'alexnet': 256,
    'vgg11': 512,
    'vgg13': 512,
    'vgg16': 512,
    'vgg19': 512,
    'resnet18': 512,
    'resnet34': 512,
    'resnet50': 2048,
    'resnet101': 2048,
    'resnet152': 2048,
    'densenet121': 1024,
    'densenet169': 1664,
    'densenet201': 1920,
    'densenet161': 2208, # largest densenet
    'squeezenet1_0': 512,
    'squeezenet1_1': 512,
}


class ImageRetrievalNet(nn.Module):

    def __init__(self, features, lwhiten, pool, whiten, meta):
        super(ImageRetrievalNet, self).__init__()
        self.features = nn.Sequential(*features)
        self.lwhiten = lwhiten
        self.pool = pool
        self.whiten = whiten
        self.norm = L2N()
        self.meta = meta

    def forward(self, x):
        # x -> features
        o = self.features(x)

        # TODO: properly test (with pre-l2norm and/or post-l2norm)
        # if lwhiten exists: features -> local whiten
        if self.lwhiten is not None:
            # o = self.norm(o)
            s = o.size()
            o = o.permute(0, 2, 3, 1).contiguous().view(-1, s[1])
            o = self.lwhiten(o)
            o = o.view(s[0], s[2], s[3], self.lwhiten.out_features).permute(0, 3, 1, 2)
            # o = self.norm(o)

        # features -> pool -> norm
        o = self.norm(self.pool(o)).squeeze(-1).squeeze(-1)

        # if whiten exists: pooled features -> whiten -> norm
        if self.whiten is not None:
            o = self.norm(self.whiten(o))

        # permute so that it is Dx1 column vector per image (DxN if many images)
        return o.permute(1, 0)

    def __repr__(self):
        tmpstr = super(ImageRetrievalNet, self).__repr__()[:-1]
        tmpstr += self.meta_repr()
        tmpstr = tmpstr + ')'
        return tmpstr

    def meta_repr(self):
        tmpstr = '  (' + 'meta' + '): dict( \n' # + self.meta.__repr__() + '\n'
        tmpstr += '     architecture: {}\n'.format(self.meta['architecture'])
        tmpstr += '     local_whitening: {}\n'.format(self.meta['local_whitening'])
        tmpstr += '     pooling: {}\n'.format(self.meta['pooling'])
        tmpstr += '     regional: {}\n'.format(self.meta['regional'])
        tmpstr += '     whitening: {}\n'.format(self.meta['whitening'])
        tmpstr += '     outputdim: {}\n'.format(self.meta['outputdim'])
        tmpstr += '     mean: {}\n'.format(self.meta['mean'])
        tmpstr += '     std: {}\n'.format(self.meta['std'])
        tmpstr = tmpstr + '  )\n'
        return tmpstr


def init_network(params):
    # parse params with default values
    architecture = params.get('architecture', 'resnet101')
    local_whitening = params.get('local_whitening', False)
    pooling = params.get('pooling', 'gem')
    regional = params.get('regional', False)
    whitening = params.get('whitening', False)
    mean = params.get('mean', [0.485, 0.456, 0.406])
    std = params.get('std', [0.229, 0.224, 0.225])
    pretrained = params.get('pretrained', True)

    # get output dimensionality size
    dim = OUTPUT_DIM[architecture]

    # loading network from torchvision
    if pretrained:
        if architecture not in FEATURES:
            # initialize with network pretrained on imagenet in pytorch
            net_in = getattr(torchvision.models, architecture)(pretrained=True)
        else:
            # initialize with random weights, later on we will fill features with custom pretrained network
            net_in = getattr(torchvision.models, architecture)(pretrained=False)
    else:
        # initialize with random weights
        net_in = getattr(torchvision.models, architecture)(pretrained=False)

    # initialize features:
    # take only convolutions for features,
    # always end with ReLU to make last activations non-negative
    if architecture.startswith('alexnet'):
        features = list(net_in.features.children())[:-1]
    elif architecture.startswith('vgg'):
        features = list(net_in.features.children())[:-1]
    elif architecture.startswith('resnet'):
        features = list(net_in.children())[:-2]
    elif architecture.startswith('densenet'):
        features = list(net_in.features.children())
        features.append(nn.ReLU(inplace=True))
    elif architecture.startswith('squeezenet'):
        features = list(net_in.features.children())
    else:
        raise ValueError('Unsupported or unknown architecture: {}!'.format(architecture))

    # initialize local whitening
    if local_whitening:
        lwhiten = nn.Linear(dim, dim, bias=True)
        # TODO: lwhiten with possible dimensionality reduction

        if pretrained:
            lw = architecture
            if lw in L_WHITENING:
                print(">> {}: for '{}' custom computed local whitening '{}' is used"
                      .format(os.path.basename(__file__), lw, os.path.basename(L_WHITENING[lw])))
                whiten_dir = os.path.join(get_data_root(), 'whiten')
                lwhiten.load_state_dict(model_zoo.load_url(L_WHITENING[lw], model_dir=whiten_dir))
            else:
                print(">> {}: for '{}' there is no local whitening computed, random weights are used"
                      .format(os.path.basename(__file__), lw))

    else:
        lwhiten = None

    # initialize pooling
    if pooling == 'gemmp':
        pool = POOLING[pooling](mp=dim)
    else:
        pool = POOLING[pooling]()

    # initialize regional pooling
    if regional:
        rpool = pool
        rwhiten = nn.Linear(dim, dim, bias=True)
        # TODO: rwhiten with possible dimensionality reduction

        if pretrained:
            rw = '{}-{}-r'.format(architecture, pooling)
            if rw in R_WHITENING:
                print(">> {}: for '{}' custom computed regional whitening '{}' is used"
                      .format(os.path.basename(__file__), rw, os.path.basename(R_WHITENING[rw])))
                whiten_dir = os.path.join(get_data_root(), 'whiten')
                rwhiten.load_state_dict(model_zoo.load_url(R_WHITENING[rw], model_dir=whiten_dir))
            else:
                print(">> {}: for '{}' there is no regional whitening computed, random weights are used"
                      .format(os.path.basename(__file__), rw))

        pool = Rpool(rpool, rwhiten)

    # initialize whitening
    if whitening:
        whiten = nn.Linear(dim, dim, bias=True)
        # TODO: whiten with possible dimensionality reduction

        if pretrained:
            w = architecture
            if local_whitening:
                w += '-lw'
            w += '-' + pooling
            if regional:
                w += '-r'
            if w in WHITENING:
                print(">> {}: for '{}' custom computed whitening '{}' is used"
                      .format(os.path.basename(__file__), w, os.path.basename(WHITENING[w])))
                whiten_dir = os.path.join(get_data_root(), 'whiten')
                whiten.load_state_dict(model_zoo.load_url(WHITENING[w], model_dir=whiten_dir))
            else:
                print(">> {}: for '{}' there is no whitening computed, random weights are used"
                      .format(os.path.basename(__file__), w))
    else:
        whiten = None

    # create meta information to be stored in the network
    meta = {
        'architecture': architecture,
        'local_whitening': local_whitening,
        'pooling': pooling,
        'regional': regional,
        'whitening': whitening,
        'mean': mean,
        'std': std,
        'outputdim': dim,
    }

    # create a generic image retrieval network
    net = ImageRetrievalNet(features, lwhiten, pool, whiten, meta)

    # initialize features with custom pretrained network if needed
    if pretrained and architecture in FEATURES:
        print(">> {}: for '{}' custom pretrained features '{}' are used"
              .format(os.path.basename(__file__), architecture, os.path.basename(FEATURES[architecture])))
        model_dir = os.path.join(get_data_root(), 'networks')
        net.features.load_state_dict(model_zoo.load_url(FEATURES[architecture], model_dir=model_dir))

    return net


def extract_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
    # moving network to gpu and eval mode
    if torch.cuda.is_available():
        net.cuda()
    net.eval()

    # creating dataset loader
    loader = torch.utils.data.DataLoader(
        ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
        batch_size=1, shuffle=False, num_workers=1, pin_memory=True
    )

    # extracting vectors
    with torch.no_grad():
        vecs = torch.zeros(net.meta['outputdim'], len(images))
        img_paths = list()
        for i, (input, path) in enumerate(loader):
            #print(i)
            if torch.cuda.is_available():
                input = input.cuda()

            if len(ms) == 1 and ms[0] == 1:
                vecs[:, i] = extract_ss(net, input)
            else:
                vecs[:, i] = extract_ms(net, input, ms, msp)
            img_paths.append(path)

            if (i + 1) % print_freq == 0 or (i + 1) == len(images):
                print('\r>>>> {}/{} done...'.format((i + 1), len(images)), end='')
    imgs = list()
    for one in img_paths:
        imgs += one
    return vecs, imgs


def extract_vectors_o(net, image, size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
    if torch.cuda.is_available():
        net.cuda()
    net.eval()
    #image = cv2.resize(image, (size, size))
    if type(image) == np.ndarray:
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    image = imresize(image, size)
    image = transform(image)
    image = image.unsqueeze(0)
    #print('image>>>>>>>', image)
    #print('image>>>>>>>', image.shape)
    with torch.no_grad():
        #vecs = torch.zeros(net.meta['outputdim'], len(image))
        if torch.cuda.is_available():
            image = image.cuda()
        if len(ms) == 1 and ms[0] == 1:
            vecs = extract_ss(net, image)
        else:
            vecs = extract_ms(net, image, ms, msp)
    return vecs


def extract_ss(net, input):
    #return net(input).cpu().data.squeeze()
    return net(input).cuda().data.squeeze()


def extract_ms(net, input, ms, msp):
    v = torch.zeros(net.meta['outputdim'])

    for s in ms:
        if s == 1:
            input_t = input.clone()
        else:
            input_t = nn.functional.interpolate(input, scale_factor=s, mode='bilinear', align_corners=False)
        v += net(input_t).pow(msp).cpu().data.squeeze()

    v /= len(ms)
    v = v.pow(1. / msp)
    v /= v.norm()

    return v


def extract_regional_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
    # moving network to gpu and eval mode
    net.cuda()
    net.eval()

    # creating dataset loader
    loader = torch.utils.data.DataLoader(
        ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
        batch_size=1, shuffle=False, num_workers=8, pin_memory=True
    )

    # extracting vectors
    with torch.no_grad():
        vecs = []
        for i, input in enumerate(loader):
            input = input.cuda()

            if len(ms) == 1:
                vecs.append(extract_ssr(net, input))
            else:
                # TODO: not implemented yet
                # vecs.append(extract_msr(net, input, ms, msp))
                raise NotImplementedError

            if (i + 1) % print_freq == 0 or (i + 1) == len(images):
                print('\r>>>> {}/{} done...'.format((i + 1), len(images)), end='')
        print('')

    return vecs


def extract_ssr(net, input):
    return net.pool(net.features(input), aggregate=False).squeeze(0).squeeze(-1).squeeze(-1).permute(1, 0).cpu().data


def extract_local_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
    # moving network to gpu and eval mode
    net.cuda()
    net.eval()

    # creating dataset loader
    loader = torch.utils.data.DataLoader(
        ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
        batch_size=1, shuffle=False, num_workers=8, pin_memory=True
    )

    # extracting vectors
    with torch.no_grad():
        vecs = []
        for i, input in enumerate(loader):
            input = input.cuda()

            if len(ms) == 1:
                vecs.append(extract_ssl(net, input))
            else:
                # TODO: not implemented yet
                # vecs.append(extract_msl(net, input, ms, msp))
                raise NotImplementedError

            if (i + 1) % print_freq == 0 or (i + 1) == len(images):
                print('\r>>>> {}/{} done...'.format((i + 1), len(images)), end='')
        print('')

    return vecs


def extract_ssl(net, input):
    return net.norm(net.features(input)).squeeze(0).view(net.meta['outputdim'], -1).cpu().data

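Putting the pieces together, a typical use of this module is init_network followed by extract_vectors; note that this fork's extract_vectors returns the image paths alongside the descriptors. A usage sketch ('query.jpg' is a stand-in path; pretrained weights are downloaded on first use, and extract_ss above assumes a CUDA device):

import torchvision.transforms as transforms

net = init_network({'architecture': 'resnet101', 'pooling': 'gem'})
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=net.meta['mean'], std=net.meta['std']),
])
vecs, paths = extract_vectors(net, ['query.jpg'], 1024, transform)
print(vecs.shape)   # outputdim x num_images: one L2-normalized column per image
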
392
cirtorch/networks/imageretrievalnet_cpu.py
Normal file
@ -0,0 +1,392 @@
import os
import pdb

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

import torchvision

from cirtorch.layers.pooling import MAC, SPoC, GeM, GeMmp, RMAC, Rpool
from cirtorch.layers.normalization import L2N, PowerLaw
from cirtorch.datasets.genericdataset import ImagesFromList
from cirtorch.utils.general import get_data_root

# for some models, we have imported features (convolutions) from caffe because the image retrieval performance is higher for them
FEATURES = {
    'vgg16' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-vgg16-features-d369c8e.pth',
    'resnet50' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet50-features-ac468af.pth',
    'resnet101' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet101-features-10a101d.pth',
    'resnet152' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/imagenet/imagenet-caffe-resnet152-features-1011020.pth',
}

# TODO: pre-compute for more architectures and properly test variations (pre l2norm, post l2norm)
# pre-computed local pca whitening that can be applied before the pooling layer
L_WHITENING = {
    'resnet101' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-lwhiten-9f830ef.pth', # no pre l2 norm
    # 'resnet101' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-lwhiten-da5c935.pth', # with pre l2 norm
}

# possible global pooling layers, each one of these can be made regional
POOLING = {
    'mac' : MAC,
    'spoc' : SPoC,
    'gem' : GeM,
    'gemmp' : GeMmp,
    'rmac' : RMAC,
}

# TODO: pre-compute for: resnet50-gem-r, resnet50-mac-r, vgg16-mac-r, alexnet-mac-r
# pre-computed regional whitening, for most commonly used architectures and pooling methods
R_WHITENING = {
    'alexnet-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-r-rwhiten-c8cf7e2.pth',
    'vgg16-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-r-rwhiten-19b204e.pth',
    'resnet101-mac-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-mac-r-rwhiten-7f1ed8c.pth',
    'resnet101-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-r-rwhiten-adace84.pth',
}

# TODO: pre-compute for more architectures
# pre-computed final (global) whitening, for most commonly used architectures and pooling methods
WHITENING = {
    'alexnet-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-whiten-454ad53.pth',
    'alexnet-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-alexnet-gem-r-whiten-4c9126b.pth',
    'vgg16-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-whiten-eaa6695.pth',
    'vgg16-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-vgg16-gem-r-whiten-83582df.pth',
    'resnet50-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet50-gem-whiten-f15da7b.pth',
    'resnet101-mac-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-mac-r-whiten-9df41d3.pth',
    'resnet101-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-whiten-22ab0c1.pth',
    'resnet101-gem-r' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gem-r-whiten-b379c0a.pth',
    'resnet101-gemmp' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet101-gemmp-whiten-770f53c.pth',
    'resnet152-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-resnet152-gem-whiten-abe7b93.pth',
    'densenet121-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet121-gem-whiten-79e3eea.pth',
    'densenet169-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet169-gem-whiten-6b2a76a.pth',
    'densenet201-gem' : 'http://cmp.felk.cvut.cz/cnnimageretrieval/data/whiten/retrieval-SfM-120k/retrieval-SfM-120k-densenet201-gem-whiten-22ea45c.pth',
}

# output dimensionality for supported architectures
OUTPUT_DIM = {
    'alexnet' : 256,
    'vgg11' : 512,
    'vgg13' : 512,
    'vgg16' : 512,
    'vgg19' : 512,
    'resnet18' : 512,
    'resnet34' : 512,
    'resnet50' : 2048,
    'resnet101' : 2048,
    'resnet152' : 2048,
    'densenet121' : 1024,
    'densenet169' : 1664,
    'densenet201' : 1920,
    'densenet161' : 2208, # largest densenet
    'squeezenet1_0' : 512,
    'squeezenet1_1' : 512,
}


class ImageRetrievalNet(nn.Module):

    def __init__(self, features, lwhiten, pool, whiten, meta):
        super(ImageRetrievalNet, self).__init__()
        self.features = nn.Sequential(*features)
        self.lwhiten = lwhiten
        self.pool = pool
        self.whiten = whiten
        self.norm = L2N()
        self.meta = meta

    def forward(self, x):
        # x -> features
        o = self.features(x)

        # TODO: properly test (with pre-l2norm and/or post-l2norm)
        # if lwhiten exists: features -> local whiten
        if self.lwhiten is not None:
            # o = self.norm(o)
            s = o.size()
            o = o.permute(0,2,3,1).contiguous().view(-1, s[1])
            o = self.lwhiten(o)
            o = o.view(s[0],s[2],s[3],self.lwhiten.out_features).permute(0,3,1,2)
            # o = self.norm(o)

        # features -> pool -> norm
        o = self.norm(self.pool(o)).squeeze(-1).squeeze(-1)

        # if whiten exists: pooled features -> whiten -> norm
        if self.whiten is not None:
            o = self.norm(self.whiten(o))

        # permute so that it is Dx1 column vector per image (DxN if many images)
        return o.permute(1,0)

    def __repr__(self):
        tmpstr = super(ImageRetrievalNet, self).__repr__()[:-1]
        tmpstr += self.meta_repr()
        tmpstr = tmpstr + ')'
        return tmpstr

    def meta_repr(self):
        tmpstr = '  (' + 'meta' + '): dict( \n' # + self.meta.__repr__() + '\n'
        tmpstr += '     architecture: {}\n'.format(self.meta['architecture'])
        tmpstr += '     local_whitening: {}\n'.format(self.meta['local_whitening'])
        tmpstr += '     pooling: {}\n'.format(self.meta['pooling'])
        tmpstr += '     regional: {}\n'.format(self.meta['regional'])
        tmpstr += '     whitening: {}\n'.format(self.meta['whitening'])
        tmpstr += '     outputdim: {}\n'.format(self.meta['outputdim'])
        tmpstr += '     mean: {}\n'.format(self.meta['mean'])
        tmpstr += '     std: {}\n'.format(self.meta['std'])
        tmpstr = tmpstr + '  )\n'
        return tmpstr


def init_network(params):

    # parse params with default values
    architecture = params.get('architecture', 'resnet101')
    local_whitening = params.get('local_whitening', False)
    pooling = params.get('pooling', 'gem')
    regional = params.get('regional', False)
    whitening = params.get('whitening', False)
    mean = params.get('mean', [0.485, 0.456, 0.406])
    std = params.get('std', [0.229, 0.224, 0.225])
    pretrained = params.get('pretrained', True)

    # get output dimensionality size
    dim = OUTPUT_DIM[architecture]

    # loading network from torchvision
    if pretrained:
        if architecture not in FEATURES:
            # initialize with network pretrained on imagenet in pytorch
            net_in = getattr(torchvision.models, architecture)(pretrained=True)
        else:
            # initialize with random weights, later on we will fill features with custom pretrained network
            net_in = getattr(torchvision.models, architecture)(pretrained=False)
    else:
        # initialize with random weights
        net_in = getattr(torchvision.models, architecture)(pretrained=False)

    # initialize features:
    # take only convolutions for features,
    # always end with ReLU to make last activations non-negative
    if architecture.startswith('alexnet'):
        features = list(net_in.features.children())[:-1]
    elif architecture.startswith('vgg'):
        features = list(net_in.features.children())[:-1]
    elif architecture.startswith('resnet'):
        features = list(net_in.children())[:-2]
    elif architecture.startswith('densenet'):
        features = list(net_in.features.children())
        features.append(nn.ReLU(inplace=True))
    elif architecture.startswith('squeezenet'):
        features = list(net_in.features.children())
    else:
        raise ValueError('Unsupported or unknown architecture: {}!'.format(architecture))

    # initialize local whitening
    if local_whitening:
        lwhiten = nn.Linear(dim, dim, bias=True)
        # TODO: lwhiten with possible dimensionality reduction

        if pretrained:
            lw = architecture
            if lw in L_WHITENING:
                print(">> {}: for '{}' custom computed local whitening '{}' is used"
                      .format(os.path.basename(__file__), lw, os.path.basename(L_WHITENING[lw])))
                whiten_dir = os.path.join(get_data_root(), 'whiten')
                lwhiten.load_state_dict(model_zoo.load_url(L_WHITENING[lw], model_dir=whiten_dir))
            else:
                print(">> {}: for '{}' there is no local whitening computed, random weights are used"
                      .format(os.path.basename(__file__), lw))

    else:
        lwhiten = None

    # initialize pooling
    if pooling == 'gemmp':
        pool = POOLING[pooling](mp=dim)
    else:
        pool = POOLING[pooling]()

    # initialize regional pooling
    if regional:
        rpool = pool
        rwhiten = nn.Linear(dim, dim, bias=True)
        # TODO: rwhiten with possible dimensionality reduction

        if pretrained:
            rw = '{}-{}-r'.format(architecture, pooling)
            if rw in R_WHITENING:
                print(">> {}: for '{}' custom computed regional whitening '{}' is used"
                      .format(os.path.basename(__file__), rw, os.path.basename(R_WHITENING[rw])))
                whiten_dir = os.path.join(get_data_root(), 'whiten')
                rwhiten.load_state_dict(model_zoo.load_url(R_WHITENING[rw], model_dir=whiten_dir))
            else:
                print(">> {}: for '{}' there is no regional whitening computed, random weights are used"
                      .format(os.path.basename(__file__), rw))

        pool = Rpool(rpool, rwhiten)

    # initialize whitening
    if whitening:
        whiten = nn.Linear(dim, dim, bias=True)
        # TODO: whiten with possible dimensionality reduction

        if pretrained:
            w = architecture
            if local_whitening:
                w += '-lw'
            w += '-' + pooling
            if regional:
                w += '-r'
            if w in WHITENING:
                print(">> {}: for '{}' custom computed whitening '{}' is used"
                      .format(os.path.basename(__file__), w, os.path.basename(WHITENING[w])))
                whiten_dir = os.path.join(get_data_root(), 'whiten')
                whiten.load_state_dict(model_zoo.load_url(WHITENING[w], model_dir=whiten_dir))
            else:
                print(">> {}: for '{}' there is no whitening computed, random weights are used"
                      .format(os.path.basename(__file__), w))
    else:
        whiten = None

    # create meta information to be stored in the network
    meta = {
        'architecture' : architecture,
        'local_whitening' : local_whitening,
        'pooling' : pooling,
        'regional' : regional,
        'whitening' : whitening,
        'mean' : mean,
        'std' : std,
        'outputdim' : dim,
    }

    # create a generic image retrieval network
    net = ImageRetrievalNet(features, lwhiten, pool, whiten, meta)

    # initialize features with custom pretrained network if needed
    if pretrained and architecture in FEATURES:
        print(">> {}: for '{}' custom pretrained features '{}' are used"
              .format(os.path.basename(__file__), architecture, os.path.basename(FEATURES[architecture])))
        model_dir = os.path.join(get_data_root(), 'networks')
        net.features.load_state_dict(model_zoo.load_url(FEATURES[architecture], model_dir=model_dir))

    return net


def extract_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
    # moving network to gpu and eval mode
    net.cuda()
    net.eval()

    # creating dataset loader
    loader = torch.utils.data.DataLoader(
        ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
        batch_size=1, shuffle=False, num_workers=8, pin_memory=True
    )

    # extracting vectors
    with torch.no_grad():
        vecs = torch.zeros(net.meta['outputdim'], len(images))
        for i, input in enumerate(loader):
            input = input.cuda()

            if len(ms) == 1 and ms[0] == 1:
                vecs[:, i] = extract_ss(net, input)
            else:
                vecs[:, i] = extract_ms(net, input, ms, msp)

            if (i+1) % print_freq == 0 or (i+1) == len(images):
                print('\r>>>> {}/{} done...'.format((i+1), len(images)), end='')
        print('')

    return vecs

def extract_ss(net, input):
    return net(input).cpu().data.squeeze()

def extract_ms(net, input, ms, msp):

    v = torch.zeros(net.meta['outputdim'])

    for s in ms:
        if s == 1:
            input_t = input.clone()
        else:
            input_t = nn.functional.interpolate(input, scale_factor=s, mode='bilinear', align_corners=False)
        v += net(input_t).pow(msp).cpu().data.squeeze()

    v /= len(ms)
    v = v.pow(1./msp)
    v /= v.norm()

    return v


def extract_regional_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
    # moving network to gpu and eval mode
    net.cuda()
    net.eval()

    # creating dataset loader
    loader = torch.utils.data.DataLoader(
        ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
        batch_size=1, shuffle=False, num_workers=8, pin_memory=True
    )

    # extracting vectors
    with torch.no_grad():
        vecs = []
        for i, input in enumerate(loader):
            input = input.cuda()

            if len(ms) == 1:
                vecs.append(extract_ssr(net, input))
            else:
                # TODO: not implemented yet
                # vecs.append(extract_msr(net, input, ms, msp))
                raise NotImplementedError

            if (i+1) % print_freq == 0 or (i+1) == len(images):
                print('\r>>>> {}/{} done...'.format((i+1), len(images)), end='')
        print('')

    return vecs

def extract_ssr(net, input):
    return net.pool(net.features(input), aggregate=False).squeeze(0).squeeze(-1).squeeze(-1).permute(1,0).cpu().data


def extract_local_vectors(net, images, image_size, transform, bbxs=None, ms=[1], msp=1, print_freq=10):
    # moving network to gpu and eval mode
    net.cuda()
    net.eval()

    # creating dataset loader
    loader = torch.utils.data.DataLoader(
        ImagesFromList(root='', images=images, imsize=image_size, bbxs=bbxs, transform=transform),
        batch_size=1, shuffle=False, num_workers=8, pin_memory=True
    )

    # extracting vectors
    with torch.no_grad():
        vecs = []
        for i, input in enumerate(loader):
            input = input.cuda()

            if len(ms) == 1:
                vecs.append(extract_ssl(net, input))
            else:
                # TODO: not implemented yet
                # vecs.append(extract_msl(net, input, ms, msp))
                raise NotImplementedError

            if (i+1) % print_freq == 0 or (i+1) == len(images):
                print('\r>>>> {}/{} done...'.format((i+1), len(images)), end='')
        print('')

    return vecs

def extract_ssl(net, input):
    return net.norm(net.features(input)).squeeze(0).view(net.meta['outputdim'], -1).cpu().data

0
cirtorch/utils/__init__.py
Executable file
154
cirtorch/utils/download.py
Executable file
@ -0,0 +1,154 @@
|
||||
import os
|
||||
|
||||
def download_test(data_dir):
|
||||
"""
|
||||
DOWNLOAD_TEST Checks, and, if required, downloads the necessary datasets for the testing.
|
||||
|
||||
download_test(DATA_ROOT) checks if the data necessary for running the example script exist.
|
||||
If not it downloads it in the folder structure:
|
||||
DATA_ROOT/test/oxford5k/ : folder with Oxford images and ground truth file
|
||||
DATA_ROOT/test/paris6k/ : folder with Paris images and ground truth file
|
||||
DATA_ROOT/test/roxford5k/ : folder with Oxford images and revisited ground truth file
|
||||
DATA_ROOT/test/rparis6k/ : folder with Paris images and revisited ground truth file
|
||||
"""
|
||||
|
||||
# Create data folder if it does not exist
|
||||
if not os.path.isdir(data_dir):
|
||||
os.mkdir(data_dir)
|
||||
|
||||
# Create datasets folder if it does not exist
|
||||
datasets_dir = os.path.join(data_dir, 'test')
|
||||
print('***************', os.path.exists(datasets_dir))
|
||||
#print(not os.path.isdir(datasets_dir))
|
||||
if not os.path.exists(datasets_dir):
|
||||
os.mkdir(datasets_dir)
|
||||
|
||||
# Download datasets folders test/DATASETNAME/
|
||||
datasets = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
|
||||
for di in range(len(datasets)):
|
||||
dataset = datasets[di]
|
||||
|
||||
if dataset == 'oxford5k':
|
||||
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
|
||||
dl_files = ['oxbuild_images.tgz']
|
||||
elif dataset == 'paris6k':
|
||||
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
|
||||
dl_files = ['paris_1.tgz', 'paris_2.tgz']
|
||||
elif dataset == 'roxford5k':
|
||||
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
|
||||
dl_files = ['oxbuild_images.tgz']
|
||||
elif dataset == 'rparis6k':
|
||||
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
|
||||
dl_files = ['paris_1.tgz', 'paris_2.tgz']
|
||||
else:
|
||||
raise ValueError('Unknown dataset: {}!'.format(dataset))
|
||||
|
||||
dst_dir = os.path.join(datasets_dir, dataset, 'jpg')
|
||||
print('%%%%%%%%%%%%%%%%',dst_dir, dataset)
|
||||
if not os.path.exists(dst_dir):
|
||||
# for oxford and paris download images
|
||||
if dataset == 'oxford5k' or dataset == 'paris6k':
|
||||
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
|
||||
os.makedirs(dst_dir)
|
||||
for dli in range(len(dl_files)):
|
||||
dl_file = dl_files[dli]
|
||||
src_file = os.path.join(src_dir, dl_file)
|
||||
dst_file = os.path.join(dst_dir, dl_file)
|
||||
print('>> Downloading dataset {} archive {}...'.format(dataset, dl_file))
|
||||
os.system('wget {} -O {}'.format(src_file, dst_file))
|
||||
print('>> Extracting dataset {} archive {}...'.format(dataset, dl_file))
|
||||
# create tmp folder
|
||||
dst_dir_tmp = os.path.join(dst_dir, 'tmp')
|
||||
os.system('mkdir {}'.format(dst_dir_tmp))
|
||||
# extract in tmp folder
|
||||
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir_tmp))
|
||||
# remove all (possible) subfolders by moving only files in dst_dir
|
||||
os.system('find {} -type f -exec mv -i {{}} {} \\;'.format(dst_dir_tmp, dst_dir))
|
||||
# remove tmp folder
|
||||
os.system('rm -rf {}'.format(dst_dir_tmp))
|
||||
print('>> Extracted, deleting dataset {} archive {}...'.format(dataset, dl_file))
|
||||
os.system('rm {}'.format(dst_file))
|
||||
|
||||
# for roxford and rparis just make sym links
|
||||
elif dataset == 'roxford5k' or dataset == 'rparis6k':
|
||||
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
|
||||
dataset_old = dataset[1:]
|
||||
dst_dir_old = os.path.join(datasets_dir, dataset_old, 'jpg')
|
||||
os.mkdir(os.path.join(datasets_dir, dataset))
|
||||
os.system('ln -s {} {}'.format(dst_dir_old, dst_dir))
|
||||
print('>> Created symbolic link from {} jpg to {} jpg'.format(dataset_old, dataset))
|
||||
|
||||
|
||||
gnd_src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'test', dataset)
|
||||
gnd_dst_dir = os.path.join(datasets_dir, dataset)
|
||||
gnd_dl_file = 'gnd_{}.pkl'.format(dataset)
|
||||
gnd_src_file = os.path.join(gnd_src_dir, gnd_dl_file)
|
||||
gnd_dst_file = os.path.join(gnd_dst_dir, gnd_dl_file)
|
||||
if not os.path.exists(gnd_dst_file):
|
||||
print('>> Downloading dataset {} ground truth file...'.format(dataset))
|
||||
os.system('wget {} -O {}'.format(gnd_src_file, gnd_dst_file))
|
||||
|
||||
|
||||
def download_train(data_dir):
    """
    DOWNLOAD_TRAIN Checks, and, if required, downloads the necessary datasets for training.

        download_train(DATA_ROOT) checks if the data necessary for running the example script exists.
        If not, it downloads it in the folder structure:
            DATA_ROOT/train/retrieval-SfM-120k/ : folder with rsfm120k images and db files
            DATA_ROOT/train/retrieval-SfM-30k/  : folder with rsfm30k images and db files
    """

    # Create data folder if it does not exist
    if not os.path.isdir(data_dir):
        os.mkdir(data_dir)

    # Create datasets folder if it does not exist
    datasets_dir = os.path.join(data_dir, 'train')
    if not os.path.isdir(datasets_dir):
        os.mkdir(datasets_dir)

    # Download folder train/retrieval-SfM-120k/
    src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'ims')
    dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
    dl_file = 'ims.tar.gz'
    if not os.path.isdir(dst_dir):
        src_file = os.path.join(src_dir, dl_file)
        dst_file = os.path.join(dst_dir, dl_file)
        print('>> Image directory does not exist. Creating: {}'.format(dst_dir))
        os.makedirs(dst_dir)
        print('>> Downloading ims.tar.gz...')
        os.system('wget {} -O {}'.format(src_file, dst_file))
        print('>> Extracting {}...'.format(dst_file))
        os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir))
        print('>> Extracted, deleting {}...'.format(dst_file))
        os.system('rm {}'.format(dst_file))

    # Create symlink for train/retrieval-SfM-30k/
    dst_dir_old = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
    dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-30k', 'ims')
    if not os.path.exists(dst_dir):
        os.makedirs(os.path.join(datasets_dir, 'retrieval-SfM-30k'))
        os.system('ln -s {} {}'.format(dst_dir_old, dst_dir))
        print('>> Created symbolic link from retrieval-SfM-120k/ims to retrieval-SfM-30k/ims')

    # Download db files
    src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'dbs')
    datasets = ['retrieval-SfM-120k', 'retrieval-SfM-30k']
    for dataset in datasets:
        dst_dir = os.path.join(datasets_dir, dataset)
        if dataset == 'retrieval-SfM-120k':
            dl_files = ['{}.pkl'.format(dataset), '{}-whiten.pkl'.format(dataset)]
        elif dataset == 'retrieval-SfM-30k':
            dl_files = ['{}-whiten.pkl'.format(dataset)]

        if not os.path.isdir(dst_dir):
            print('>> Dataset directory does not exist. Creating: {}'.format(dst_dir))
            os.mkdir(dst_dir)

        for i in range(len(dl_files)):
            src_file = os.path.join(src_dir, dl_files[i])
            dst_file = os.path.join(dst_dir, dl_files[i])
            if not os.path.isfile(dst_file):
                print('>> DB file {} does not exist. Downloading...'.format(dl_files[i]))
                os.system('wget {} -O {}'.format(src_file, dst_file))
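A minimal usage sketch for the two helpers above (not part of the commit): both shell out to wget/tar, so a Unix-like environment with those tools on the PATH is assumed.

# Sketch only: fetch test and training data under <repo>/data.
from cirtorch.utils.general import get_data_root
from cirtorch.utils.download import download_test, download_train

data_root = get_data_root()   # <repo>/data
download_test(data_root)      # oxford5k, paris6k, roxford5k, rparis6k
download_train(data_root)     # retrieval-SfM-120k / -30k images and db files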
152
cirtorch/utils/download_win.py
Executable file
@ -0,0 +1,152 @@
import os
import shutil


def download_test(data_dir):
    """
    DOWNLOAD_TEST Checks, and, if required, downloads the necessary datasets for testing.

        download_test(DATA_ROOT) checks if the data necessary for running the example script exists.
        If not, it downloads it in the folder structure:
            DATA_ROOT/test/oxford5k/  : folder with Oxford images and ground truth file
            DATA_ROOT/test/paris6k/   : folder with Paris images and ground truth file
            DATA_ROOT/test/roxford5k/ : folder with Oxford images and revisited ground truth file
            DATA_ROOT/test/rparis6k/  : folder with Paris images and revisited ground truth file
    """

    # Create data folder if it does not exist
    if not os.path.isdir(data_dir):
        os.mkdir(data_dir)

    # Create datasets folder if it does not exist
    datasets_dir = os.path.join(data_dir, 'test')
    if not os.path.isdir(datasets_dir):
        os.mkdir(datasets_dir)

    # Download datasets folders test/DATASETNAME/
    datasets = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
    for di in range(len(datasets)):
        dataset = datasets[di]

        if dataset == 'oxford5k':
            src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
            dl_files = ['oxbuild_images.tgz']
        elif dataset == 'paris6k':
            src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
            dl_files = ['paris_1.tgz', 'paris_2.tgz']
        elif dataset == 'roxford5k':
            src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
            dl_files = ['oxbuild_images.tgz']
        elif dataset == 'rparis6k':
            src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
            dl_files = ['paris_1.tgz', 'paris_2.tgz']
        else:
            raise ValueError('Unknown dataset: {}!'.format(dataset))

        dst_dir = os.path.join(datasets_dir, dataset, 'jpg')
        if not os.path.isdir(dst_dir):

            # for oxford and paris download images (wget and tar must be on the PATH)
            if dataset == 'oxford5k' or dataset == 'paris6k':
                print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
                os.makedirs(dst_dir)
                for dli in range(len(dl_files)):
                    dl_file = dl_files[dli]
                    src_file = os.path.join(src_dir, dl_file)
                    dst_file = os.path.join(dst_dir, dl_file)
                    print('>> Downloading dataset {} archive {}...'.format(dataset, dl_file))
                    os.system('wget {} -O {}'.format(src_file, dst_file))
                    print('>> Extracting dataset {} archive {}...'.format(dataset, dl_file))
                    # extract in a tmp folder first
                    dst_dir_tmp = os.path.join(dst_dir, 'tmp')
                    os.system('mkdir {}'.format(dst_dir_tmp))
                    os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir_tmp))
                    # remove all (possible) subfolders by moving only files into dst_dir
                    # (portable replacement for the Unix find/mv used in download.py)
                    for root, _, files in os.walk(dst_dir_tmp):
                        for fname in files:
                            shutil.move(os.path.join(root, fname), os.path.join(dst_dir, fname))
                    # remove tmp folder (/s /q: recursive, no prompt)
                    os.system('rd /s /q {}'.format(dst_dir_tmp))
                    print('>> Extracted, deleting dataset {} archive {}...'.format(dataset, dl_file))
                    os.system('del {}'.format(dst_file))

            # for roxford and rparis just make sym links
            elif dataset == 'roxford5k' or dataset == 'rparis6k':
                print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
                dataset_old = dataset[1:]
                dst_dir_old = os.path.join(datasets_dir, dataset_old, 'jpg')
                os.mkdir(os.path.join(datasets_dir, dataset))
                # mklink expects the link first, then the target
                os.system('cmd /c mklink /d {} {}'.format(dst_dir, dst_dir_old))
                print('>> Created symbolic link from {} jpg to {} jpg'.format(dataset_old, dataset))

        gnd_src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'test', dataset)
        gnd_dst_dir = os.path.join(datasets_dir, dataset)
        gnd_dl_file = 'gnd_{}.pkl'.format(dataset)
        gnd_src_file = os.path.join(gnd_src_dir, gnd_dl_file)
        gnd_dst_file = os.path.join(gnd_dst_dir, gnd_dl_file)
        if not os.path.exists(gnd_dst_file):
            print('>> Downloading dataset {} ground truth file...'.format(dataset))
            os.system('wget {} -O {}'.format(gnd_src_file, gnd_dst_file))


def download_train(data_dir):
    """
    DOWNLOAD_TRAIN Checks, and, if required, downloads the necessary datasets for training.

        download_train(DATA_ROOT) checks if the data necessary for running the example script exists.
        If not, it downloads it in the folder structure:
            DATA_ROOT/train/retrieval-SfM-120k/ : folder with rsfm120k images and db files
            DATA_ROOT/train/retrieval-SfM-30k/  : folder with rsfm30k images and db files
    """

    # Create data folder if it does not exist
    if not os.path.isdir(data_dir):
        os.mkdir(data_dir)

    # Create datasets folder if it does not exist
    datasets_dir = os.path.join(data_dir, 'train')
    if not os.path.isdir(datasets_dir):
        os.mkdir(datasets_dir)

    # Download folder train/retrieval-SfM-120k/
    src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'ims')
    dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
    dl_file = 'ims.tar.gz'
    if not os.path.isdir(dst_dir):
        src_file = os.path.join(src_dir, dl_file)
        dst_file = os.path.join(dst_dir, dl_file)
        print('>> Image directory does not exist. Creating: {}'.format(dst_dir))
        os.makedirs(dst_dir)
        print('>> Downloading ims.tar.gz...')
        os.system('wget {} -O {}'.format(src_file, dst_file))
        print('>> Extracting {}...'.format(dst_file))
        os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir))
        print('>> Extracted, deleting {}...'.format(dst_file))
        os.system('del {}'.format(dst_file))

    # Create symlink for train/retrieval-SfM-30k/
    dst_dir_old = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
    dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-30k', 'ims')
    if not os.path.isdir(dst_dir):
        # create only the parent folder; mklink creates the 'ims' link itself
        os.makedirs(os.path.join(datasets_dir, 'retrieval-SfM-30k'))
        os.system('cmd /c mklink /d {} {}'.format(dst_dir, dst_dir_old))
        print('>> Created symbolic link from retrieval-SfM-120k/ims to retrieval-SfM-30k/ims')

    # Download db files
    src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'dbs')
    datasets = ['retrieval-SfM-120k', 'retrieval-SfM-30k']
    for dataset in datasets:
        dst_dir = os.path.join(datasets_dir, dataset)
        if dataset == 'retrieval-SfM-120k':
            dl_files = ['{}.pkl'.format(dataset), '{}-whiten.pkl'.format(dataset)]
        elif dataset == 'retrieval-SfM-30k':
            dl_files = ['{}-whiten.pkl'.format(dataset)]

        if not os.path.isdir(dst_dir):
            print('>> Dataset directory does not exist. Creating: {}'.format(dst_dir))
            os.mkdir(dst_dir)

        for i in range(len(dl_files)):
            src_file = os.path.join(src_dir, dl_files[i])
            dst_file = os.path.join(dst_dir, dl_files[i])
            if not os.path.isfile(dst_file):
                print('>> DB file {} does not exist. Downloading...'.format(dl_files[i]))
                os.system('wget {} -O {}'.format(src_file, dst_file))
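The Windows port above still shells out to wget and tar, which are rarely on a stock Windows PATH. A portable, stdlib-only sketch of the same download-and-extract step (the helper name is illustrative and not part of this commit):

import os
import tarfile
import urllib.request

def fetch_and_extract(url, dst_dir):
    # download an archive next to dst_dir and extract it there, stdlib only
    os.makedirs(dst_dir, exist_ok=True)
    archive = os.path.join(dst_dir, os.path.basename(url))
    urllib.request.urlretrieve(url, archive)
    with tarfile.open(archive, 'r:gz') as tar:
        tar.extractall(dst_dir)
    os.remove(archive)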
149
cirtorch/utils/evaluate.py
Executable file
@ -0,0 +1,149 @@
import numpy as np


def compute_ap(ranks, nres):
    """
    Computes average precision for given ranked indexes.

    Arguments
    ---------
    ranks : zero-based ranks of positive images
    nres  : number of positive images

    Returns
    -------
    ap    : average precision
    """

    # number of images ranked by the system
    nimgranks = len(ranks)

    # accumulate trapezoids in PR-plot
    ap = 0

    recall_step = 1. / nres

    for j in np.arange(nimgranks):
        rank = ranks[j]

        if rank == 0:
            precision_0 = 1.
        else:
            precision_0 = float(j) / rank

        precision_1 = float(j + 1) / (rank + 1)

        ap += (precision_0 + precision_1) * recall_step / 2.

    return ap
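A quick hand-worked sanity check of the trapezoidal accumulation (sketch, not part of the commit):

# two positives at ranks 0 and 2, out of nres = 2:
#   j=0: precision_0 = 1.0, precision_1 = 1/1 -> contributes (1.0 + 1.0)   * 0.5 / 2 = 0.5
#   j=1: precision_0 = 1/2, precision_1 = 2/3 -> contributes (0.5 + 0.667) * 0.5 / 2 ~ 0.292
ap = compute_ap(np.array([0, 2]), 2)   # ~ 0.7917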
def compute_map(ranks, gnd, kappas=[]):
    """
    Computes the mAP for a given set of returned results.

    Usage:
        map = compute_map(ranks, gnd)
            computes mean average precision (map) only

        map, aps, pr, prs = compute_map(ranks, gnd, kappas)
            computes mean average precision (map), average precision (aps) for each query
            computes mean precision at kappas (pr), precision at kappas (prs) for each query

    Notes:
    1) ranks starts from 0, ranks.shape = db_size X #queries
    2) The junk results (e.g., the query itself) should be declared in the gnd struct array
    3) If there are no positive images for some query, that query is excluded from the evaluation
    """

    map = 0.
    nq = len(gnd)  # number of queries
    aps = np.zeros(nq)
    pr = np.zeros(len(kappas))
    prs = np.zeros((nq, len(kappas)))
    nempty = 0

    for i in np.arange(nq):
        qgnd = np.array(gnd[i]['ok'])

        # no positive images, skip from the average
        if qgnd.shape[0] == 0:
            aps[i] = float('nan')
            prs[i, :] = float('nan')
            nempty += 1
            continue

        try:
            qgndj = np.array(gnd[i]['junk'])
        except KeyError:
            qgndj = np.empty(0)

        # sorted positions of positive and junk images (0 based)
        pos = np.arange(ranks.shape[0])[np.in1d(ranks[:, i], qgnd)]
        junk = np.arange(ranks.shape[0])[np.in1d(ranks[:, i], qgndj)]

        k = 0
        ij = 0
        if len(junk):
            # decrease positions of positives based on the number of
            # junk images appearing before them
            ip = 0
            while (ip < len(pos)):
                while (ij < len(junk) and pos[ip] > junk[ij]):
                    k += 1
                    ij += 1
                pos[ip] = pos[ip] - k
                ip += 1

        # compute ap
        ap = compute_ap(pos, len(qgnd))
        map = map + ap
        aps[i] = ap

        # compute precision @ k
        pos += 1  # get it to 1-based
        for j in np.arange(len(kappas)):
            kq = min(max(pos), kappas[j])
            prs[i, j] = (pos <= kq).sum() / kq
        pr = pr + prs[i, :]

    map = map / (nq - nempty)
    pr = pr / (nq - nempty)

    return map, aps, pr, prs


def compute_map_and_print(dataset, ranks, gnd, kappas=[1, 5, 10]):

    # old evaluation protocol
    if dataset.startswith('oxford5k') or dataset.startswith('paris6k'):
        map, aps, _, _ = compute_map(ranks, gnd)
        print('>> {}: mAP {:.2f}'.format(dataset, np.around(map*100, decimals=2)))

    # new evaluation protocol
    elif dataset.startswith('roxford5k') or dataset.startswith('rparis6k'):

        # easy setup: only easy images count as positives, hard ones are junked
        gnd_t = []
        for i in range(len(gnd)):
            g = {}
            g['ok'] = np.concatenate([gnd[i]['easy']])
            g['junk'] = np.concatenate([gnd[i]['junk'], gnd[i]['hard']])
            gnd_t.append(g)
        mapE, apsE, mprE, prsE = compute_map(ranks, gnd_t, kappas)

        # medium setup: easy and hard images both count as positives
        gnd_t = []
        for i in range(len(gnd)):
            g = {}
            g['ok'] = np.concatenate([gnd[i]['easy'], gnd[i]['hard']])
            g['junk'] = np.concatenate([gnd[i]['junk']])
            gnd_t.append(g)
        mapM, apsM, mprM, prsM = compute_map(ranks, gnd_t, kappas)

        # hard setup: only hard images count as positives, easy ones are junked
        gnd_t = []
        for i in range(len(gnd)):
            g = {}
            g['ok'] = np.concatenate([gnd[i]['hard']])
            g['junk'] = np.concatenate([gnd[i]['junk'], gnd[i]['easy']])
            gnd_t.append(g)
        mapH, apsH, mprH, prsH = compute_map(ranks, gnd_t, kappas)

        print('>> {}: mAP E: {}, M: {}, H: {}'.format(dataset, np.around(mapE*100, decimals=2), np.around(mapM*100, decimals=2), np.around(mapH*100, decimals=2)))
        print('>> {}: mP@k{} E: {}, M: {}, H: {}'.format(dataset, kappas, np.around(mprE*100, decimals=2), np.around(mprM*100, decimals=2), np.around(mprH*100, decimals=2)))
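A toy end-to-end call with hypothetical data (shapes follow the docstring notes above; not part of the commit):

import numpy as np
# 3 database images, 2 queries; column i holds db indices ranked for query i
ranks = np.array([[1, 0],
                  [0, 2],
                  [2, 1]])
gnd = [{'ok': [0, 1], 'junk': []},   # query 0: images 0 and 1 are positives
       {'ok': [2],    'junk': [0]}]  # query 1: image 2 positive, image 0 junked
mAP, aps, pr, prs = compute_map(ranks, gnd, kappas=[1, 2])
# both queries rank all positives first once junk is removed, so mAP == 1.0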
34
cirtorch/utils/general.py
Executable file
@ -0,0 +1,34 @@
import os
import hashlib


def get_root():
    return os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))


def get_data_root():
    return os.path.join(get_root(), 'data')


def htime(c):
    c = round(c)

    days = c // 86400
    hours = c // 3600 % 24
    minutes = c // 60 % 60
    seconds = c % 60

    if days > 0:
        return '{:d}d {:d}h {:d}m {:d}s'.format(days, hours, minutes, seconds)
    if hours > 0:
        return '{:d}h {:d}m {:d}s'.format(hours, minutes, seconds)
    if minutes > 0:
        return '{:d}m {:d}s'.format(minutes, seconds)
    return '{:d}s'.format(seconds)


def sha256_hash(filename, block_size=65536, length=8):
    sha256 = hashlib.sha256()
    with open(filename, 'rb') as f:
        for block in iter(lambda: f.read(block_size), b''):
            sha256.update(block)
    # return the first `length` hex characters of the digest
    return sha256.hexdigest()[:length]
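Hand-checked examples of the helpers above (sketch only):

print(htime(90061))   # 90061 s = 86400 + 3600 + 60 + 1 -> '1d 1h 1m 1s'
print(htime(185.4))   # rounds to 185 s -> '3m 5s'
# sha256_hash(path) -> first 8 hex chars of the file's SHA-256 digest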
65
cirtorch/utils/whiten.py
Executable file
@ -0,0 +1,65 @@
import os
import numpy as np


def whitenapply(X, m, P, dimensions=None):

    if not dimensions:
        dimensions = P.shape[0]

    X = np.dot(P[:dimensions, :], X - m)
    X = X / (np.linalg.norm(X, ord=2, axis=0, keepdims=True) + 1e-6)

    return X


def pcawhitenlearn(X):

    N = X.shape[1]

    # Learning PCA w/o annotations
    m = X.mean(axis=1, keepdims=True)
    Xc = X - m
    Xcov = np.dot(Xc, Xc.T)
    Xcov = (Xcov + Xcov.T) / (2*N)
    eigval, eigvec = np.linalg.eig(Xcov)
    order = eigval.argsort()[::-1]
    eigval = eigval[order]
    eigvec = eigvec[:, order]

    P = np.dot(np.linalg.inv(np.sqrt(np.diag(eigval))), eigvec.T)

    return m, P


def whitenlearn(X, qidxs, pidxs):

    # Learning Lw with annotations
    m = X[:, qidxs].mean(axis=1, keepdims=True)
    df = X[:, qidxs] - X[:, pidxs]
    S = np.dot(df, df.T) / df.shape[1]
    P = np.linalg.inv(cholesky(S))
    df = np.dot(P, X - m)
    D = np.dot(df, df.T)
    eigval, eigvec = np.linalg.eig(D)
    order = eigval.argsort()[::-1]
    eigval = eigval[order]
    eigvec = eigvec[:, order]

    P = np.dot(eigvec.T, P)

    return m, P


def cholesky(S):
    # Cholesky decomposition,
    # adding a small value on the diagonal
    # until the matrix is positive definite
    alpha = 0
    while True:
        try:
            L = np.linalg.cholesky(S + alpha * np.eye(*S.shape))
            return L
        except np.linalg.LinAlgError:
            if alpha == 0:
                alpha = 1e-10
            else:
                alpha *= 10
            print(">>>> {}::cholesky: Matrix is not positive definite, adding {:.0e} on the diagonal"
                  .format(os.path.basename(__file__), alpha))
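A minimal sketch of learning and applying whitening (random data purely for illustration; descriptors are stored column-wise, as the functions above assume):

import numpy as np

X = np.random.randn(128, 1000)            # 128-D descriptors for 1000 images, one per column
m, P = pcawhitenlearn(X)                  # unsupervised PCA whitening
Xw = whitenapply(X, m, P, dimensions=64)  # project, whiten, truncate to 64-D, L2-normalize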