first push

2022-11-22 15:32:06 +08:00
commit f4eee81c45
81 changed files with 31537 additions and 0 deletions

cirtorch/utils/__init__.py (Executable file, 0 lines)

cirtorch/utils/download.py (Executable file, 154 lines)

@@ -0,0 +1,154 @@
import os
def download_test(data_dir):
"""
DOWNLOAD_TEST checks for, and if necessary downloads, the datasets needed for testing.
download_test(DATA_ROOT) checks whether the data needed to run the example scripts exist.
If not, it downloads them into the folder structure:
DATA_ROOT/test/oxford5k/ : folder with Oxford images and ground truth file
DATA_ROOT/test/paris6k/ : folder with Paris images and ground truth file
DATA_ROOT/test/roxford5k/ : folder with Oxford images and revisited ground truth file
DATA_ROOT/test/rparis6k/ : folder with Paris images and revisited ground truth file
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'test')
if not os.path.isdir(datasets_dir):
os.mkdir(datasets_dir)
# Download datasets folders test/DATASETNAME/
datasets = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
for dataset in datasets:
if dataset == 'oxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'paris6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
elif dataset == 'roxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'rparis6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
else:
raise ValueError('Unknown dataset: {}!'.format(dataset))
dst_dir = os.path.join(datasets_dir, dataset, 'jpg')
if not os.path.isdir(dst_dir):
# for oxford and paris download images
if dataset == 'oxford5k' or dataset == 'paris6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
os.makedirs(dst_dir)
for dl_file in dl_files:
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Downloading dataset {} archive {}...'.format(dataset, dl_file))
os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting dataset {} archive {}...'.format(dataset, dl_file))
# create tmp folder
dst_dir_tmp = os.path.join(dst_dir, 'tmp')
os.system('mkdir {}'.format(dst_dir_tmp))
# extract in tmp folder
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir_tmp))
# remove all (possible) subfolders by moving only files in dst_dir
os.system('find {} -type f -exec mv -i {{}} {} \\;'.format(dst_dir_tmp, dst_dir))
# remove tmp folder
os.system('rm -rf {}'.format(dst_dir_tmp))
print('>> Extracted, deleting dataset {} archive {}...'.format(dataset, dl_file))
os.system('rm {}'.format(dst_file))
# for roxford and rparis just make sym links
elif dataset == 'roxford5k' or dataset == 'rparis6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
dataset_old = dataset[1:]
dst_dir_old = os.path.join(datasets_dir, dataset_old, 'jpg')
os.mkdir(os.path.join(datasets_dir, dataset))
os.system('ln -s {} {}'.format(dst_dir_old, dst_dir))
print('>> Created symbolic link from {} jpg to {} jpg'.format(dataset_old, dataset))
gnd_src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'test', dataset)
gnd_dst_dir = os.path.join(datasets_dir, dataset)
gnd_dl_file = 'gnd_{}.pkl'.format(dataset)
gnd_src_file = os.path.join(gnd_src_dir, gnd_dl_file)
gnd_dst_file = os.path.join(gnd_dst_dir, gnd_dl_file)
if not os.path.exists(gnd_dst_file):
print('>> Downloading dataset {} ground truth file...'.format(dataset))
os.system('wget {} -O {}'.format(gnd_src_file, gnd_dst_file))
def download_train(data_dir):
"""
DOWNLOAD_TRAIN checks for, and if necessary downloads, the datasets needed for training.
download_train(DATA_ROOT) checks whether the data needed to run the example scripts exist.
If not, it downloads them into the folder structure:
DATA_ROOT/train/retrieval-SfM-120k/ : folder with rsfm120k images and db files
DATA_ROOT/train/retrieval-SfM-30k/ : folder with rsfm30k images and db files
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'train')
if not os.path.isdir(datasets_dir):
os.mkdir(datasets_dir)
# Download folder train/retrieval-SfM-120k/
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dl_file = 'ims.tar.gz'
if not os.path.isdir(dst_dir):
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Image directory does not exist. Creating: {}'.format(dst_dir))
os.makedirs(dst_dir)
print('>> Downloading ims.tar.gz...')
os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting {}...'.format(dst_file))
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir))
print('>> Extracted, deleting {}...'.format(dst_file))
os.system('rm {}'.format(dst_file))
# Create symlink for train/retrieval-SfM-30k/
dst_dir_old = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-30k', 'ims')
if not os.path.exists(dst_dir):
os.makedirs(os.path.join(datasets_dir, 'retrieval-SfM-30k'))
os.system('ln -s {} {}'.format(dst_dir_old, dst_dir))
print('>> Created symbolic link from retrieval-SfM-120k/ims to retrieval-SfM-30k/ims')
# Download db files
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'dbs')
datasets = ['retrieval-SfM-120k', 'retrieval-SfM-30k']
for dataset in datasets:
dst_dir = os.path.join(datasets_dir, dataset)
if dataset == 'retrieval-SfM-120k':
dl_files = ['{}.pkl'.format(dataset), '{}-whiten.pkl'.format(dataset)]
elif dataset == 'retrieval-SfM-30k':
dl_files = ['{}-whiten.pkl'.format(dataset)]
if not os.path.isdir(dst_dir):
print('>> Dataset directory does not exist. Creating: {}'.format(dst_dir))
os.mkdir(dst_dir)
for dl_file in dl_files:
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
if not os.path.isfile(dst_file):
print('>> DB file {} does not exist. Downloading...'.format(dl_file))
os.system('wget {} -O {}'.format(src_file, dst_file))
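
For reference, a typical call site might look like the following sketch (assuming the module paths above; the image archives are large, so downloads take a while):

from cirtorch.utils.download import download_test, download_train
from cirtorch.utils.general import get_data_root

download_train(get_data_root())  # retrieval-SfM-120k/-30k images and db files
download_test(get_data_root())   # Oxford/Paris images and ground-truth .pkl files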

cirtorch/utils/download_win.py (Executable file, 152 lines)

@@ -0,0 +1,152 @@
import os
import shutil
def download_test(data_dir):
"""
DOWNLOAD_TEST checks for, and if necessary downloads, the datasets needed for testing.
download_test(DATA_ROOT) checks whether the data needed to run the example scripts exist.
If not, it downloads them into the folder structure:
DATA_ROOT/test/oxford5k/ : folder with Oxford images and ground truth file
DATA_ROOT/test/paris6k/ : folder with Paris images and ground truth file
DATA_ROOT/test/roxford5k/ : folder with Oxford images and revisited ground truth file
DATA_ROOT/test/rparis6k/ : folder with Paris images and revisited ground truth file
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'test')
if not os.path.isdir(datasets_dir):
os.mkdir(datasets_dir)
# Download datasets folders test/DATASETNAME/
datasets = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
for dataset in datasets:
if dataset == 'oxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'paris6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
elif dataset == 'roxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'rparis6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
else:
raise ValueError('Unknown dataset: {}!'.format(dataset))
dst_dir = os.path.join(datasets_dir, dataset, 'jpg')
if not os.path.isdir(dst_dir):
# for oxford and paris download images
if dataset == 'oxford5k' or dataset == 'paris6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
os.makedirs(dst_dir)
for dl_file in dl_files:
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Downloading dataset {} archive {}...'.format(dataset, dl_file))
os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting dataset {} archive {}...'.format(dataset, dl_file))
# create tmp folder
dst_dir_tmp = os.path.join(dst_dir, 'tmp')
os.makedirs(dst_dir_tmp)
# extract into tmp folder (bsdtar ships with Windows 10 and later)
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir_tmp))
# flatten any subfolders by moving only files into dst_dir
# (the Unix 'find ... -exec mv' used in download.py does not exist on Windows)
for root, _, files in os.walk(dst_dir_tmp):
    for name in files:
        shutil.move(os.path.join(root, name), os.path.join(dst_dir, name))
# remove tmp folder
shutil.rmtree(dst_dir_tmp)
print('>> Extracted, deleting dataset {} archive {}...'.format(dataset, dl_file))
os.remove(dst_file)
# for roxford and rparis just make sym links
elif dataset == 'roxford5k' or dataset == 'rparis6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
dataset_old = dataset[1:]
dst_dir_old = os.path.join(datasets_dir, dataset_old, 'jpg')
os.mkdir(os.path.join(datasets_dir, dataset))
os.system('cmd /c mklink /d {} {}'.format(dst_dir, dst_dir_old))
print('>> Created symbolic link from {} jpg to {} jpg'.format(dataset_old, dataset))
gnd_src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'test', dataset)
gnd_dst_dir = os.path.join(datasets_dir, dataset)
gnd_dl_file = 'gnd_{}.pkl'.format(dataset)
gnd_src_file = os.path.join(gnd_src_dir, gnd_dl_file)
gnd_dst_file = os.path.join(gnd_dst_dir, gnd_dl_file)
if not os.path.exists(gnd_dst_file):
print('>> Downloading dataset {} ground truth file...'.format(dataset))
os.system('wget {} -O {}'.format(gnd_src_file, gnd_dst_file))
def download_train(data_dir):
"""
DOWNLOAD_TRAIN checks for, and if necessary downloads, the datasets needed for training.
download_train(DATA_ROOT) checks whether the data needed to run the example scripts exist.
If not, it downloads them into the folder structure:
DATA_ROOT/train/retrieval-SfM-120k/ : folder with rsfm120k images and db files
DATA_ROOT/train/retrieval-SfM-30k/ : folder with rsfm30k images and db files
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'train')
if not os.path.isdir(datasets_dir):
os.mkdir(datasets_dir)
# Download folder train/retrieval-SfM-120k/
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dl_file = 'ims.tar.gz'
if not os.path.isdir(dst_dir):
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Image directory does not exist. Creating: {}'.format(dst_dir))
os.makedirs(dst_dir)
print('>> Downloading ims.tar.gz...')
os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting {}...'.format(dst_file))
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir))
print('>> Extracted, deleting {}...'.format(dst_file))
os.remove(dst_file)
# Create symlink for train/retrieval-SfM-30k/
dst_dir_old = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-30k', 'ims')
if not os.path.isdir(dst_dir):
# create only the parent: mklink fails if the link name already exists
os.makedirs(os.path.join(datasets_dir, 'retrieval-SfM-30k'))
os.system('cmd /c mklink /d {} {}'.format(dst_dir, dst_dir_old))
print('>> Created symbolic link from retrieval-SfM-120k/ims to retrieval-SfM-30k/ims')
# Download db files
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'dbs')
datasets = ['retrieval-SfM-120k', 'retrieval-SfM-30k']
for dataset in datasets:
dst_dir = os.path.join(datasets_dir, dataset)
if dataset == 'retrieval-SfM-120k':
dl_files = ['{}.pkl'.format(dataset), '{}-whiten.pkl'.format(dataset)]
elif dataset == 'retrieval-SfM-30k':
dl_files = ['{}-whiten.pkl'.format(dataset)]
if not os.path.isdir(dst_dir):
print('>> Dataset directory does not exist. Creating: {}'.format(dst_dir))
os.mkdir(dst_dir)
for i in range(len(dl_files)):
src_file = os.path.join(src_dir, dl_files[i])
dst_file = os.path.join(dst_dir, dl_files[i])
if not os.path.isfile(dst_file):
print('>> DB file {} does not exist. Downloading...'.format(dl_files[i]))
os.system('wget {} -O {}'.format(src_file, dst_file))
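
Shelling out to mklink and rd makes this port fragile. As a portable sketch (an assumption, not part of this commit: it needs Python 3.3+ and symlink privileges on Windows), a single helper based on os.symlink could replace both platform-specific link commands; link_dir is a hypothetical name:

import os

def link_dir(target, link_name):
    # os.symlink works on POSIX and Windows alike;
    # target_is_directory is required for directory links on Windows
    if not os.path.exists(link_name):
        os.symlink(target, link_name, target_is_directory=True)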

cirtorch/utils/evaluate.py (Executable file, 149 lines)

@@ -0,0 +1,149 @@
import numpy as np
def compute_ap(ranks, nres):
"""
Computes average precision for given ranked indexes.
Arguments
---------
ranks : zero-based ranks of positive images
nres : number of positive images
Returns
-------
ap : average precision
"""
# number of images ranked by the system
nimgranks = len(ranks)
# accumulate trapezoids in PR-plot
ap = 0
recall_step = 1. / nres
for j in np.arange(nimgranks):
rank = ranks[j]
if rank == 0:
precision_0 = 1.
else:
precision_0 = float(j) / rank
precision_1 = float(j + 1) / (rank + 1)
ap += (precision_0 + precision_1) * recall_step / 2.
return ap
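
As a quick sanity check (illustrative numbers, not from this commit): two positives retrieved at zero-based ranks 0 and 2, with nres = 2, give recall_step = 0.5 and

# j = 0: rank 0 -> precision_0 = 1.0, precision_1 = 1/1; adds (1.0 + 1.0) * 0.5 / 2 = 0.5
# j = 1: rank 2 -> precision_0 = 1/2, precision_1 = 2/3; adds (0.5 + 0.6667) * 0.5 / 2 ~= 0.2917
compute_ap(np.array([0, 2]), 2)  # ~0.7917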
def compute_map(ranks, gnd, kappas=[]):
"""
Computes the mAP for a given set of returned results.
Usage:
map = compute_map (ranks, gnd)
computes mean average precision (map) only
map, aps, pr, prs = compute_map (ranks, gnd, kappas)
computes mean average precision (map), average precision (aps) for each query
computes mean precision at kappas (pr), precision at kappas (prs) for each query
Notes:
1) ranks starts from 0, ranks.shape = db_size X #queries
2) The junk results (e.g., the query itself) should be declared in the gnd struct array
3) If there are no positive images for some query, that query is excluded from the evaluation
"""
map = 0.
nq = len(gnd) # number of queries
aps = np.zeros(nq)
pr = np.zeros(len(kappas))
prs = np.zeros((nq, len(kappas)))
nempty = 0
for i in np.arange(nq):
qgnd = np.array(gnd[i]['ok'])
# no positive images, skip from the average
if qgnd.shape[0] == 0:
aps[i] = float('nan')
prs[i, :] = float('nan')
nempty += 1
continue
try:
qgndj = np.array(gnd[i]['junk'])
except KeyError:
qgndj = np.empty(0)
# sorted positions of positive and junk images (0 based)
pos = np.arange(ranks.shape[0])[np.in1d(ranks[:,i], qgnd)]
junk = np.arange(ranks.shape[0])[np.in1d(ranks[:,i], qgndj)]
k = 0
ij = 0
if len(junk):
# decrease positions of positives based on the number of
# junk images appearing before them
ip = 0
while (ip < len(pos)):
while (ij < len(junk) and pos[ip] > junk[ij]):
k += 1
ij += 1
pos[ip] = pos[ip] - k
ip += 1
# compute ap
ap = compute_ap(pos, len(qgnd))
map = map + ap
aps[i] = ap
# compute precision @ k
pos += 1 # get it to 1-based
for j in np.arange(len(kappas)):
kq = min(max(pos), kappas[j])
prs[i, j] = (pos <= kq).sum() / kq
pr = pr + prs[i, :]
map = map / (nq - nempty)
pr = pr / (nq - nempty)
return map, aps, pr, prs
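
The junk-handling loop above simply re-indexes positives as if the junk images were deleted from the ranked list (illustrative values, not from this commit):

pos = np.array([1, 4])
junk = np.array([0, 2, 3])
# one junk image precedes pos[0] and three precede pos[1],
# so the adjusted positions are [0, 1] and compute_ap scores a perfect 1.0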
def compute_map_and_print(dataset, ranks, gnd, kappas=[1, 5, 10]):
# old evaluation protocol
if dataset.startswith('oxford5k') or dataset.startswith('paris6k'):
map, aps, _, _ = compute_map(ranks, gnd)
print('>> {}: mAP {:.2f}'.format(dataset, np.around(map*100, decimals=2)))
# new evaluation protocol
elif dataset.startswith('roxford5k') or dataset.startswith('rparis6k'):
gnd_t = []
for i in range(len(gnd)):
g = {}
g['ok'] = np.concatenate([gnd[i]['easy']])
g['junk'] = np.concatenate([gnd[i]['junk'], gnd[i]['hard']])
gnd_t.append(g)
mapE, apsE, mprE, prsE = compute_map(ranks, gnd_t, kappas)
gnd_t = []
for i in range(len(gnd)):
g = {}
g['ok'] = np.concatenate([gnd[i]['easy'], gnd[i]['hard']])
g['junk'] = np.concatenate([gnd[i]['junk']])
gnd_t.append(g)
mapM, apsM, mprM, prsM = compute_map(ranks, gnd_t, kappas)
gnd_t = []
for i in range(len(gnd)):
g = {}
g['ok'] = np.concatenate([gnd[i]['hard']])
g['junk'] = np.concatenate([gnd[i]['junk'], gnd[i]['easy']])
gnd_t.append(g)
mapH, apsH, mprH, prsH = compute_map(ranks, gnd_t, kappas)
print('>> {}: mAP E: {}, M: {}, H: {}'.format(dataset, np.around(mapE*100, decimals=2), np.around(mapM*100, decimals=2), np.around(mapH*100, decimals=2)))
print('>> {}: mP@k{} E: {}, M: {}, H: {}'.format(dataset, kappas, np.around(mprE*100, decimals=2), np.around(mprM*100, decimals=2), np.around(mprH*100, decimals=2)))
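
A minimal end-to-end sketch on synthetic data (shapes and values are assumptions) shows how ranks and gnd fit together; column i of ranks lists database indices sorted by decreasing similarity to query i:

import numpy as np
ranks = np.array([[0, 2],
                  [1, 0],
                  [2, 1]])  # 3 database images, 2 queries
gnd = [{'ok': np.array([0]), 'junk': np.array([])},
       {'ok': np.array([1, 2]), 'junk': np.array([])}]
mean_ap, aps, pr, prs = compute_map(ranks, gnd, kappas=[1, 2])
# mean_ap ~0.8958: query 0 scores AP 1.0, query 1 scores AP ~0.7917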

cirtorch/utils/general.py (Executable file, 34 lines)

@@ -0,0 +1,34 @@
import os
import hashlib
def get_root():
return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
def get_data_root():
return os.path.join(get_root(), 'data')
def htime(c):
c = round(c)
days = c // 86400
hours = c // 3600 % 24
minutes = c // 60 % 60
seconds = c % 60
if days > 0:
return '{:d}d {:d}h {:d}m {:d}s'.format(days, hours, minutes, seconds)
if hours > 0:
return '{:d}h {:d}m {:d}s'.format(hours, minutes, seconds)
if minutes > 0:
return '{:d}m {:d}s'.format(minutes, seconds)
return '{:d}s'.format(seconds)
def sha256_hash(filename, block_size=65536, length=8):
sha256 = hashlib.sha256()
with open(filename, 'rb') as f:
for block in iter(lambda: f.read(block_size), b''):
sha256.update(block)
# return the first 'length' hex characters (the earlier [:length-1] was off by one)
return sha256.hexdigest()[:length]
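
Illustrative usage (the file name is an assumption):

print(htime(98765))                # '1d 3h 26m 5s'
print(sha256_hash('weights.pth'))  # first 8 hex characters of the file's SHA-256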

cirtorch/utils/whiten.py (Executable file, 65 lines)

@@ -0,0 +1,65 @@
import os
import numpy as np
def whitenapply(X, m, P, dimensions=None):
if dimensions is None:
dimensions = P.shape[0]
X = np.dot(P[:dimensions, :], X-m)
X = X / (np.linalg.norm(X, ord=2, axis=0, keepdims=True) + 1e-6)
return X
def pcawhitenlearn(X):
N = X.shape[1]
# Learning PCA w/o annotations
m = X.mean(axis=1, keepdims=True)
Xc = X - m
Xcov = np.dot(Xc, Xc.T)
Xcov = (Xcov + Xcov.T) / (2*N)
eigval, eigvec = np.linalg.eig(Xcov)
order = eigval.argsort()[::-1]
eigval = eigval[order]
eigvec = eigvec[:, order]
P = np.dot(np.linalg.inv(np.sqrt(np.diag(eigval))), eigvec.T)
return m, P
def whitenlearn(X, qidxs, pidxs):
# Learning Lw w annotations
m = X[:, qidxs].mean(axis=1, keepdims=True)
df = X[:, qidxs] - X[:, pidxs]
S = np.dot(df, df.T) / df.shape[1]
P = np.linalg.inv(cholesky(S))
df = np.dot(P, X-m)
D = np.dot(df, df.T)
eigval, eigvec = np.linalg.eig(D)
order = eigval.argsort()[::-1]
eigval = eigval[order]
eigvec = eigvec[:, order]
P = np.dot(eigvec.T, P)
return m, P
def cholesky(S):
# Cholesky decomposition
# with adding a small value on the diagonal
# until matrix is positive definite
alpha = 0
while True:
try:
L = np.linalg.cholesky(S + alpha*np.eye(*S.shape))
return L
except np.linalg.LinAlgError:
if alpha == 0:
alpha = 1e-10
else:
alpha *= 10
print(">>>> {}::cholesky: Matrix is not positive definite, adding {:.0e} on the diagonal"
.format(os.path.basename(__file__), alpha))
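
A minimal sketch on random descriptors (sizes are assumptions): learn PCA whitening on 128-D vectors, then project to 64 dimensions with L2 normalization:

import numpy as np
X = np.random.randn(128, 1000)  # one 128-D descriptor per column
m, P = pcawhitenlearn(X)
Xw = whitenapply(X, m, P, dimensions=64)
print(Xw.shape)  # (64, 1000); each column has (approximately) unit L2 norm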