Files
ieemoo-ai-searchv2/cirtorch/utils/download_win.py
2022-11-22 15:32:06 +08:00

153 lines
7.5 KiB
Python
Executable File

import os
def _extract_flat(archive_path, dst_dir):
    """Extract the regular files of a gzipped tar archive directly into dst_dir.

    Any folder structure stored in the archive is discarded: every file is
    written as ``dst_dir/<basename>``. Implemented with the standard library
    only, so it does not depend on external ``tar``/``find``/``mv`` commands.
    A file whose name already exists in dst_dir is left untouched.
    """
    import shutil
    import tarfile

    with tarfile.open(archive_path, 'r:gz') as archive:
        for member in archive:
            if not member.isfile():
                continue
            # Using only the basename flattens sub-folders and also prevents
            # a member from being written outside of dst_dir.
            name = os.path.basename(member.name)
            if not name:
                continue
            target = os.path.join(dst_dir, name)
            if os.path.exists(target):
                continue
            with archive.extractfile(member) as src, open(target, 'wb') as out:
                shutil.copyfileobj(src, out)


def download_test(data_dir):
    """
    DOWNLOAD_TEST Checks, and, if required, downloads the necessary datasets for the testing.

    download_test(DATA_ROOT) checks if the data necessary for running the example script exist.
    If not it downloads it in the folder structure:
        DATA_ROOT/test/oxford5k/  : folder with Oxford images and ground truth file
        DATA_ROOT/test/paris6k/   : folder with Paris images and ground truth file
        DATA_ROOT/test/roxford5k/ : folder with Oxford images and revisited ground truth file
        DATA_ROOT/test/rparis6k/  : folder with Paris images and revisited ground truth file

    The revisited datasets do not get their own copy of the images; their
    ``jpg`` folder is a symbolic link to the ``jpg`` folder of the matching
    original dataset.

    Args:
        data_dir: root data folder (DATA_ROOT); created if it does not exist.

    Raises:
        urllib.error.URLError / OSError: if a download or a file operation fails.
            A dataset ``jpg`` folder left behind by a failed download has to be
            removed by hand before the download is attempted again.
    """
    import urllib.request

    # Create data folder and datasets folder if they do not exist
    datasets_dir = os.path.join(data_dir, 'test')
    os.makedirs(datasets_dir, exist_ok=True)

    # Image archives of the original datasets: dataset -> (base URL, archive names)
    image_sources = {
        'oxford5k': ('http://www.robots.ox.ac.uk/~vgg/data/oxbuildings',
                     ['oxbuild_images.tgz']),
        'paris6k': ('http://www.robots.ox.ac.uk/~vgg/data/parisbuildings',
                    ['paris_1.tgz', 'paris_2.tgz']),
    }
    gnd_base_url = 'http://cmp.felk.cvut.cz/cnnimageretrieval/data'

    # Original datasets come first so that the revisited ones can link to them.
    for dataset in ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']:
        dataset_dir = os.path.join(datasets_dir, dataset)
        dst_dir = os.path.join(dataset_dir, 'jpg')

        if not os.path.isdir(dst_dir):
            print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
            if dataset in image_sources:
                # for oxford and paris download images
                src_dir, dl_files = image_sources[dataset]
                os.makedirs(dst_dir)
                for dl_file in dl_files:
                    # URLs always use '/', so they must not be built with os.path.join
                    src_file = '{}/{}'.format(src_dir, dl_file)
                    dst_file = os.path.join(dst_dir, dl_file)
                    print('>> Downloading dataset {} archive {}...'.format(dataset, dl_file))
                    urllib.request.urlretrieve(src_file, dst_file)
                    print('>> Extracting dataset {} archive {}...'.format(dataset, dl_file))
                    # remove all (possible) subfolders by extracting only files in dst_dir
                    _extract_flat(dst_file, dst_dir)
                    print('>> Extracted, deleting dataset {} archive {}...'.format(dataset, dl_file))
                    os.remove(dst_file)
            else:
                # for roxford and rparis just make sym links
                dataset_old = dataset[1:]
                # Absolute target: a relative one would be resolved against the
                # folder containing the link, not the current working directory.
                dst_dir_old = os.path.abspath(os.path.join(datasets_dir, dataset_old, 'jpg'))
                # The folder may already exist (e.g. it only holds the ground truth file).
                os.makedirs(dataset_dir, exist_ok=True)
                try:
                    # os.symlink(target, link): the link is created at dst_dir
                    os.symlink(dst_dir_old, dst_dir, target_is_directory=True)
                except OSError as err:
                    # On Windows this needs administrator rights or Developer Mode.
                    print('>> Could not create symbolic link {} -> {}: {}'.format(dst_dir, dst_dir_old, err))
                else:
                    print('>> Created symbolic link from {} jpg to {} jpg'.format(dataset_old, dataset))

        # Download the ground truth file of the dataset if it is missing
        gnd_dl_file = 'gnd_{}.pkl'.format(dataset)
        gnd_src_file = '/'.join([gnd_base_url, 'test', dataset, gnd_dl_file])
        gnd_dst_file = os.path.join(dataset_dir, gnd_dl_file)
        if not os.path.exists(gnd_dst_file):
            print('>> Downloading dataset {} ground truth file...'.format(dataset))
            urllib.request.urlretrieve(gnd_src_file, gnd_dst_file)
def download_train(data_dir):
    """
    DOWNLOAD_TRAIN Checks, and, if required, downloads the necessary datasets for the training.

    download_train(DATA_ROOT) checks if the data necessary for running the example script exist.
    If not it downloads it in the folder structure:
        DATA_ROOT/train/retrieval-SfM-120k/ : folder with rsfm120k images and db files
        DATA_ROOT/train/retrieval-SfM-30k/  : folder with rsfm30k images and db files

    The 30k dataset does not get its own copy of the images; its ``ims``
    folder is a symbolic link to the ``ims`` folder of the 120k dataset.

    Args:
        data_dir: root data folder (DATA_ROOT); created if it does not exist.

    Raises:
        urllib.error.URLError / OSError: if a download or a file operation fails.
            An ``ims`` folder left behind by a failed download has to be removed
            by hand before the download is attempted again.
    """
    import tarfile
    import urllib.request

    base_url = 'http://cmp.felk.cvut.cz/cnnimageretrieval/data'

    # Create data folder and datasets folder if they do not exist
    datasets_dir = os.path.join(data_dir, 'train')
    os.makedirs(datasets_dir, exist_ok=True)
    print(data_dir)

    # Download folder train/retrieval-SfM-120k/
    ims_dir = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
    dl_file = 'ims.tar.gz'
    if not os.path.isdir(ims_dir):
        # URLs always use '/', so they must not be built with os.path.join
        src_file = '/'.join([base_url, 'train', 'ims', dl_file])
        dst_file = os.path.join(ims_dir, dl_file)
        print('>> Image directory does not exist. Creating: {}'.format(ims_dir))
        os.makedirs(ims_dir)
        print('>> Downloading ims.tar.gz...')
        # The folder was just created, so the archive cannot be there yet and
        # has to be fetched before it can be extracted.
        urllib.request.urlretrieve(src_file, dst_file)
        print('>> Extracting {}...'.format(dst_file))
        with tarfile.open(dst_file, 'r:gz') as archive:
            if hasattr(tarfile, 'data_filter'):
                # Extraction filters exist: refuse members that would escape ims_dir
                archive.extractall(ims_dir, filter='data')
            else:
                archive.extractall(ims_dir)
        print('>> Extracted, deleting {}...'.format(dst_file))
        os.remove(dst_file)

    # Create symlink for train/retrieval-SfM-30k/
    link_dir = os.path.join(datasets_dir, 'retrieval-SfM-30k', 'ims')
    if not os.path.isdir(link_dir):
        # Only the parent folder is created; 'ims' itself must be the link.
        os.makedirs(os.path.dirname(link_dir), exist_ok=True)
        try:
            # os.symlink(target, link). Absolute target: a relative one would be
            # resolved against the folder containing the link.
            os.symlink(os.path.abspath(ims_dir), link_dir, target_is_directory=True)
        except OSError as err:
            # On Windows this needs administrator rights or Developer Mode.
            print('>> Could not create symbolic link {} -> {}: {}'.format(link_dir, ims_dir, err))
        else:
            print('>> Created symbolic link from retrieval-SfM-120k/ims to retrieval-SfM-30k/ims')

    # Download db files
    for dataset in ['retrieval-SfM-120k', 'retrieval-SfM-30k']:
        dst_dir = os.path.join(datasets_dir, dataset)
        if dataset == 'retrieval-SfM-120k':
            dl_files = ['{}.pkl'.format(dataset), '{}-whiten.pkl'.format(dataset)]
        else:
            # retrieval-SfM-30k only has a whitening file of its own
            dl_files = ['{}-whiten.pkl'.format(dataset)]

        if not os.path.isdir(dst_dir):
            print('>> Dataset directory does not exist. Creating: {}'.format(dst_dir))
            os.mkdir(dst_dir)

        for db_file in dl_files:
            src_file = '/'.join([base_url, 'train', 'dbs', db_file])
            dst_file = os.path.join(dst_dir, db_file)
            if not os.path.isfile(dst_file):
                print('>> DB file {} does not exist. Downloading...'.format(db_file))
                urllib.request.urlretrieve(src_file, dst_file)