import os
import shutil
import tarfile
import urllib.request

# Base URLs of the remote data servers. URLs are always joined with '/',
# never with os.path.join, which would insert backslashes on Windows.
_VGG_DATA_URL = 'http://www.robots.ox.ac.uk/~vgg/data'
_CNNIR_DATA_URL = 'http://cmp.felk.cvut.cz/cnnimageretrieval/data'

# Image archives of the test datasets that are downloaded directly.
# The revisited datasets (roxford5k, rparis6k) reuse these images via links.
_TEST_IMAGE_ARCHIVES = {
    'oxford5k': (_VGG_DATA_URL + '/oxbuildings', ['oxbuild_images.tgz']),
    'paris6k': (_VGG_DATA_URL + '/parisbuildings', ['paris_1.tgz', 'paris_2.tgz']),
}

# DB files required for each training dataset, in download order.
_TRAIN_DB_FILES = [
    ('retrieval-SfM-120k', ['retrieval-SfM-120k.pkl', 'retrieval-SfM-120k-whiten.pkl']),
    ('retrieval-SfM-30k', ['retrieval-SfM-30k-whiten.pkl']),
]


def _download(url, dst_file):
    """Download `url` to `dst_file`.

    The data is first written to `dst_file + '.part'` and renamed only after
    the transfer finished, so an interrupted download never leaves a
    truncated file that a later run would mistake for a complete one.
    Raises urllib.error.URLError / OSError on failure.
    """
    part_file = dst_file + '.part'
    try:
        with urllib.request.urlopen(url) as response, open(part_file, 'wb') as out:
            shutil.copyfileobj(response, out)
        os.replace(part_file, dst_file)
    finally:
        if os.path.exists(part_file):
            os.remove(part_file)


def _extract_archive(archive, dst_dir):
    """Extract the gzip-compressed tar `archive` into `dst_dir`."""
    with tarfile.open(archive, 'r:gz') as tar:
        if hasattr(tarfile, 'data_filter'):
            # Newer Python versions: refuse members escaping dst_dir.
            tar.extractall(dst_dir, filter='data')
        else:
            # NOTE(review): no extraction filter available on this Python
            # version; the archives come from fixed, hard-coded URLs.
            tar.extractall(dst_dir)


def _extract_flat(archive, dst_dir):
    """Extract `archive` and place all of its files directly in `dst_dir`.

    The archive is unpacked into the temporary folder `dst_dir/tmp`, every
    regular file found there (at any depth) is moved into `dst_dir`, and the
    temporary folder is removed again together with any sub-folders.
    """
    tmp_dir = os.path.join(dst_dir, 'tmp')
    os.makedirs(tmp_dir, exist_ok=True)
    try:
        _extract_archive(archive, tmp_dir)
        for root, _, files in os.walk(tmp_dir):
            for name in files:
                os.replace(os.path.join(root, name), os.path.join(dst_dir, name))
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)


def _link_dir(target_dir, link_dir):
    """Create the directory symlink `link_dir` pointing at `target_dir`.

    The parent folder of the link is created if necessary. The link stores a
    path relative to its own location so that it stays valid when `data_dir`
    was given as a relative path or the data root is moved as a whole.
    Raises OSError if the link cannot be created (on Windows this requires
    the privilege to create symbolic links).
    """
    link_parent = os.path.dirname(link_dir)
    os.makedirs(link_parent, exist_ok=True)
    rel_target = os.path.relpath(target_dir, link_parent)
    os.symlink(rel_target, link_dir, target_is_directory=True)


def download_test(data_dir):
    """
    DOWNLOAD_TEST Checks, and, if required, downloads the necessary datasets for the testing.

        download_test(DATA_ROOT) checks if the data necessary for running the example script exist.
        If not it downloads it in the folder structure:
            DATA_ROOT/test/oxford5k/  : folder with Oxford images and ground truth file
            DATA_ROOT/test/paris6k/   : folder with Paris images and ground truth file
            DATA_ROOT/test/roxford5k/ : folder with Oxford images and revisited ground truth file
            DATA_ROOT/test/rparis6k/  : folder with Paris images and revisited ground truth file

        For roxford5k and rparis6k the 'jpg' folder is a symbolic link to the
        'jpg' folder of oxford5k and paris6k respectively.

        Raises urllib.error.URLError or OSError if a download, an extraction
        or the creation of a symbolic link fails.
    """

    # Create data folder and datasets folder if they do not exist
    datasets_dir = os.path.join(data_dir, 'test')
    os.makedirs(datasets_dir, exist_ok=True)

    # Download datasets folders test/DATASETNAME/
    for dataset in ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']:
        dst_dir = os.path.join(datasets_dir, dataset, 'jpg')

        if not os.path.isdir(dst_dir):
            print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))

            if dataset in _TEST_IMAGE_ARCHIVES:
                # for oxford and paris download images
                src_dir, dl_files = _TEST_IMAGE_ARCHIVES[dataset]
                os.makedirs(dst_dir)
                try:
                    for dl_file in dl_files:
                        src_file = '{}/{}'.format(src_dir, dl_file)
                        dst_file = os.path.join(dst_dir, dl_file)
                        print('>> Downloading dataset {} archive {}...'.format(dataset, dl_file))
                        _download(src_file, dst_file)
                        print('>> Extracting dataset {} archive {}...'.format(dataset, dl_file))
                        _extract_flat(dst_file, dst_dir)
                        print('>> Extracted, deleting dataset {} archive {}...'.format(dataset, dl_file))
                        os.remove(dst_file)
                except BaseException:
                    # Do not leave a partial image folder behind: its mere
                    # existence would make the next run skip this dataset.
                    shutil.rmtree(dst_dir, ignore_errors=True)
                    raise
            else:
                # for roxford and rparis just make sym links
                dataset_old = dataset[1:]
                dst_dir_old = os.path.join(datasets_dir, dataset_old, 'jpg')
                _link_dir(dst_dir_old, dst_dir)
                print('>> Created symbolic link from {} jpg to {} jpg'.format(dataset_old, dataset))

        # Download the ground truth file if it is missing
        gnd_dl_file = 'gnd_{}.pkl'.format(dataset)
        gnd_src_file = '{}/test/{}/{}'.format(_CNNIR_DATA_URL, dataset, gnd_dl_file)
        gnd_dst_file = os.path.join(datasets_dir, dataset, gnd_dl_file)
        if not os.path.exists(gnd_dst_file):
            print('>> Downloading dataset {} ground truth file...'.format(dataset))
            _download(gnd_src_file, gnd_dst_file)


def download_train(data_dir):
    """
    DOWNLOAD_TRAIN Checks, and, if required, downloads the necessary datasets for the training.

        download_train(DATA_ROOT) checks if the data necessary for running the example script exist.
        If not it downloads it in the folder structure:
            DATA_ROOT/train/retrieval-SfM-120k/  : folder with rsfm120k images and db files
            DATA_ROOT/train/retrieval-SfM-30k/   : folder with rsfm30k images and db files

        The 'ims' folder of retrieval-SfM-30k is a symbolic link to the 'ims'
        folder of retrieval-SfM-120k.

        Raises urllib.error.URLError or OSError if a download, an extraction
        or the creation of the symbolic link fails.
    """

    # Create data folder and datasets folder if they do not exist
    datasets_dir = os.path.join(data_dir, 'train')
    os.makedirs(datasets_dir, exist_ok=True)

    # Download folder train/retrieval-SfM-120k/
    ims_dir = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
    dl_file = 'ims.tar.gz'
    if not os.path.isdir(ims_dir):
        src_file = '{}/train/ims/{}'.format(_CNNIR_DATA_URL, dl_file)
        dst_file = os.path.join(ims_dir, dl_file)
        print('>> Image directory does not exist. Creating: {}'.format(ims_dir))
        os.makedirs(ims_dir)
        try:
            print('>> Downloading ims.tar.gz...')
            _download(src_file, dst_file)
            print('>> Extracting {}...'.format(dst_file))
            _extract_archive(dst_file, ims_dir)
            print('>> Extracted, deleting {}...'.format(dst_file))
            os.remove(dst_file)
        except BaseException:
            # An incomplete image folder would make the next run skip the
            # download, so remove it before propagating the error.
            shutil.rmtree(ims_dir, ignore_errors=True)
            raise

    # Create symlink for train/retrieval-SfM-30k/
    ims_link = os.path.join(datasets_dir, 'retrieval-SfM-30k', 'ims')
    if not os.path.isdir(ims_link):
        _link_dir(ims_dir, ims_link)
        print('>> Created symbolic link from retrieval-SfM-120k/ims to retrieval-SfM-30k/ims')

    # Download db files
    for dataset, dl_files in _TRAIN_DB_FILES:
        dst_dir = os.path.join(datasets_dir, dataset)
        if not os.path.isdir(dst_dir):
            print('>> Dataset directory does not exist. Creating: {}'.format(dst_dir))
            os.mkdir(dst_dir)
        for dl_file in dl_files:
            src_file = '{}/train/dbs/{}'.format(_CNNIR_DATA_URL, dl_file)
            dst_file = os.path.join(dst_dir, dl_file)
            if not os.path.isfile(dst_file):
                print('>> DB file {} does not exist. Downloading...'.format(dl_file))
                _download(src_file, dst_file)