first push

2022-11-22 15:32:06 +08:00
commit f4eee81c45
81 changed files with 31537 additions and 0 deletions

cirtorch/utils/__init__.py (Executable file, 0 lines)

cirtorch/utils/download.py (Executable file, 154 lines)

@@ -0,0 +1,154 @@
import os
def download_test(data_dir):
"""
DOWNLOAD_TEST checks for, and if necessary downloads, the datasets needed for testing.
download_test(DATA_ROOT) checks whether the data needed to run the example scripts exist.
If not, it downloads them into the folder structure:
DATA_ROOT/test/oxford5k/ : folder with Oxford images and ground truth file
DATA_ROOT/test/paris6k/ : folder with Paris images and ground truth file
DATA_ROOT/test/roxford5k/ : folder with Oxford images and revisited ground truth file
DATA_ROOT/test/rparis6k/ : folder with Paris images and revisited ground truth file
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'test')
if not os.path.isdir(datasets_dir):
os.mkdir(datasets_dir)
# Download datasets folders test/DATASETNAME/
datasets = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
for dataset in datasets:
if dataset == 'oxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'paris6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
elif dataset == 'roxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'rparis6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
else:
raise ValueError('Unknown dataset: {}!'.format(dataset))
dst_dir = os.path.join(datasets_dir, dataset, 'jpg')
if not os.path.isdir(dst_dir):
# for oxford and paris download images
if dataset == 'oxford5k' or dataset == 'paris6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
os.makedirs(dst_dir)
for dl_file in dl_files:
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Downloading dataset {} archive {}...'.format(dataset, dl_file))
os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting dataset {} archive {}...'.format(dataset, dl_file))
# create tmp folder
dst_dir_tmp = os.path.join(dst_dir, 'tmp')
os.system('mkdir {}'.format(dst_dir_tmp))
# extract in tmp folder
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir_tmp))
# remove all (possible) subfolders by moving only files in dst_dir
os.system('find {} -type f -exec mv -i {{}} {} \\;'.format(dst_dir_tmp, dst_dir))
# remove tmp folder
os.system('rm -rf {}'.format(dst_dir_tmp))
print('>> Extracted, deleting dataset {} archive {}...'.format(dataset, dl_file))
os.system('rm {}'.format(dst_file))
# for roxford and rparis just make sym links
elif dataset == 'roxford5k' or dataset == 'rparis6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
dataset_old = dataset[1:]
dst_dir_old = os.path.join(datasets_dir, dataset_old, 'jpg')
os.mkdir(os.path.join(datasets_dir, dataset))
os.system('ln -s {} {}'.format(dst_dir_old, dst_dir))
print('>> Created symbolic link from {} jpg to {} jpg'.format(dataset_old, dataset))
gnd_src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'test', dataset)
gnd_dst_dir = os.path.join(datasets_dir, dataset)
gnd_dl_file = 'gnd_{}.pkl'.format(dataset)
gnd_src_file = os.path.join(gnd_src_dir, gnd_dl_file)
gnd_dst_file = os.path.join(gnd_dst_dir, gnd_dl_file)
if not os.path.exists(gnd_dst_file):
print('>> Downloading dataset {} ground truth file...'.format(dataset))
os.system('wget {} -O {}'.format(gnd_src_file, gnd_dst_file))
def download_train(data_dir):
"""
DOWNLOAD_TRAIN checks for, and if necessary downloads, the datasets needed for training.
download_train(DATA_ROOT) checks whether the data needed to run the example scripts exist.
If not, it downloads them into the folder structure:
DATA_ROOT/train/retrieval-SfM-120k/ : folder with rsfm120k images and db files
DATA_ROOT/train/retrieval-SfM-30k/ : folder with rsfm30k images and db files
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'train')
if not os.path.isdir(datasets_dir):
os.mkdir(datasets_dir)
# Download folder train/retrieval-SfM-120k/
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dl_file = 'ims.tar.gz'
if not os.path.isdir(dst_dir):
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Image directory does not exist. Creating: {}'.format(dst_dir))
os.makedirs(dst_dir)
print('>> Downloading ims.tar.gz...')
os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting {}...'.format(dst_file))
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir))
print('>> Extracted, deleting {}...'.format(dst_file))
os.system('rm {}'.format(dst_file))
# Create symlink for train/retrieval-SfM-30k/
dst_dir_old = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-30k', 'ims')
if not os.path.exists(dst_dir):
os.makedirs(os.path.join(datasets_dir, 'retrieval-SfM-30k'))
os.system('ln -s {} {}'.format(dst_dir_old, dst_dir))
print('>> Created symbolic link from retrieval-SfM-120k/ims to retrieval-SfM-30k/ims')
# Download db files
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'dbs')
datasets = ['retrieval-SfM-120k', 'retrieval-SfM-30k']
for dataset in datasets:
dst_dir = os.path.join(datasets_dir, dataset)
if dataset == 'retrieval-SfM-120k':
dl_files = ['{}.pkl'.format(dataset), '{}-whiten.pkl'.format(dataset)]
elif dataset == 'retrieval-SfM-30k':
dl_files = ['{}-whiten.pkl'.format(dataset)]
if not os.path.isdir(dst_dir):
print('>> Dataset directory does not exist. Creating: {}'.format(dst_dir))
os.mkdir(dst_dir)
for dl_file in dl_files:
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
if not os.path.isfile(dst_file):
print('>> DB file {} does not exist. Downloading...'.format(dl_file))
os.system('wget {} -O {}'.format(src_file, dst_file))
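
For reference, a typical call site might look like the following sketch (assuming the module paths above; the image archives are large, so downloads take a while):

from cirtorch.utils.download import download_test, download_train
from cirtorch.utils.general import get_data_root

download_train(get_data_root())  # retrieval-SfM-120k/-30k images and db files
download_test(get_data_root())   # Oxford/Paris images and ground-truth .pkl files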

cirtorch/utils/download_win.py (Executable file, 152 lines)

@@ -0,0 +1,152 @@
import os
import shutil
def download_test(data_dir):
"""
DOWNLOAD_TEST checks for, and if necessary downloads, the datasets needed for testing.
download_test(DATA_ROOT) checks whether the data needed to run the example scripts exist.
If not, it downloads them into the folder structure:
DATA_ROOT/test/oxford5k/ : folder with Oxford images and ground truth file
DATA_ROOT/test/paris6k/ : folder with Paris images and ground truth file
DATA_ROOT/test/roxford5k/ : folder with Oxford images and revisited ground truth file
DATA_ROOT/test/rparis6k/ : folder with Paris images and revisited ground truth file
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'test')
if not os.path.isdir(datasets_dir):
os.mkdir(datasets_dir)
# Download datasets folders test/DATASETNAME/
datasets = ['oxford5k', 'paris6k', 'roxford5k', 'rparis6k']
for dataset in datasets:
if dataset == 'oxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'paris6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
elif dataset == 'roxford5k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/oxbuildings'
dl_files = ['oxbuild_images.tgz']
elif dataset == 'rparis6k':
src_dir = 'http://www.robots.ox.ac.uk/~vgg/data/parisbuildings'
dl_files = ['paris_1.tgz', 'paris_2.tgz']
else:
raise ValueError('Unknown dataset: {}!'.format(dataset))
dst_dir = os.path.join(datasets_dir, dataset, 'jpg')
if not os.path.isdir(dst_dir):
# for oxford and paris download images
if dataset == 'oxford5k' or dataset == 'paris6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
os.makedirs(dst_dir)
for dl_file in dl_files:
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Downloading dataset {} archive {}...'.format(dataset, dl_file))
os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting dataset {} archive {}...'.format(dataset, dl_file))
# create tmp folder
dst_dir_tmp = os.path.join(dst_dir, 'tmp')
os.makedirs(dst_dir_tmp)
# extract into tmp folder (bsdtar ships with Windows 10 and later)
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir_tmp))
# flatten any subfolders by moving only files into dst_dir
# (the Unix 'find ... -exec mv' used in download.py does not exist on Windows)
for root, _, files in os.walk(dst_dir_tmp):
    for name in files:
        shutil.move(os.path.join(root, name), os.path.join(dst_dir, name))
# remove tmp folder
shutil.rmtree(dst_dir_tmp)
print('>> Extracted, deleting dataset {} archive {}...'.format(dataset, dl_file))
os.remove(dst_file)
# for roxford and rparis just make sym links
elif dataset == 'roxford5k' or dataset == 'rparis6k':
print('>> Dataset {} directory does not exist. Creating: {}'.format(dataset, dst_dir))
dataset_old = dataset[1:]
dst_dir_old = os.path.join(datasets_dir, dataset_old, 'jpg')
os.mkdir(os.path.join(datasets_dir, dataset))
os.system('cmd /c mklink /d {} {}'.format(dst_dir, dst_dir_old))
print('>> Created symbolic link from {} jpg to {} jpg'.format(dataset_old, dataset))
gnd_src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'test', dataset)
gnd_dst_dir = os.path.join(datasets_dir, dataset)
gnd_dl_file = 'gnd_{}.pkl'.format(dataset)
gnd_src_file = os.path.join(gnd_src_dir, gnd_dl_file)
gnd_dst_file = os.path.join(gnd_dst_dir, gnd_dl_file)
if not os.path.exists(gnd_dst_file):
print('>> Downloading dataset {} ground truth file...'.format(dataset))
os.system('wget {} -O {}'.format(gnd_src_file, gnd_dst_file))
def download_train(data_dir):
"""
DOWNLOAD_TRAIN checks for, and if necessary downloads, the datasets needed for training.
download_train(DATA_ROOT) checks whether the data needed to run the example scripts exist.
If not, it downloads them into the folder structure:
DATA_ROOT/train/retrieval-SfM-120k/ : folder with rsfm120k images and db files
DATA_ROOT/train/retrieval-SfM-30k/ : folder with rsfm30k images and db files
"""
# Create data folder if it does not exist
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
# Create datasets folder if it does not exist
datasets_dir = os.path.join(data_dir, 'train')
if not os.path.isdir(datasets_dir):
os.mkdir(datasets_dir)
# Download folder train/retrieval-SfM-120k/
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dl_file = 'ims.tar.gz'
if not os.path.isdir(dst_dir):
src_file = os.path.join(src_dir, dl_file)
dst_file = os.path.join(dst_dir, dl_file)
print('>> Image directory does not exist. Creating: {}'.format(dst_dir))
os.makedirs(dst_dir)
print('>> Downloading ims.tar.gz...')
os.system('wget {} -O {}'.format(src_file, dst_file))
print('>> Extracting {}...'.format(dst_file))
os.system('tar -zxf {} -C {}'.format(dst_file, dst_dir))
print('>> Extracted, deleting {}...'.format(dst_file))
os.remove(dst_file)
# Create symlink for train/retrieval-SfM-30k/
dst_dir_old = os.path.join(datasets_dir, 'retrieval-SfM-120k', 'ims')
dst_dir = os.path.join(datasets_dir, 'retrieval-SfM-30k', 'ims')
if not os.path.isdir(dst_dir):
# create only the parent: mklink fails if the link name already exists
os.makedirs(os.path.join(datasets_dir, 'retrieval-SfM-30k'))
os.system('cmd /c mklink /d {} {}'.format(dst_dir, dst_dir_old))
print('>> Created symbolic link from retrieval-SfM-120k/ims to retrieval-SfM-30k/ims')
# Download db files
src_dir = os.path.join('http://cmp.felk.cvut.cz/cnnimageretrieval/data', 'train', 'dbs')
datasets = ['retrieval-SfM-120k', 'retrieval-SfM-30k']
for dataset in datasets:
dst_dir = os.path.join(datasets_dir, dataset)
if dataset == 'retrieval-SfM-120k':
dl_files = ['{}.pkl'.format(dataset), '{}-whiten.pkl'.format(dataset)]
elif dataset == 'retrieval-SfM-30k':
dl_files = ['{}-whiten.pkl'.format(dataset)]
if not os.path.isdir(dst_dir):
print('>> Dataset directory does not exist. Creating: {}'.format(dst_dir))
os.mkdir(dst_dir)
for i in range(len(dl_files)):
src_file = os.path.join(src_dir, dl_files[i])
dst_file = os.path.join(dst_dir, dl_files[i])
if not os.path.isfile(dst_file):
print('>> DB file {} does not exist. Downloading...'.format(dl_files[i]))
os.system('wget {} -O {}'.format(src_file, dst_file))
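
Shelling out to mklink and rd makes this port fragile. As a portable sketch (an assumption, not part of this commit: it needs Python 3.3+ and symlink privileges on Windows), a single helper based on os.symlink could replace both platform-specific link commands; link_dir is a hypothetical name:

import os

def link_dir(target, link_name):
    # os.symlink works on POSIX and Windows alike;
    # target_is_directory is required for directory links on Windows
    if not os.path.exists(link_name):
        os.symlink(target, link_name, target_is_directory=True)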

cirtorch/utils/evaluate.py (Executable file, 149 lines)

@@ -0,0 +1,149 @@
import numpy as np
def compute_ap(ranks, nres):
"""
Computes average precision for given ranked indexes.
Arguments
---------
ranks : zero-based ranks of positive images
nres : number of positive images
Returns
-------
ap : average precision
"""
# number of images ranked by the system
nimgranks = len(ranks)
# accumulate trapezoids in PR-plot
ap = 0
recall_step = 1. / nres
for j in np.arange(nimgranks):
rank = ranks[j]
if rank == 0:
precision_0 = 1.
else:
precision_0 = float(j) / rank
precision_1 = float(j + 1) / (rank + 1)
ap += (precision_0 + precision_1) * recall_step / 2.
return ap
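
As a quick sanity check (illustrative numbers, not from this commit): two positives retrieved at zero-based ranks 0 and 2, with nres = 2, give recall_step = 0.5 and

# j = 0: rank 0 -> precision_0 = 1.0, precision_1 = 1/1; adds (1.0 + 1.0) * 0.5 / 2 = 0.5
# j = 1: rank 2 -> precision_0 = 1/2, precision_1 = 2/3; adds (0.5 + 0.6667) * 0.5 / 2 ~= 0.2917
compute_ap(np.array([0, 2]), 2)  # ~0.7917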
def compute_map(ranks, gnd, kappas=[]):
"""
Computes the mAP for a given set of returned results.
Usage:
map = compute_map (ranks, gnd)
computes mean average precision (map) only
map, aps, pr, prs = compute_map (ranks, gnd, kappas)
computes mean average precision (map), average precision (aps) for each query
computes mean precision at kappas (pr), precision at kappas (prs) for each query
Notes:
1) ranks starts from 0, ranks.shape = db_size X #queries
2) The junk results (e.g., the query itself) should be declared in the gnd struct array
3) If there are no positive images for some query, that query is excluded from the evaluation
"""
map = 0.
nq = len(gnd) # number of queries
aps = np.zeros(nq)
pr = np.zeros(len(kappas))
prs = np.zeros((nq, len(kappas)))
nempty = 0
for i in np.arange(nq):
qgnd = np.array(gnd[i]['ok'])
# no positive images, skip from the average
if qgnd.shape[0] == 0:
aps[i] = float('nan')
prs[i, :] = float('nan')
nempty += 1
continue
try:
qgndj = np.array(gnd[i]['junk'])
except KeyError:
qgndj = np.empty(0)
# sorted positions of positive and junk images (0 based)
pos = np.arange(ranks.shape[0])[np.in1d(ranks[:,i], qgnd)]
junk = np.arange(ranks.shape[0])[np.in1d(ranks[:,i], qgndj)]
k = 0
ij = 0
if len(junk):
# decrease positions of positives based on the number of
# junk images appearing before them
ip = 0
while (ip < len(pos)):
while (ij < len(junk) and pos[ip] > junk[ij]):
k += 1
ij += 1
pos[ip] = pos[ip] - k
ip += 1
# compute ap
ap = compute_ap(pos, len(qgnd))
map = map + ap
aps[i] = ap
# compute precision @ k
pos += 1 # get it to 1-based
for j in np.arange(len(kappas)):
kq = min(max(pos), kappas[j])
prs[i, j] = (pos <= kq).sum() / kq
pr = pr + prs[i, :]
map = map / (nq - nempty)
pr = pr / (nq - nempty)
return map, aps, pr, prs
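
The junk-handling loop above simply re-indexes positives as if the junk images were deleted from the ranked list (illustrative values, not from this commit):

pos = np.array([1, 4])
junk = np.array([0, 2, 3])
# one junk image precedes pos[0] and three precede pos[1],
# so the adjusted positions are [0, 1] and compute_ap scores a perfect 1.0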
def compute_map_and_print(dataset, ranks, gnd, kappas=[1, 5, 10]):
# old evaluation protocol
if dataset.startswith('oxford5k') or dataset.startswith('paris6k'):
map, aps, _, _ = compute_map(ranks, gnd)
print('>> {}: mAP {:.2f}'.format(dataset, np.around(map*100, decimals=2)))
# new evaluation protocol
elif dataset.startswith('roxford5k') or dataset.startswith('rparis6k'):
gnd_t = []
for i in range(len(gnd)):
g = {}
g['ok'] = np.concatenate([gnd[i]['easy']])
g['junk'] = np.concatenate([gnd[i]['junk'], gnd[i]['hard']])
gnd_t.append(g)
mapE, apsE, mprE, prsE = compute_map(ranks, gnd_t, kappas)
gnd_t = []
for i in range(len(gnd)):
g = {}
g['ok'] = np.concatenate([gnd[i]['easy'], gnd[i]['hard']])
g['junk'] = np.concatenate([gnd[i]['junk']])
gnd_t.append(g)
mapM, apsM, mprM, prsM = compute_map(ranks, gnd_t, kappas)
gnd_t = []
for i in range(len(gnd)):
g = {}
g['ok'] = np.concatenate([gnd[i]['hard']])
g['junk'] = np.concatenate([gnd[i]['junk'], gnd[i]['easy']])
gnd_t.append(g)
mapH, apsH, mprH, prsH = compute_map(ranks, gnd_t, kappas)
print('>> {}: mAP E: {}, M: {}, H: {}'.format(dataset, np.around(mapE*100, decimals=2), np.around(mapM*100, decimals=2), np.around(mapH*100, decimals=2)))
print('>> {}: mP@k{} E: {}, M: {}, H: {}'.format(dataset, kappas, np.around(mprE*100, decimals=2), np.around(mprM*100, decimals=2), np.around(mprH*100, decimals=2)))
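
A minimal end-to-end sketch on synthetic data (shapes and values are assumptions) shows how ranks and gnd fit together; column i of ranks lists database indices sorted by decreasing similarity to query i:

import numpy as np
ranks = np.array([[0, 2],
                  [1, 0],
                  [2, 1]])  # 3 database images, 2 queries
gnd = [{'ok': np.array([0]), 'junk': np.array([])},
       {'ok': np.array([1, 2]), 'junk': np.array([])}]
mean_ap, aps, pr, prs = compute_map(ranks, gnd, kappas=[1, 2])
# mean_ap ~0.8958: query 0 scores AP 1.0, query 1 scores AP ~0.7917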

cirtorch/utils/general.py (Executable file, 34 lines)

@@ -0,0 +1,34 @@
import os
import hashlib
def get_root():
return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
def get_data_root():
return os.path.join(get_root(), 'data')
def htime(c):
c = round(c)
days = c // 86400
hours = c // 3600 % 24
minutes = c // 60 % 60
seconds = c % 60
if days > 0:
return '{:d}d {:d}h {:d}m {:d}s'.format(days, hours, minutes, seconds)
if hours > 0:
return '{:d}h {:d}m {:d}s'.format(hours, minutes, seconds)
if minutes > 0:
return '{:d}m {:d}s'.format(minutes, seconds)
return '{:d}s'.format(seconds)
def sha256_hash(filename, block_size=65536, length=8):
sha256 = hashlib.sha256()
with open(filename, 'rb') as f:
for block in iter(lambda: f.read(block_size), b''):
sha256.update(block)
# return the first 'length' hex characters (the earlier [:length-1] was off by one)
return sha256.hexdigest()[:length]
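
Illustrative usage (the file name is an assumption):

print(htime(98765))                # '1d 3h 26m 5s'
print(sha256_hash('weights.pth'))  # first 8 hex characters of the file's SHA-256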

cirtorch/utils/whiten.py (Executable file, 65 lines)

@@ -0,0 +1,65 @@
import os
import numpy as np
def whitenapply(X, m, P, dimensions=None):
if dimensions is None:
dimensions = P.shape[0]
X = np.dot(P[:dimensions, :], X-m)
X = X / (np.linalg.norm(X, ord=2, axis=0, keepdims=True) + 1e-6)
return X
def pcawhitenlearn(X):
N = X.shape[1]
# Learning PCA w/o annotations
m = X.mean(axis=1, keepdims=True)
Xc = X - m
Xcov = np.dot(Xc, Xc.T)
Xcov = (Xcov + Xcov.T) / (2*N)
eigval, eigvec = np.linalg.eig(Xcov)
order = eigval.argsort()[::-1]
eigval = eigval[order]
eigvec = eigvec[:, order]
P = np.dot(np.linalg.inv(np.sqrt(np.diag(eigval))), eigvec.T)
return m, P
def whitenlearn(X, qidxs, pidxs):
# Learning Lw w annotations
m = X[:, qidxs].mean(axis=1, keepdims=True)
df = X[:, qidxs] - X[:, pidxs]
S = np.dot(df, df.T) / df.shape[1]
P = np.linalg.inv(cholesky(S))
df = np.dot(P, X-m)
D = np.dot(df, df.T)
eigval, eigvec = np.linalg.eig(D)
order = eigval.argsort()[::-1]
eigval = eigval[order]
eigvec = eigvec[:, order]
P = np.dot(eigvec.T, P)
return m, P
def cholesky(S):
# Cholesky decomposition
# with adding a small value on the diagonal
# until matrix is positive definite
alpha = 0
while True:
try:
L = np.linalg.cholesky(S + alpha*np.eye(*S.shape))
return L
except np.linalg.LinAlgError:
if alpha == 0:
alpha = 1e-10
else:
alpha *= 10
print(">>>> {}::cholesky: Matrix is not positive definite, adding {:.0e} on the diagonal"
.format(os.path.basename(__file__), alpha))
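
A minimal sketch on random descriptors (sizes are assumptions): learn PCA whitening on 128-D vectors, then project to 64 dimensions with L2 normalization:

import numpy as np
X = np.random.randn(128, 1000)  # one 128-D descriptor per column
m, P = pcawhitenlearn(X)
Xw = whitenapply(X, m, P, dimensions=64)
print(Xw.shape)  # (64, 1000); each column has (approximately) unit L2 norm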