last update in 2024

2024-12-31 16:45:04 +08:00
parent dac3b3f2b6
commit 7e13e0f5b4
20 changed files with 1349 additions and 389 deletions
--- a/contrast/one2one_contrast.py
+++ b/contrast/one2one_contrast.py
@ -11,7 +11,7 @@ Created on Fri Aug 30 17:53:03 2024
            标准特征提取，并保存至文件夹 stdFeaturePath 中，
            也可在运行过程中根据与购物事件集合 barcodes 交集执行
    2. 1:1 比对性能测试，
-        func: one2one_eval(similPath)
+        func: one2one_simi()
        (1) 求购物事件和标准特征级 Barcode 交集，构造 evtDict、stdDict
        (2) 构造扫 A 放 A、扫 A 放 B 组合，mergePairs = AA_list + AB_list
        (3) 循环计算 mergePairs 中元素 "(A, A) 或 (A, B)" 相似度;
@ -20,7 +20,7 @@ Created on Fri Aug 30 17:53:03 2024
        
        
    3. precise、recall等指标计算
-        func: compute_precise_recall(pickpath)
+        func: compute_one2one_pr(pickpath)
        
        
@author: ym
@ -33,6 +33,9 @@ import sys
 import random 
 import pickle
 import json
+import random
+import copy
+import sys
 # import torch
 import time
 # import json
@ -54,6 +57,7 @@ from feat_extract.config import config as conf
 from feat_extract.inference import FeatsInterface
 from utils.event import ShoppingEvent, save_data
 from genfeats import gen_bcd_features
+from event_test import calc_simil



@ -175,14 +179,53 @@ def data_precision_compare(stdfeat, evtfeat, evtMessage, save=True):
            f.write(line + '\n')


-def one2one_simi():  
+
+
+def simi_calc(event, stdfeat):
+    '''
+    Compute cosine-similarity statistics between one event and one set of
+    standard features.
+
+    event:   object exposing `feats_select`, which is either a list whose
+             first element is the feature array to compare, or the feature
+             array itself.
+    stdfeat: array of standard features; it is handed to scipy `cdist`
+             together with the event features.
+
+    return:  (simi_mean, simi_max, simi_mfeat)
+             simi_mean  - mean of the pairwise similarity matrix
+             simi_max   - maximum of the pairwise similarity matrix
+             simi_mfeat - similarity between the mean standard feature and
+                          the mean event feature
+             (None, None, None) is returned when either side is empty.
+    '''
+    selected = event.feats_select
+    if isinstance(selected, list):
+        # Only the first entry of the list is used for the comparison.
+        if not (len(selected) and len(selected[0])):
+            return None, None, None
+        evtfeat = selected[0]
+    else:
+        evtfeat = selected
+
+    if len(evtfeat)==0 or len(stdfeat)==0:
+        return None, None, None
+
+    # Pairwise cosine similarity; negative similarities are clipped to 0.
+    matrix = 1 - cdist(evtfeat, stdfeat, 'cosine')
+    matrix[matrix < 0] = 0
+
+    simi_mean = np.mean(matrix)
+    simi_max  = np.max(matrix)
+
+    # Similarity of the two mean feature vectors. The clamp is applied to the
+    # distance (not to the similarity), so this value is capped at 1 but,
+    # unlike `matrix`, may still be negative.
+    stdfeatm = np.mean(stdfeat, axis=0, keepdims=True)
+    evtfeatm = np.mean(evtfeat, axis=0, keepdims=True)
+    simi_mfeat = 1- np.maximum(0.0, cdist(stdfeatm, evtfeatm, 'cosine'))
+
+    return simi_mean, simi_max, simi_mfeat[0,0]
+
+
+def build_std_evt_dict():
    '''
    stdFeaturePath: 标准特征集地址
    eventDataPath: Event对象地址
    '''
    
-    stdBarcode = [p.stem for p in Path(stdFeaturePath).iterdir() if p.is_file() and p.suffix=='.pickle']
+    # stdBarcode = [p.stem for p in Path(stdFeaturePath).iterdir() if p.is_file() and p.suffix=='.json']
    
+    '''*********** USearch ***********'''
+    stdFeaturePath = r"D:\contrast\stdlib\v11_test.json"
+    stdBarcode = []
+    stdlib = {}
+    with open(stdFeaturePath, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+    for dic in data['total']:
+        barcode = dic['key']
+        feature = np.array(dic['value'])
+        stdBarcode.append(barcode)
+        stdlib[barcode] = feature
+
    '''======1. 购物事件列表，该列表中的 Barcode 存在于标准的 stdBarcode 内 ==='''    
    evtList = [(p.stem, p.stem.split('_')[-1]) for p in Path(eventDataPath).iterdir() 
                  if p.is_file()
@ -192,16 +235,21 @@ def one2one_simi():
                  and p.stem.split('_')[-1] in stdBarcode
                  ]
    barcodes = set([bcd for _, bcd in evtList])
-    
+        
    '''======2. 构建用于比对的标准特征字典 ============='''
+    # stdDict = {}
+    # for barcode in barcodes:
+    #     stdpath = os.path.join(stdFeaturePath, barcode+'.json')
+    #     with open(stdpath, 'r', encoding='utf-8') as f:  
+    #         stddata = json.load(f)
+    #         feat = np.array(stddata["value"])
+    #         stdDict[barcode] = feat
+    
+    '''*********** USearch ***********'''        
    stdDict = {}
    for barcode in barcodes:
-        stdpath = os.path.join(stdFeaturePath, barcode+'.pickle')
-        with open(stdpath, 'rb') as f:  
-            stddata = pickle.load(f)
-        stdDict[barcode] = stddata
-    
-    
+        stdDict[barcode] = stdlib[barcode]  
+        
    '''======3. 构建用于比对的操作事件字典 ============='''
    evtDict = {}
    for evtname, barcode in evtList:
@ -209,21 +257,123 @@ def one2one_simi():
        with open(evtpath, 'rb') as f:  
            evtdata = pickle.load(f)
        evtDict[evtname] = evtdata
+        
+    return evtList, evtDict, stdDict
+
+def one2SN_pr(evtList, evtDict, stdDict):
+    '''
+    1:SN evaluation: compare every event with its own barcode plus up to SN
+    other barcodes, then plot precision/recall curves and histograms.
+
+    evtList: list of (evtname, barcode) pairs
+    evtDict: dict evtname -> event object (must expose front_feats and
+             back_feats)
+    stdDict: dict barcode -> standard feature, passed to calc_simil
+
+    Each (event, candidate barcode) similarity is classified as
+        TP: candidate is the event's barcode and has the highest similarity
+        FN: candidate is the event's barcode but is not the highest
+        FP: candidate is another barcode and has the highest similarity
+        TN: candidate is another barcode and is not the highest
+    Two matplotlib figures are shown; nothing is returned.
+    '''
+
+    std_barcodes = set([bcd for _, bcd in evtList])
+
+    # The *_events lists are not read afterwards; they are kept so the event
+    # names behind each category can be inspected while debugging.
+    tp_events, fn_events, fp_events, tn_events = [], [], [], []
+    tp_simi, fn_simi, tn_simi, fp_simi = [], [], [], []
+
+    SN = 9
+    for evtname, barcode in evtList:
+        event = evtDict[evtname]
+        ## Skip events that carry no trajectory features at all
+        if len(event.front_feats)+len(event.back_feats)==0:
+            print(evtname)
+            continue
+
+        # Candidates: the true barcode first, then at most SN random others
+        dset = list(std_barcodes - set([barcode]))
+        bcd_selected = [barcode] + random.sample(dset, min(SN, len(dset)))
+
+        # NOTE(review): the original author states that no None-guard is
+        # needed here because feature-less events were skipped above;
+        # confirm against event_test.calc_simil.
+        similars = [calc_simil(event, stdDict[stdbcd]) for stdbcd in bcd_selected]
+
+        # bcd_selected always contains the true barcode, so similars is never
+        # empty and exactly the four categories below can occur.
+        max_sim = max(similars)
+        for bcd, simi in zip(bcd_selected, similars):
+            is_target = bcd==barcode
+            is_top = simi==max_sim
+            if is_target and is_top:
+                tp_simi.append(simi)
+                tp_events.append(evtname)
+            elif is_target:
+                fn_simi.append(simi)
+                fn_events.append(evtname)
+            elif is_top:
+                fp_simi.append(simi)
+                fp_events.append(evtname)
+            else:
+                tn_simi.append(simi)
+                tn_events.append(evtname)
+
+    # Convert once instead of on every threshold iteration
+    tp_arr, fp_arr = np.array(tp_simi), np.array(fp_simi)
+    fn_arr, tn_arr = np.array(fn_simi), np.array(tn_simi)
+    pos_total = len(tp_simi)+len(fn_simi)
+    neg_total = len(tn_simi)+len(fp_simi)
+
+    PPreciseX, PRecallX = [], []
+    NPreciseX, NRecallX = [], []
+    Thresh = np.linspace(-0.2, 1, 100)
+    for th in Thresh:
+        # (Precise, Recall) for 1:SN: several similarities are computed and
+        # ranked; the matching barcode ranked first counts as TP.
+        TPX = np.count_nonzero(tp_arr >= th)
+        FPX = np.count_nonzero(fp_arr >= th)
+        FNX = np.count_nonzero(fn_arr < th)
+        TNX = np.count_nonzero(tn_arr < th)
+        # 1e-6 keeps the ratios defined when a denominator is zero
+        PPreciseX.append(TPX/(TPX+FPX+1e-6))
+        PRecallX.append(TPX/(pos_total+1e-6))
+
+        NPreciseX.append(TNX/(TNX+FNX+1e-6))
+        NRecallX.append(TNX/(neg_total+1e-6))
+
+    fig, ax = plt.subplots()
+    ax.plot(Thresh, PPreciseX, 'r', label='Precise_Pos: TP/TPFP')
+    ax.plot(Thresh, PRecallX, 'b', label='Recall_Pos: TP/TPFN')
+    # Labels match the formulas above: precision uses TN+FN, recall TN+FP
+    ax.plot(Thresh, NPreciseX, 'g', label='Precise_Neg: TN/TNFN')
+    ax.plot(Thresh, NRecallX, 'c', label='Recall_Neg: TN/TNFP')
+    ax.set_xlim([0, 1])
+    ax.set_ylim([0, 1])
+    ax.grid(True)
+    ax.set_title('1:SN Precise & Recall')
+    ax.set_xlabel(f"Event Num: {len(evtList)}")     
+    ax.legend()
+    plt.show()
+
+    ## Histograms of the similarities in each category
+    fig, axes = plt.subplots(2, 2)
+    panels = (
+        (axes[0, 0], tp_simi, 'TP'),
+        (axes[0, 1], fp_simi, 'FP'),
+        (axes[1, 0], tn_simi, 'TN'),
+        (axes[1, 1], fn_simi, 'FN'),
+    )
+    for panel, values, tag in panels:
+        panel.hist(values, bins=60, range=(-0.2, 1), edgecolor='black')
+        panel.set_xlim([-0.2, 1])
+        panel.set_title(f'{tag}({len(values)})')
+    plt.show()

    
-    '''======4.2 barcode 标准图像保存 =================='''
-    # for stdbcd in barcodes:    
-    #     stdImgpath = stdDict[stdbcd]["imgpaths"]
-    #     pstdpath = os.path.join(subimgPath, f"{stdbcd}")
-    #     if not os.path.exists(pstdpath):
-    #         os.makedirs(pstdpath)
-    #         ii = 1
-    #         for filepath in stdImgpath:
-    #             stdpath = os.path.join(pstdpath, f"{stdbcd}_{ii}.png")
-    #             shutil.copy2(filepath, stdpath)
-    #             ii += 1

-    '''======5 构造 3 个事件对: 扫 A 放 A, 扫 A 放 B, 合并 ===================='''
+
+def one2one_simi(evtList, evtDict, stdDict):  
+    
+    barcodes = set([bcd for _, bcd in evtList])
+    '''======1 构造 3 个事件对: 扫 A 放 A, 扫 A 放 B, 合并 ===================='''
    AA_list = [(evtname, barcode, "same") for evtname, barcode in evtList]
    AB_list = []
    for evtname, barcode in evtList:
@ -234,45 +384,36 @@ def one2one_simi():
        
    mergePairs = AA_list + AB_list

-    '''======6 计算事件、标准特征集相似度 =================='''
+    '''======2 计算事件、标准特征集相似度 =================='''
    rltdata = []  
    for i in range(len(mergePairs)):
        evtname, stdbcd, label = mergePairs[i]
        event = evtDict[evtname]
-
-        ##============================================ float32
-        stdfeat = stdDict[stdbcd]["feats_ft32"]
-        
-        evtfeat = event.feats_compose    
-        if len(evtfeat)==0: continue
-
-        matrix = 1 - cdist(stdfeat, evtfeat, 'cosine')
-        matrix[matrix < 0] = 0
-        
-        simi_mean = np.mean(matrix)
-        simi_max  = np.max(matrix)
-        stdfeatm = np.mean(stdfeat, axis=0, keepdims=True)
-        evtfeatm = np.mean(evtfeat, axis=0, keepdims=True)
-        simi_mfeat = 1- np.maximum(0.0, cdist(stdfeatm, evtfeatm, 'cosine'))
-        rltdata.append((label, stdbcd, evtname, simi_mean, simi_max, simi_mfeat[0,0]))
+        if len(event.feats_compose)==0: continue
+    
+        stdfeat = stdDict[stdbcd]  # float32
+    
+        simi_mean, simi_max, simi_mfeat = simi_calc(event, stdfeat)
+        if simi_mean is None:
+            continue
+    
+        rltdata.append((label, stdbcd, evtname, simi_mean, simi_max, simi_mfeat))
        
        '''================ float32、16、int8 精度比较与存储 ============='''
        # data_precision_compare(stdfeat, evtfeat, mergePairs[i], save=True)
        
-    print("func: one2one_eval(), have finished!")
+
    
    return rltdata
    
-            

- 
-def compute_precise_recall(rltdata): 
+def one2one_pr(rltdata): 
    Same, Cross = [], []
    for label, stdbcd, evtname, simi_mean, simi_max, simi_mft in rltdata:
        if label == "same":
-            Same.append(simi_mean)
+            Same.append(simi_max)
        if label == "diff":
-            Cross.append(simi_mean)
+            Cross.append(simi_max)

    Same = np.array(Same)
    Cross = np.array(Cross)
@ -280,11 +421,11 @@ def compute_precise_recall(rltdata):
    TNFP = len(Cross)
    
    # fig, axs = plt.subplots(2, 1)
-    # axs[0].hist(Same, bins=60, edgecolor='black')
+    # axs[0].hist(Same, bins=60, range=(-0.2, 1), edgecolor='black')
    # axs[0].set_xlim([-0.2, 1])
    # axs[0].set_title(f'Same Barcode, Num: {TPFN}')
    
-    # axs[1].hist(Cross, bins=60, edgecolor='black')
+    # axs[1].hist(Cross, bins=60, range=(-0.2, 1), edgecolor='black')
    # axs[1].set_xlim([-0.2, 1])
    # axs[1].set_title(f'Cross Barcode, Num: {TNFP}')
    # plt.savefig(f'./result/{file}_hist.png')  # svg, png, pdf
@ -324,6 +465,23 @@ def compute_precise_recall(rltdata):
    rltpath = os.path.join(similPath, 'pr.png')
    plt.savefig(rltpath)  # svg, png, pdf
    
+    
+    fig, axes = plt.subplots(2,1)
+    axes[0].hist(Same, bins=60, range=(-0.2, 1), edgecolor='black')
+    axes[0].set_xlim([-0.2, 1])
+    axes[0].set_title(f'TP({len(Same)})')
+
+    axes[1].hist(Cross, bins=60, range=(-0.2, 1), edgecolor='black')
+    axes[1].set_xlim([-0.2, 1])
+    axes[1].set_title(f'TN({len(Cross)})')
+    
+    rltpath = os.path.join(similPath, 'hist.png')
+    plt.savefig(rltpath) 
+    
+
+    plt.show()
+    
+    

 def gen_eventdict(sourcePath, saveimg=True):         
    k, errEvents = 0, []
@ -358,9 +516,7 @@ def gen_eventdict(sourcePath, saveimg=True):
            f.write(line + '\n')
            

-
-def test_one2one(): 
-    
+def init_std_evt_dict():
    '''==== 0. 生成事件列表和对应的 Barcodes列表 ==========='''
    bcdList, event_spath = [], []
    for evtpath in eventSourcePath:
@ -383,10 +539,33 @@ def test_one2one():
    print("eventList have generated and saved!")
    

-    '''==== 3. 1:1性能评估 ===============''' 
-    rltdata = one2one_simi()
-    compute_precise_recall(rltdata)
+
+
+def test_one2one(): 
+    '''Run the 1:1 comparison performance evaluation.''' 
     
+    # Step 1 (needs to run only once): generate the event dictionary and the
+    # corresponding standard feature library dictionary.
+    # init_std_evt_dict()
+    
+    # Step 2: build the event set from the intersection of the event
+    # barcodes and the standard library barcodes.
+    events, event_data, std_data = build_std_evt_dict()
+    
+    # Step 3: score every pair, then compute precision / recall.
+    one2one_pr(one2one_simi(events, event_data, std_data))
+    
+
+def test_one2SN():  
+    '''Run the 1:SN comparison performance evaluation.''' 
+    
+    # Step 1 (needs to run only once): generate the event dictionary and the
+    # corresponding standard feature library dictionary.
+    # init_std_evt_dict()
+    
+    # Step 2: intersect the event barcodes with the standard library
+    # barcodes, then evaluate on the resulting dictionaries.
+    events, event_data, std_data = build_std_evt_dict()
+    one2SN_pr(events, event_data, std_data)    
+
    
 if __name__ == '__main__':
    '''
@ -402,7 +581,7 @@ if __name__ == '__main__':
    
    stdSamplePath = r"\\192.168.1.28\share\数据\已完成数据\展厅数据\v1.0\比对数据\整理\zhantingBase"
    stdBarcodePath = r"D:\exhibition\dataset\bcdpath"
-    stdFeaturePath = r"D:\exhibition\dataset\feats"
+    stdFeaturePath = r"\\192.168.1.28\share\数据\已完成数据\比对数据\barcode\all_totalBarocde\features_json\v11_barcode_11592"
    
    # eventSourcePath  = [r'D:\exhibition\images\20241202']
    # eventSourcePath  = [r"\\192.168.1.28\share\测试视频数据以及日志\各模块测试记录\展厅测试\1129_展厅模型v801测试组测试"]
@ -419,6 +598,8 @@ if __name__ == '__main__':
        os.makedirs(similPath)

    test_one2one()
+    
+    # test_one2SN()