This commit is contained in:
lee
2025-06-11 15:23:50 +08:00
commit 37ecef40f7
79 changed files with 26981 additions and 0 deletions

11
.gitignore vendored Normal file
@@ -0,0 +1,11 @@
*.pth
blog/
data/
experiment/
log/
shop_xlsx/
loss/
checkpoints/
search_library/
quant_imgs/
README.md

8
.idea/.gitignore generated vendored Normal file
@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

869
.idea/CopilotChatHistory.xml generated Normal file

File diff suppressed because one or more lines are too long

6
.idea/CopilotSideBarWebPersist.xml generated Normal file
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CopilotSideBarWebPersist">
<option name="autoAddFileCloseState" value="true" />
</component>
</project>

19326
.idea/CopilotWebChatHistory.xml generated Normal file

File diff suppressed because one or more lines are too long

8
.idea/contrast_nettest.iml generated Normal file
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="服务器3-NV4090-env:py-contrast-nettest" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

114
.idea/deployment.xml generated Normal file
@@ -0,0 +1,114 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" autoUpload="Always" serverName="lc@192.168.10.89:22 password (6)" exclude=".svn;.cvs;.idea;.DS_Store;.git;.hg;*.hprof;*.pyc;*.jpg;*.mp4;data/" remoteFilesAllowedToDisappearOnAutoupload="false" confirmBeforeUploading="false">
<option name="confirmBeforeUploading" value="false" />
<serverData>
<paths name="contrast_nettest">
<serverdata>
<mappings>
<mapping deploy="/contrast_nettest" local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="ieemoo0169@192.168.10.93:22 password">
<serverdata>
<mappings>
<mapping deploy="/home/ieemoo0169/contrast_nettest" local="$PROJECT_DIR$" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.56:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.89:22 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.89:22 password (10)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.89:22 password (11)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.89:22 password (2)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.89:22 password (3)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.89:22 password (4)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.89:22 password (5)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.89:22 password (6)">
<serverdata>
<mappings>
<mapping deploy="/home/lc/contrast_nettest" local="$PROJECT_DIR$" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.89:22 password (7)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.89:22 password (8)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lc@192.168.10.89:22 password (9)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="yolov5">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
<option name="myAutoUpload" value="ALWAYS" />
</component>
</project>

@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

7
.idea/misc.xml generated Normal file
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Remote Python 3.8.18 (sftp://lc@192.168.1.142:22/home/lc/project/miniconda3/envs/my_env/bin/python)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="服务器3-NV4090-env:py-contrast-nettest" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/contrast_nettest.iml" filepath="$PROJECT_DIR$/.idea/contrast_nettest.iml" />
</modules>
</component>
</project>

8
.idea/sshConfigs.xml generated Normal file
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="SshConfigs">
<configs>
<sshConfig authType="PASSWORD" connectionConfig="{&quot;serverAliveInterval&quot;:300}" host="192.168.1.28" id="f9cd63ee-d39a-42a7-b369-1eb74d4f71ae" port="22" nameFormat="DESCRIPTIVE" username="ieemoo0169" useOpenSSHConfig="true" />
</configs>
</component>
</project>

6
.idea/vcs.xml generated Normal file
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

14
.idea/webServers.xml generated Normal file
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="WebServers">
<option name="servers">
<webServer id="422a5cdc-8aff-4e1f-9f9a-2377f5a31f0b" name="contrast_nettest">
<fileTransfer rootFolder="/home/ieemoo0169" accessType="SFTP" host="192.168.1.28" port="22" sshConfigId="74dc3f38-9a9b-4eb8-ae6f-ed04cca88f27" sshConfig="ieemoo0169@192.168.1.28:22 password">
<advancedOptions>
<advancedOptions dataProtectionLevel="Private" passiveMode="true" shareSSLContext="true" />
</advancedOptions>
</fileTransfer>
</webServer>
</option>
</component>
</project>

9
.vscode/sftp.json vendored Normal file
@@ -0,0 +1,9 @@
{
"name": "My Server",
"host": "localhost",
"protocol": "sftp",
"port": 22,
"username": "username",
"remotePath": "/",
"uploadOnSave": true
}

Binary file not shown.

Binary file not shown.

122
config.py Normal file
@@ -0,0 +1,122 @@
import torch
import torchvision.transforms as T
import torchvision.transforms.functional as F
def pad_to_square(img):
w, h = img.size
max_wh = max(w, h)
pad_left, pad_top = (max_wh - w) // 2, (max_wh - h) // 2
padding = [pad_left, pad_top, max_wh - w - pad_left, max_wh - h - pad_top] # (left, top, right, bottom); handles odd differences
return F.pad(img, padding, fill=0, padding_mode='constant')
class Config:
# network settings
backbone = 'resnet18' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large,
# mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5, vit_base]
metric = 'arcface' # [cosface, arcface, softmax]
cbam = False
embedding_size = 256 # gift: 2, contrast: 256
drop_ratio = 0.5
img_size = 224
multiple_cards = True # multi-GPU loading
model_half = False # half-precision (fp16) model test
data_half = True # half-precision (fp16) data test
channel_ratio = 0.75 # channel pruning ratio
quantization_test = False # int8-quantized model test
# custom base_data settings
custom_backbone = False # transfer learning: load all layers except the last
custom_num_classes = 128 # number of classes for transfer learning
# if quantization_test:
# device = torch.device('cpu')
# else:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
teacher = 'vit' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1,
# PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5]
student = 'resnet'
# data preprocess
"""transforms.RandomCrop(size),
transforms.RandomVerticalFlip(p=0.5),
transforms.RandomHorizontalFlip(),
RandomRotate(15, 0.3),
# RandomGaussianBlur()"""
train_transform = T.Compose([
T.Lambda(pad_to_square), # pad to square
T.ToTensor(),
T.Resize((img_size, img_size), antialias=True),
# T.RandomCrop(img_size * 4 // 5),
T.RandomHorizontalFlip(p=0.5),
T.RandomRotation(180),
T.ColorJitter(brightness=0.5),
T.ConvertImageDtype(torch.float32),
T.Normalize(mean=[0.5], std=[0.5]),
])
test_transform = T.Compose([
# T.Lambda(pad_to_square), # pad to square
T.ToTensor(),
T.Resize((img_size, img_size), antialias=True),
T.ConvertImageDtype(torch.float32),
# T.Normalize(mean=[0,0,0], std=[255,255,255]),
T.Normalize(mean=[0.5], std=[0.5]),
])
# dataset
train_root = '../data_center/scatter/train' # ['./data/2250_train/base_data', # './data/2000_train/base_data', './data/zhanting/base_data', './data/base_train/one_stage/train']
test_root = '../data_center/scatter/val' # ["./data/2250_train/val", "./data/2000_train/val/", './data/zhanting/val', './data/base_train/one_stage/val']
# training settings
checkpoints = "checkpoints/resnet18_scatter_6.2/" # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3]
restore = True
# restore_model = "checkpoints/renet18_2250_0315/best_resnet18_2250_0315.pth" # best_resnet18_1491_0306.pth
restore_model = "checkpoints/resnet18_scatter_6.2/best.pth" # best_resnet18_1491_0306.pth
# test settings
testbackbone = 'resnet18' # [resnet18, mobilevit_s, mobilenet_v2, mobilenetv3_small, mobilenetv3_large, mobilenet_v1, PPLCNET_x1_0, PPLCNET_x0_5]
# test_val = "./data/2250_train"
# test_list = "./data/2250_train/val_pair.txt"
# test_group_json = "./data/2250_train/cross_same.json"
test_val = "../data_center/scatter/" # [../data_center/contrast_learning/model_test_data/val_2250]
test_list = "../data_center/scatter/val_pair.txt" # [./data/test/public_single_pairs.txt]
test_group_json = "../data_center/contrast_learning/model_test_data/test/inner_group_pairs.json" # [./data/2250_train/cross_same.json]
# test_group_json = "./data/test/inner_group_pairs.json"
# test_model = "checkpoints/resnet18_scatter_6.2/best.pth"
test_model = "checkpoints/resnet18_1009/best.pth"
# test_model = "checkpoints/zhanting/inland/res_801.pth"
# test_model = "checkpoints/resnet18_20250504/best.pth"
# test_model = "checkpoints/resnet18_vit-base_20250430/best.pth"
group_test = False
# group_test = False
train_batch_size = 128 # 256
test_batch_size = 128 # 256
epoch = 5 # 512
optimizer = 'sgd' # ['sgd', 'adam', 'adamw']
lr = 5e-3 # 1e-2
lr_step = 10 # 10
lr_decay = 0.98 # 0.98
weight_decay = 5e-4
loss = 'cross_entropy' # ['focal_loss', 'cross_entropy']
log_path = './log'
lr_min = 1e-6 # min lr
pin_memory = False # if memory is large, set it True to speed up a bit
num_workers = 32 # 64
compare = False # compare the result of different models
'''
train_distill settings
'''
warmup_epochs = 3 # warmup_epoch
distributed = True # distributed training
teacher_path = "./checkpoints/resnet50_0519/best.pth"
distill_weight = 0.8 # distillation weight
config = Config()
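
A minimal usage sketch for this config (the ImageFolder/DataLoader wiring below is illustrative, not part of this commit; it assumes train_root points at a class-per-folder image tree):

from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from config import config as conf

# The dataset applies pad_to_square plus the augmentations in train_transform.
train_set = ImageFolder(conf.train_root, transform=conf.train_transform)
train_loader = DataLoader(train_set, batch_size=conf.train_batch_size, shuffle=True,
                          num_workers=conf.num_workers, pin_memory=conf.pin_memory)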

1
configs/__init__.py Normal file
@@ -0,0 +1 @@
from .utils import trainer_tools

69
configs/compare.yml Normal file
@@ -0,0 +1,69 @@
# configs/compare.yml
# Configuration file designed for model-training comparison
# Supports comparing different training strategies (e.g., distillation vs. standalone training)
# Base settings
base:
  experiment_name: "model_comparison"  # experiment name (used for the results directory)
  seed: 42  # random seed (for reproducibility)
  device: "cuda"  # training device: cuda/cpu
  log_level: "info"  # log level: debug/info/warning/error
  embedding_size: 256  # feature dimension
  pin_memory: true  # whether to enable pin_memory
  distributed: true  # whether to enable distributed training
# Model settings
models:
  backbone: 'resnet18'
  channel_ratio: 0.75
# Training parameters
training:
  epochs: 600  # total training epochs
  batch_size: 128  # batch size
  lr: 0.001  # initial learning rate
  optimizer: "sgd"  # optimizer type
  metric: 'arcface'  # metric head: arcface/cosface/sphereface/softmax
  loss: "cross_entropy"  # loss type: cross_entropy/cross_entropy_smooth/center_loss/center_loss_smooth/arcface/cosface/sphereface/softmax
  lr_step: 10  # LR adjustment interval (epochs)
  lr_decay: 0.98  # LR decay rate
  weight_decay: 0.0005  # weight decay
  scheduler: "cosine_annealing"  # LR scheduler: cosine_annealing/step/none
  num_workers: 32  # data-loading workers
  checkpoints: "./checkpoints/resnet18_test/"  # checkpoint directory
  restore: false
  restore_model: "resnet18_test/epoch_600.pth"  # restore path
# Validation parameters
validation:
  num_workers: 32  # data-loading workers
  val_batch_size: 128  # validation batch size
# Data settings
data:
  dataset: "imagenet"  # dataset name (example; replace with the actual dataset)
  train_batch_size: 128  # training batch size
  val_batch_size: 128  # validation batch size
  num_workers: 32  # data-loading workers
  data_train_dir: "../data_center/contrast_learning/data_base/train"  # training data root
  data_val_dir: "../data_center/contrast_learning/data_base/val"  # validation data root
  transform:
    img_size: 224  # image size
    img_mean: 0.5  # image mean
    img_std: 0.5  # image std
    RandomHorizontalFlip: 0.5  # random horizontal flip probability
    RandomRotation: 180  # random rotation angle (degrees)
    ColorJitter: 0.5  # color jitter strength
# Logging and monitoring
logging:
  logging_dir: "./logs"  # log directory
  tensorboard: true  # whether to enable TensorBoard
  checkpoint_interval: 30  # checkpoint save interval (epochs)
# Distributed training (optional)
distributed:
  enabled: false  # whether to enable distributed training
  backend: "nccl"  # distributed backend: nccl/gloo
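
These YAML files are plain nested mappings, so they load directly into the dict-style conf that configs/utils.py indexes. A minimal loading sketch (assumes PyYAML):

import yaml

with open('configs/compare.yml', 'r', encoding='utf-8') as f:
    conf = yaml.safe_load(f)

print(conf['models']['backbone'])  # 'resnet18'
print(conf['training']['lr'])      # 0.001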

75
configs/distill.yml Normal file
@@ -0,0 +1,75 @@
# configs/distill.yml
# Configuration file designed for model-training comparison
# Supports comparing different training strategies (e.g., distillation vs. standalone training)
# Base settings
base:
  experiment_name: "model_comparison"  # experiment name (used for the results directory)
  seed: 42  # random seed (for reproducibility)
  device: "cuda"  # training device: cuda/cpu
  log_level: "info"  # log level: debug/info/warning/error
  embedding_size: 256  # feature dimension
  pin_memory: true  # whether to enable pin_memory
  distributed: true  # whether to enable distributed training
# Model settings
models:
  backbone: 'resnet18'
  channel_ratio: 1.0  # backbone channel scaling ratio (default)
  student_channel_ratio: 0.75
  teacher_model_path: "./checkpoints/resnet50_0519/best.pth"
# Training parameters
training:
  epochs: 600  # total training epochs
  batch_size: 128  # batch size
  lr: 0.001  # initial learning rate
  optimizer: "sgd"  # optimizer type
  metric: 'arcface'  # metric head: arcface/cosface/sphereface/softmax
  loss: "cross_entropy"  # loss type: cross_entropy/cross_entropy_smooth/center_loss/center_loss_smooth/arcface/cosface/sphereface/softmax
  lr_step: 10  # LR adjustment interval (epochs)
  lr_decay: 0.98  # LR decay rate
  weight_decay: 0.0005  # weight decay
  scheduler: "cosine_annealing"  # LR scheduler: cosine_annealing/step/none
  num_workers: 32  # data-loading workers
  checkpoints: "./checkpoints/resnet18_test/"  # checkpoint directory
  restore: false
  restore_model: "resnet18_test/epoch_600.pth"  # restore path
  distill_weight: 0.8  # distillation loss weight
  temperature: 4  # distillation temperature
# Validation parameters
validation:
  num_workers: 32  # data-loading workers
  val_batch_size: 128  # validation batch size
# Data settings
data:
  dataset: "imagenet"  # dataset name (example; replace with the actual dataset)
  train_batch_size: 128  # training batch size
  val_batch_size: 100  # validation batch size
  num_workers: 4  # data-loading workers
  data_train_dir: "../data_center/contrast_learning/data_base/train"  # training data root
  data_val_dir: "../data_center/contrast_learning/data_base/val"  # validation data root
  transform:
    img_size: 224  # image size
    img_mean: 0.5  # image mean
    img_std: 0.5  # image std
    RandomHorizontalFlip: 0.5  # random horizontal flip probability
    RandomRotation: 180  # random rotation angle (degrees)
    ColorJitter: 0.5  # color jitter strength
# Logging and monitoring
logging:
  logging_dir: "./logs"  # log directory
  tensorboard: true  # whether to enable TensorBoard
  checkpoint_interval: 30  # checkpoint save interval (epochs)
# Distributed training (optional)
distributed:
  enabled: false  # whether to enable distributed training
  backend: "nccl"  # distributed backend: nccl/gloo
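
distill_weight and temperature here correspond to the usual soft-target distillation objective, the same combination DistillWrapper in model/distill.py computes: loss = (1 - w) * CE(student, labels) + w * T^2 * KL(student/T || teacher/T). A self-contained sketch of that formula (teacher_logits assumed precomputed, e.g. under torch.no_grad()):

import torch.nn.functional as F

def kd_loss(student_logits, teacher_logits, labels, w=0.8, T=4.0):
    ce = F.cross_entropy(student_logits, labels)  # hard-label term
    # Soft-target term; the T**2 factor keeps gradient magnitudes comparable across temperatures.
    kl = F.kl_div(F.log_softmax(student_logits / T, dim=-1),
                  F.softmax(teacher_logits / T, dim=-1),
                  reduction='batchmean') * T * T
    return (1 - w) * ce + w * kl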

69
configs/scatter.yml Normal file
@@ -0,0 +1,69 @@
# configs/scatter.yml
# Configuration file designed for model-training comparison
# Supports comparing different training strategies (e.g., distillation vs. standalone training)
# Base settings
base:
  device: "cuda"  # training device: cuda/cpu
  log_level: "info"  # log level: debug/info/warning/error
  embedding_size: 256  # feature dimension
  pin_memory: true  # whether to enable pin_memory
  distributed: true  # whether to enable distributed training
# Model settings
models:
  backbone: 'resnet18'
  channel_ratio: 1.0
# Training parameters
training:
  epochs: 300  # total training epochs
  batch_size: 64  # batch size
  lr: 0.005  # initial learning rate
  optimizer: "sgd"  # optimizer type
  metric: 'arcface'  # metric head: arcface/cosface/sphereface/softmax
  loss: "cross_entropy"  # loss type: cross_entropy/cross_entropy_smooth/center_loss/center_loss_smooth/arcface/cosface/sphereface/softmax
  lr_step: 10  # LR adjustment interval (epochs)
  lr_decay: 0.98  # LR decay rate
  weight_decay: 0.0005  # weight decay
  scheduler: "cosine_annealing"  # LR scheduler: cosine_annealing/step/none
  num_workers: 32  # data-loading workers
  checkpoints: "./checkpoints/resnet18_scatter_6.2/"  # checkpoint directory
  restore: true
  restore_model: "checkpoints/resnet18_scatter_6.2/best.pth"  # restore path
# Validation parameters
validation:
  num_workers: 32  # data-loading workers
  val_batch_size: 128  # validation batch size
# Data settings
data:
  dataset: "imagenet"  # dataset name (example; replace with the actual dataset)
  train_batch_size: 128  # training batch size
  val_batch_size: 100  # validation batch size
  num_workers: 32  # data-loading workers
  data_train_dir: "../data_center/scatter/train"  # training data root
  data_val_dir: "../data_center/scatter/val"  # validation data root
  transform:
    img_size: 224  # image size
    img_mean: 0.5  # image mean
    img_std: 0.5  # image std
    RandomHorizontalFlip: 0.5  # random horizontal flip probability
    RandomRotation: 180  # random rotation angle (degrees)
    ColorJitter: 0.5  # color jitter strength
# Logging and monitoring
logging:
  logging_dir: "./log/2025.6.2-scatter.txt"  # log file path
  tensorboard: true  # whether to enable TensorBoard
  checkpoint_interval: 30  # checkpoint save interval (epochs)
# Distributed training (optional)
distributed:
  enabled: false  # whether to enable distributed training
  backend: "nccl"  # distributed backend: nccl/gloo

41
configs/test.yml Normal file
@@ -0,0 +1,41 @@
# configs/test.yml
# Configuration file designed for model-training comparison
# Supports comparing different training strategies (e.g., distillation vs. standalone training)
# Base settings
base:
  device: "cuda"  # device: cuda/cpu
  log_level: "info"  # log level: debug/info/warning/error
  embedding_size: 256  # feature dimension
  pin_memory: true  # whether to enable pin_memory
  distributed: true  # whether to enable distributed training
# Model settings
models:
  backbone: 'resnet18'
  channel_ratio: 1.0
  model_path: "./checkpoints/resnet18_scatter_6.2/best.pth"
  half: false  # whether to enable half-precision (fp16) testing
# Data settings
data:
  group_test: false  # whether to run grouped-pair testing
  test_batch_size: 128  # test batch size
  num_workers: 32  # data-loading workers
  test_dir: "../data_center/scatter/"  # test data root
  test_group_json: "../data_center/contrast_learning/model_test_data/test/inner_group_pairs.json"
  test_list: "../data_center/scatter/val_pair.txt"
  transform:
    img_size: 224  # image size
    img_mean: 0.5  # image mean
    img_std: 0.5  # image std
    RandomHorizontalFlip: 0.5  # random horizontal flip probability
    RandomRotation: 180  # random rotation angle (degrees)
    ColorJitter: 0.5  # color jitter strength
save:
  save_dir: ""
  save_name: ""

56
configs/utils.py Normal file
@@ -0,0 +1,56 @@
from model import (resnet18, mobilevit_s, MobileNetV3_Small, MobileNetV3_Large,
PPLCNET_x1_0, PPLCNET_x0_5, PPLCNET_x2_5) # mobilenet_v1 is commented out in model/__init__.py and cannot be imported from model
from timm.models import vit_base_patch16_224 as vit_base_16
from model.metric import ArcFace, CosFace
import torch.optim as optim
import torch.nn as nn
import timm
class trainer_tools:
def __init__(self, conf):
self.conf = conf
def get_backbone(self):
backbone_mapping = {
'resnet18': lambda: resnet18(scale=self.conf['models']['channel_ratio']),
'mobilevit_s': lambda: mobilevit_s(),
'mobilenetv3_small': lambda: MobileNetV3_Small(),
'PPLCNET_x1_0': lambda: PPLCNET_x1_0(),
'PPLCNET_x0_5': lambda: PPLCNET_x0_5(),
'PPLCNET_x2_5': lambda: PPLCNET_x2_5(),
'mobilenetv3_large': lambda: MobileNetV3_Large(),
'vit_base': lambda: vit_base_16(pretrained=True),
'efficientnet': lambda: timm.create_model('efficientnet_b0', pretrained=True,
num_classes=self.conf['base']['embedding_size']) # conf is a dict here; attribute access would raise AttributeError
}
return backbone_mapping
def get_metric(self, class_num):
# metric selection uses a dict mapping for readability and extensibility
metric_mapping = {
'arcface': lambda: ArcFace(self.conf['base']['embedding_size'], class_num).to(self.conf['base']['device']),
'cosface': lambda: CosFace(self.conf['base']['embedding_size'], class_num).to(self.conf['base']['device']),
'softmax': lambda: nn.Linear(self.conf['base']['embedding_size'], class_num).to(self.conf['base']['device'])
}
return metric_mapping
def get_optimizer(self, model, metric):
optimizer_mapping = {
'sgd': lambda: optim.SGD(
[{'params': model.parameters()}, {'params': metric.parameters()}],
lr=self.conf['training']['lr'],
weight_decay=self.conf['training']['weight_decay']
),
'adam': lambda: optim.Adam(
[{'params': model.parameters()}, {'params': metric.parameters()}],
lr=self.conf['training']['lr'],
weight_decay=self.conf['training']['weight_decay']
),
'adamw': lambda: optim.AdamW(
[{'params': model.parameters()}, {'params': metric.parameters()}],
lr=self.conf['training']['lr'],
weight_decay=self.conf['training']['weight_decay']
)
}
return optimizer_mapping
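
Each getter above returns a mapping of constructors rather than instances, so the caller selects by config key and then invokes the lambda. A usage sketch (conf is a dict loaded from one of the YAML files above; class_num=100 is illustrative):

tools = trainer_tools(conf)
model = tools.get_backbone()[conf['models']['backbone']]()              # e.g. 'resnet18'
metric = tools.get_metric(class_num=100)[conf['training']['metric']]()  # e.g. 'arcface'
optimizer = tools.get_optimizer(model, metric)[conf['training']['optimizer']]()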

47
configs/write_feature.yml Normal file
@@ -0,0 +1,47 @@
# configs/write_feature.yml
# Configuration file designed for model-training comparison
# Supports comparing different training strategies (e.g., distillation vs. standalone training)
# Base settings
base:
  device: "cuda"  # device: cuda/cpu
  log_level: "info"  # log level: debug/info/warning/error
  embedding_size: 256  # feature dimension
  distributed: true  # whether to enable distributed training
  pin_memory: true  # whether to enable pin_memory
# Model settings
models:
  backbone: 'resnet18'
  channel_ratio: 0.75
  checkpoints: "../checkpoints/resnet18_1009/best.pth"
# Data settings
data:
  train_batch_size: 128  # training batch size
  test_batch_size: 128  # test batch size
  num_workers: 32  # data-loading workers
  half: true  # whether to use half-precision data
  img_dirs_path: "/shareData/temp_data/comparison/Hangzhou_Yunhe/base_data/05-09"
  # img_dirs_path: "/home/lc/contrast_nettest/data/feature_json"
  xlsx_pth: false  # product filter; defaults to None (no filtering)
  transform:
    img_size: 224  # image size
    img_mean: 0.5  # image mean
    img_std: 0.5  # image std
    RandomHorizontalFlip: 0.5  # random horizontal flip probability
    RandomRotation: 180  # random rotation angle (degrees)
    ColorJitter: 0.5  # color jitter strength
# Logging and monitoring
logging:
  logging_dir: "./logs"  # log directory
  tensorboard: true  # whether to enable TensorBoard
  checkpoint_interval: 30  # checkpoint save interval (epochs)
save:
  json_bin: "../search_library/yunhedian_05-09.json"  # save the full JSON file
  json_path: "../data/feature_json_compare/"  # directory for per-item JSON files
  error_barcodes: "error_barcodes.txt"
  barcodes_statistics: "../search_library/barcodes_statistics.txt"

88
model/BAM.py Normal file
@@ -0,0 +1,88 @@
import torch.nn as nn
import torchvision
from torch.nn import init
class Flatten(nn.Module):
def forward(self, x):
return x.view(x.shape[0], -1)
class ChannelAttention(nn.Module):
def __init__(self, channel, reduction=16, num_layers=3): # fixed: __init__ typo; defaults match BAMblock's two-argument call
super(ChannelAttention, self).__init__()
self.avgpool = nn.AdaptiveAvgPool2d(1)
gate_channels = [channel]
gate_channels += [channel // reduction] * num_layers # channel is an int; len(channel) would raise TypeError
gate_channels += [channel]
self.ca = nn.Sequential()
self.ca.add_module('flatten', Flatten())
for i in range(len(gate_channels) - 2): # unique names: add_module('') raises KeyError
self.ca.add_module('fc%d' % i, nn.Linear(gate_channels[i], gate_channels[i + 1]))
self.ca.add_module('bn%d' % i, nn.BatchNorm1d(gate_channels[i + 1]))
self.ca.add_module('relu%d' % i, nn.ReLU())
self.ca.add_module('fc_out', nn.Linear(gate_channels[-2], gate_channels[-1]))
def forward(self, x):
res = self.avgpool(x)
res = self.ca(res)
res = res.unsqueeze(-1).unsqueeze(-1).expand_as(x)
return res
class SpatialAttention(nn.Module):
def __init__(self, channel, reduction=16, num_lay=3, dilation=2): # fixed: __init__ typo
super(SpatialAttention, self).__init__() # fixed: super() needs the instance
self.sa = nn.Sequential()
# fixed: unique module names, matched conv/BN channel counts, padding=dilation keeps spatial size
self.sa.add_module('conv_reduce', nn.Conv2d(kernel_size=1, in_channels=channel, out_channels=channel // reduction))
self.sa.add_module('bn_reduce', nn.BatchNorm2d(num_features=channel // reduction))
self.sa.add_module('relu_reduce', nn.ReLU())
for i in range(num_lay):
self.sa.add_module('conv_%d' % i, nn.Conv2d(kernel_size=3,
in_channels=(channel // reduction),
out_channels=(channel // reduction),
padding=dilation,
dilation=dilation))
self.sa.add_module('bn_%d' % i, nn.BatchNorm2d(channel // reduction))
self.sa.add_module('relu_%d' % i, nn.ReLU())
self.sa.add_module('conv_out', nn.Conv2d(channel // reduction, 1, kernel_size=1))
def forward(self, x):
res = self.sa(x)
res = res.expand_as(x)
return res
class BAMblock(nn.Module):
def __init__(self, channel=512, reduction=16, dia_val=2):
super(BAMblock, self).__init__()
self.ca = ChannelAttention(channel, reduction)
self.sa = SpatialAttention(channel, reduction, dilation=dia_val) # dia_val is the dilation, not the layer count
self.sigmoid = nn.Sigmoid()
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out') # fixed: kaiming_normal is deprecated
if m.bias is not None: # fixed: 'bais' typo
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
b, c, _, _ = x.size()
sa_out = self.sa(x)
ca_out = self.ca(x)
weight = self.sigmoid(sa_out + ca_out)
out = (1 + weight) * x
return out
if __name__ == "__main__":
print(512 // 14)
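
A shape-only smoke test for the block above (assumes the __init__/naming fixes noted inline; batch size > 1 is required by the BatchNorm1d inside ChannelAttention):

import torch

x = torch.randn(2, 512, 7, 7)
bam = BAMblock(channel=512, reduction=16, dia_val=2)
print(bam(x).shape)  # torch.Size([2, 512, 7, 7])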

70
model/CBAM.py Normal file
@@ -0,0 +1,70 @@
import torch
import torch.nn as nn
import torch.nn.init as init
class channelAttention(nn.Module):
def __init__(self, channel, reduction=16):
super(channelAttention, self).__init__()
self.Maxpooling = nn.AdaptiveMaxPool2d(1)
self.Avepooling = nn.AdaptiveAvgPool2d(1)
self.ca = nn.Sequential()
self.ca.add_module('conv1',nn.Conv2d(channel, channel//reduction, 1, bias=False))
self.ca.add_module('Relu', nn.ReLU())
self.ca.add_module('conv2',nn.Conv2d(channel//reduction, channel, 1, bias=False))
self.sigmoid = nn.Sigmoid()
def forward(self, x):
M_out = self.Maxpooling(x)
A_out = self.Avepooling(x)
M_out = self.ca(M_out)
A_out = self.ca(A_out)
out = self.sigmoid(M_out + A_out)
return out
class SpatialAttention(nn.Module):
def __init__(self, kernel_size=7):
super().__init__()
self.conv = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=kernel_size, padding=kernel_size // 2)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
max_result, _ = torch.max(x, dim=1, keepdim=True)
avg_result = torch.mean(x, dim=1, keepdim=True)
result = torch.cat([max_result, avg_result], dim=1)
output = self.conv(result)
output = self.sigmoid(output)
return output
class CBAM(nn.Module):
def __init__(self, channel, reduction=16, kernel_size=7):
super().__init__()
self.ca = channelAttention(channel, reduction)
self.sa = SpatialAttention(kernel_size)
def init_weights(self):
for m in self.modules(): # weight initialization
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
# b,c_,_ = x.size()
# residual = x
out = x*self.ca(x)
out = out*self.sa(out)
return out
if __name__ == '__main__':
input=torch.randn(50,512,7,7)
kernel_size=input.shape[2]
cbam = CBAM(channel=512,reduction=16,kernel_size=kernel_size)
output=cbam(input)
print(output.shape)

37
model/Tool.py Normal file
@@ -0,0 +1,37 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class GeM(nn.Module):
def __init__(self, p=3, eps=1e-6):
super(GeM, self).__init__()
self.p = nn.Parameter(torch.ones(1) * p)
self.eps = eps
def forward(self, x):
return self.gem(x, p=self.p, eps=self.eps, stride=2)
def gem(self, x, p=3, eps=1e-6, stride=2):
return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1)), stride=2).pow(1. / p)
def __repr__(self):
return self.__class__.__name__ + \
'(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
', ' + 'eps=' + str(self.eps) + ')'
class TripletLoss(nn.Module):
def __init__(self, margin):
super(TripletLoss, self).__init__()
self.margin = margin
def forward(self, anchor, positive, negative, size_average=True):
distance_positive = (anchor - positive).pow(2).sum(1)
distance_negative = (anchor - negative).pow(2).sum(1)
losses = F.relu(distance_negative - distance_positive + self.margin)
return losses.mean() if size_average else losses.sum()
if __name__ == '__main__':
print('')
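
A quick usage sketch for both utilities above (random tensors stand in for real embeddings and feature maps):

import torch

anchor, positive, negative = torch.randn(3, 8, 256)
criterion = TripletLoss(margin=1.0)
print(criterion(anchor, positive, negative))  # scalar mean triplet loss

gem = GeM(p=3)
print(gem(torch.randn(8, 512, 7, 7)).shape)  # torch.Size([8, 512, 1, 1])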

14
model/__init__.py Normal file
@@ -0,0 +1,14 @@
from .fmobilenet import FaceMobileNet
# from .resnet_face import ResIRSE
from .mobilevit import mobilevit_s
from .metric import ArcFace, CosFace
from .loss import FocalLoss
from .resbam import resnet
from .resnet_pre import resnet18, resnet34, resnet50, resnet14, CustomResNet18
from .mobilenet_v2 import mobilenet_v2
from .mobilenet_v3 import MobileNetV3_Small, MobileNetV3_Large
# from .mobilenet_v1 import mobilenet_v1
from .lcnet import PPLCNET_x0_25, PPLCNET_x0_35, PPLCNET_x0_5, PPLCNET_x0_75, PPLCNET_x1_0, PPLCNET_x1_5, PPLCNET_x2_0, \
PPLCNET_x2_5
from .vit import vit_base
from .mlp import MLP

Binary files not shown.

142
model/benchmark.py Normal file
@@ -0,0 +1,142 @@
import torch
import torch.nn as nn
import time
import numpy as np
from resnet_attention import resnet18_cbam, resnet34_cbam, resnet50_cbam
# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Benchmark device: {device}")
# Benchmark parameters
batch_sizes = [1, 4, 8, 16]
image_sizes = [224, 384, 512]
num_runs = 100 # timed runs per configuration
warmup_runs = 20 # warmup runs to exclude startup overhead
# Model configurations
model_configs = {
"resnet18": {
"base_model": lambda: resnet18_cbam(use_cbam=False),
"attention_model": lambda: resnet18_cbam(use_cbam=True)
},
"resnet34": {
"base_model": lambda: resnet34_cbam(use_cbam=False),
"attention_model": lambda: resnet34_cbam(use_cbam=True)
},
"resnet50": {
"base_model": lambda: resnet50_cbam(use_cbam=False),
"attention_model": lambda: resnet50_cbam(use_cbam=True)
}
}
# Benchmark function
def benchmark_model(model, input_size, batch_size, num_runs, warmup_runs):
"""
Measure a model's inference performance.
Args:
- model: model under test
- input_size: input image size
- batch_size: batch size
- num_runs: number of timed runs
- warmup_runs: number of warmup runs
Returns:
- average inference time per batch (milliseconds)
- throughput (samples/sec)
"""
# Switch to eval mode
model.eval()
model.to(device)
# Create a random input
input_tensor = torch.randn(batch_size, 3, input_size, input_size, device=device)
# Warmup
with torch.no_grad():
for _ in range(warmup_runs):
_ = model(input_tensor)
if device.type == 'cuda':
torch.cuda.synchronize() # synchronize GPU ops
# Measure inference time
start_time = time.time()
with torch.no_grad():
for _ in range(num_runs):
_ = model(input_tensor)
if device.type == 'cuda':
torch.cuda.synchronize() # synchronize GPU ops
end_time = time.time()
# Compute metrics
total_time = end_time - start_time
avg_time_per_batch = total_time / num_runs * 1000 # milliseconds
throughput = batch_size * num_runs / total_time # samples/sec
return avg_time_per_batch, throughput
# Run the benchmark
results = {}
for model_name, config in model_configs.items():
results[model_name] = {}
# Build models
base_model = config["base_model"]()
attention_model = config["attention_model"]()
# Count trainable parameters
base_params = sum(p.numel() for p in base_model.parameters() if p.requires_grad)
attention_params = sum(p.numel() for p in attention_model.parameters() if p.requires_grad)
param_increase = (attention_params - base_params) / base_params * 100
print(f"\nBenchmarking model: {model_name}")
print(f" Base params: {base_params / 1e6:.2f}M")
print(f" Params with attention: {attention_params / 1e6:.2f}M")
print(f" Param increase: {param_increase:.2f}%")
for batch_size in batch_sizes:
for image_size in image_sizes:
key = f"batch_{batch_size}_size_{image_size}"
results[model_name][key] = {}
# Benchmark the base model
base_time, base_throughput = benchmark_model(
base_model, image_size, batch_size, num_runs, warmup_runs
)
# Benchmark the attention model
attention_time, attention_throughput = benchmark_model(
attention_model, image_size, batch_size, num_runs, warmup_runs
)
# Compute percentage changes
time_increase = (attention_time - base_time) / base_time * 100
throughput_decrease = (base_throughput - attention_throughput) / base_throughput * 100
results[model_name][key]["base_time"] = base_time
results[model_name][key]["attention_time"] = attention_time
results[model_name][key]["time_increase"] = time_increase
results[model_name][key]["base_throughput"] = base_throughput
results[model_name][key]["attention_throughput"] = attention_throughput
results[model_name][key]["throughput_decrease"] = throughput_decrease
print(f" Config: batch_size={batch_size}, image_size={image_size}x{image_size}")
print(f" Base model: avg time={base_time:.2f}ms, throughput={base_throughput:.2f} samples/sec")
print(f" Attention model: avg time={attention_time:.2f}ms, throughput={attention_throughput:.2f} samples/sec")
print(f" Time increase: {time_increase:.2f}%, throughput decrease: {throughput_decrease:.2f}%")
# Save results
import json
with open('benchmark_results.json', 'w') as f:
json.dump(results, f, indent=2)
print("\nBenchmark complete; results saved to benchmark_results.json")

48
model/compare.py Normal file
@@ -0,0 +1,48 @@
import torch
from config import config as conf
import torch.nn as nn
import torchvision.models as models
from model.resnet_pre import resnet18, resnet50
# from model.vit import vit_base_patch16_224, vit_base_patch32_224
class ContrastiveModel(nn.Module):
def __init__(self, projection_dim, model_name, contraposition=False):
super(ContrastiveModel, self).__init__()
self.contraposition = contraposition
self.base_model = self._get_model(model_name)
if not self.contraposition:
if 'vit' in model_name:
dim_mlp = self.base_model.head.weight.shape[1]
self.base_model.head = self._get_projection_layer(dim_mlp, projection_dim)
else:
dim_mlp = self.base_model.fc.weight.shape[1]
self.base_model.fc = self._get_projection_layer(dim_mlp, projection_dim)
# # Freeze all layers except the FC layer
# for name, param in self.base_model.named_parameters():
# if 'fc' not in name:
# param.requires_grad = False
def _get_projection_layer(self, dim_mlp, projection_dim):
return nn.Sequential(
nn.Linear(dim_mlp, dim_mlp),
nn.ReLU(inplace=True),
nn.Linear(dim_mlp, projection_dim)
)
def _get_model(self, model_name):
base_model = None
if model_name == 'resnet18':
base_model = resnet18(pretrained=True)
elif model_name == 'resnet50':
base_model = resnet50(pretrained=True)
# elif model_name == 'vit':
# base_model = vit_base_patch32_224()
return base_model
def forward(self, x):
assert self.base_model is not None, 'base_model is none'
x = self.base_model(x)
return x
if __name__ == '__main__':
pass

182
model/distill.py Normal file
@@ -0,0 +1,182 @@
import torch
from torch import nn
from torch.nn import Module
import torch.nn.functional as F
from vit_pytorch.vit import ViT
from vit_pytorch.t2t import T2TViT
from vit_pytorch.efficient import ViT as EfficientViT
from einops import repeat
from config import config as conf
# helpers
# Data Setup
from tools.dataset import load_data
train_dataloader, class_num = load_data(conf, training=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def exists(val):
return val is not None
def default(val, d):
return val if exists(val) else d
# classes
class DistillMixin:
def forward(self, img, distill_token=None):
distilling = exists(distill_token)
x = self.to_patch_embedding(img)
b, n, _ = x.shape
cls_tokens = repeat(self.cls_token, '1 n d -> b n d', b=b)
x = torch.cat((cls_tokens, x), dim=1)
x += self.pos_embedding[:, :(n + 1)]
if distilling:
distill_tokens = repeat(distill_token, '1 n d -> b n d', b=b)
x = torch.cat((x, distill_tokens), dim=1)
x = self._attend(x)
if distilling:
x, distill_tokens = x[:, :-1], x[:, -1]
x = x.mean(dim=1) if self.pool == 'mean' else x[:, 0]
x = self.to_latent(x)
out = self.mlp_head(x)
if distilling:
return out, distill_tokens
return out
class DistillableViT(DistillMixin, ViT):
def __init__(self, *args, **kwargs):
super(DistillableViT, self).__init__(*args, **kwargs)
self.args = args
self.kwargs = kwargs
self.dim = kwargs['dim']
self.num_classes = kwargs['num_classes']
def to_vit(self):
v = ViT(*self.args, **self.kwargs)
v.load_state_dict(self.state_dict())
return v
def _attend(self, x):
x = self.dropout(x)
x = self.transformer(x)
return x
class DistillableT2TViT(DistillMixin, T2TViT):
def __init__(self, *args, **kwargs):
super(DistillableT2TViT, self).__init__(*args, **kwargs)
self.args = args
self.kwargs = kwargs
self.dim = kwargs['dim']
self.num_classes = kwargs['num_classes']
def to_vit(self):
v = T2TViT(*self.args, **self.kwargs)
v.load_state_dict(self.state_dict())
return v
def _attend(self, x):
x = self.dropout(x)
x = self.transformer(x)
return x
class DistillableEfficientViT(DistillMixin, EfficientViT):
def __init__(self, *args, **kwargs):
super(DistillableEfficientViT, self).__init__(*args, **kwargs)
self.args = args
self.kwargs = kwargs
self.dim = kwargs['dim']
self.num_classes = kwargs['num_classes']
def to_vit(self):
v = EfficientViT(*self.args, **self.kwargs)
v.load_state_dict(self.state_dict())
return v
def _attend(self, x):
return self.transformer(x)
# knowledge distillation wrapper
class DistillWrapper(Module):
def __init__(
self,
*,
teacher,
student,
temperature=1.,
alpha=0.5,
hard=False,
mlp_layernorm=False
):
super().__init__()
# assert (isinstance(student, (
# DistillableViT, DistillableT2TViT, DistillableEfficientViT))), 'student must be a vision transformer'
if isinstance(student, (DistillableViT, DistillableT2TViT, DistillableEfficientViT)):
pass
self.teacher = teacher
self.student = student
dim = conf.embedding_size # student.dim
num_classes = class_num # class_num # student.num_classes
self.temperature = temperature
self.alpha = alpha
self.hard = hard
self.distillation_token = nn.Parameter(torch.randn(1, 1, dim))
# student is vit
# self.distill_mlp = nn.Sequential(
# nn.LayerNorm(dim) if mlp_layernorm else nn.Identity(),
# nn.Linear(dim, num_classes)
# )
# student is resnet
self.distill_mlp = nn.Sequential(
nn.LayerNorm(dim) if mlp_layernorm else nn.Identity(),
nn.Linear(dim, num_classes).to(device)
)
def forward(self, img, labels, temperature=None, alpha=None, **kwargs):
alpha = default(alpha, self.alpha)
T = default(temperature, self.temperature)
with torch.no_grad():
teacher_logits = self.teacher(img)
teacher_logits = self.distill_mlp(teacher_logits) # teacher is ViT
# student is vit
# student_logits, distill_tokens = self.student(img, distill_token=self.distillation_token, **kwargs)
# distill_logits = self.distill_mlp(distill_tokens)
# student is resnet
student_logits = self.student(img)
distill_logits = self.distill_mlp(student_logits)
loss = F.cross_entropy(distill_logits, labels)
# pdb.set_trace()
if not self.hard:
distill_loss = F.kl_div(
F.log_softmax(distill_logits / T, dim=-1),
F.softmax(teacher_logits / T, dim=-1).detach(),
reduction='batchmean')
distill_loss *= T ** 2
else:
teacher_labels = teacher_logits.argmax(dim=-1)
distill_loss = F.cross_entropy(distill_logits, teacher_labels)
# pdb.set_trace()
return loss * (1 - alpha) + distill_loss * alpha
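
A usage sketch for the ResNet-student path this wrapper is currently configured for; teacher_model and student_model are placeholders for any networks emitting conf.embedding_size-dimensional features:

import torch

wrapper = DistillWrapper(teacher=teacher_model,   # placeholder embedding network
                         student=student_model,   # placeholder embedding network
                         temperature=4., alpha=0.8)
img = torch.randn(4, 3, conf.img_size, conf.img_size)
labels = torch.randint(0, class_num, (4,))
loss = wrapper(img, labels)  # (1 - alpha) * CE + alpha * distillation term
loss.backward()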

124
model/fmobilenet.py Normal file
@@ -0,0 +1,124 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class Flatten(nn.Module):
def forward(self, x):
return x.view(x.shape[0], -1)
class ConvBn(nn.Module):
def __init__(self, in_c, out_c, kernel=(1, 1), stride=1, padding=0, groups=1):
super().__init__()
self.net = nn.Sequential(
nn.Conv2d(in_c, out_c, kernel, stride, padding, groups=groups, bias=False),
nn.BatchNorm2d(out_c)
)
def forward(self, x):
return self.net(x)
class ConvBnPrelu(nn.Module):
def __init__(self, in_c, out_c, kernel=(1, 1), stride=1, padding=0, groups=1):
super().__init__()
self.net = nn.Sequential(
ConvBn(in_c, out_c, kernel, stride, padding, groups),
nn.PReLU(out_c)
)
def forward(self, x):
return self.net(x)
class DepthWise(nn.Module):
def __init__(self, in_c, out_c, kernel=(3, 3), stride=2, padding=1, groups=1):
super().__init__()
self.net = nn.Sequential(
ConvBnPrelu(in_c, groups, kernel=(1, 1), stride=1, padding=0),
ConvBnPrelu(groups, groups, kernel=kernel, stride=stride, padding=padding, groups=groups),
ConvBn(groups, out_c, kernel=(1, 1), stride=1, padding=0),
)
def forward(self, x):
return self.net(x)
class DepthWiseRes(nn.Module):
"""DepthWise with Residual"""
def __init__(self, in_c, out_c, kernel=(3, 3), stride=2, padding=1, groups=1):
super().__init__()
self.net = DepthWise(in_c, out_c, kernel, stride, padding, groups)
def forward(self, x):
return self.net(x) + x
class MultiDepthWiseRes(nn.Module):
def __init__(self, num_block, channels, kernel=(3, 3), stride=1, padding=1, groups=1):
super().__init__()
self.net = nn.Sequential(*[
DepthWiseRes(channels, channels, kernel, stride, padding, groups)
for _ in range(num_block)
])
def forward(self, x):
return self.net(x)
class FaceMobileNet(nn.Module):
def __init__(self, embedding_size):
super().__init__()
self.conv1 = ConvBnPrelu(1, 64, kernel=(3, 3), stride=2, padding=1)
self.conv2 = ConvBn(64, 64, kernel=(3, 3), stride=1, padding=1, groups=64)
self.conv3 = DepthWise(64, 64, kernel=(3, 3), stride=2, padding=1, groups=128)
self.conv4 = MultiDepthWiseRes(num_block=4, channels=64, kernel=3, stride=1, padding=1, groups=128)
self.conv5 = DepthWise(64, 128, kernel=(3, 3), stride=2, padding=1, groups=256)
self.conv6 = MultiDepthWiseRes(num_block=6, channels=128, kernel=(3, 3), stride=1, padding=1, groups=256)
self.conv7 = DepthWise(128, 128, kernel=(3, 3), stride=2, padding=1, groups=512)
self.conv8 = MultiDepthWiseRes(num_block=2, channels=128, kernel=(3, 3), stride=1, padding=1, groups=256)
self.conv9 = ConvBnPrelu(128, 512, kernel=(1, 1))
self.conv10 = ConvBn(512, 512, groups=512, kernel=(7, 7))
self.flatten = Flatten()
self.linear = nn.Linear(2048, embedding_size, bias=False)
self.bn = nn.BatchNorm1d(embedding_size)
def forward(self, x):
#print('x',x.shape)
out = self.conv1(x)
out = self.conv2(out)
out = self.conv3(out)
out = self.conv4(out)
out = self.conv5(out)
out = self.conv6(out)
out = self.conv7(out)
out = self.conv8(out)
out = self.conv9(out)
out = self.conv10(out)
out = self.flatten(out)
out = self.linear(out)
out = self.bn(out)
return out
if __name__ == "__main__":
from PIL import Image
import numpy as np
x = Image.open("../samples/009.jpg").convert('L')
x = x.resize((128, 128))
x = np.asarray(x, dtype=np.float32)
x = x[None, None, ...]
x = torch.from_numpy(x)
net = FaceMobileNet(512)
net.eval()
with torch.no_grad():
out = net(x)
print(out.shape)

233
model/lcnet.py Normal file
@@ -0,0 +1,233 @@
import os
import torch
import torch.nn as nn
import thop
# try:
# import softpool_cuda
# from SoftPool import soft_pool2d, SoftPool2d
# except ImportError:
# print('Please install SoftPool first: https://github.com/alexandrosstergiou/SoftPool')
# exit(0)
NET_CONFIG = {
# k, in_c, out_c, s, use_se
"blocks2": [[3, 16, 32, 1, False]],
"blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
"blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
"blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False]],
"blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
}
def autopad(k, p=None):
if p is None:
p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
return p
def make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
class HardSwish(nn.Module):
def __init__(self, inplace=True):
super(HardSwish, self).__init__()
self.relu6 = nn.ReLU6(inplace=inplace)
def forward(self, x):
return x * self.relu6(x+3) / 6
class HardSigmoid(nn.Module):
def __init__(self, inplace=True):
super(HardSigmoid, self).__init__()
self.relu6 = nn.ReLU6(inplace=inplace)
def forward(self, x):
return (self.relu6(x+3)) / 6
class SELayer(nn.Module):
def __init__(self, channel, reduction=16):
super(SELayer, self).__init__()
self.avgpool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channel, channel // reduction, bias=False),
nn.ReLU(inplace=True),
nn.Linear(channel // reduction, channel, bias=False),
HardSigmoid()
)
def forward(self, x):
b, c, h, w = x.size()
y = self.avgpool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
return x * y.expand_as(x)
class DepthwiseSeparable(nn.Module):
def __init__(self, inp, oup, dw_size, stride, use_se=False):
super(DepthwiseSeparable, self).__init__()
self.use_se = use_se
self.stride = stride
self.inp = inp
self.oup = oup
self.dw_size = dw_size
self.dw_sp = nn.Sequential(
nn.Conv2d(self.inp, self.inp, kernel_size=self.dw_size, stride=self.stride,
padding=autopad(self.dw_size, None), groups=self.inp, bias=False),
nn.BatchNorm2d(self.inp),
HardSwish(),
nn.Conv2d(self.inp, self.oup, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(self.oup),
HardSwish(),
)
self.se = SELayer(self.oup)
def forward(self, x):
x = self.dw_sp(x)
if self.use_se:
x = self.se(x)
return x
class PP_LCNet(nn.Module):
def __init__(self, scale=1.0, class_num=256, class_expand=1280, dropout_prob=0.2):
super(PP_LCNet, self).__init__()
self.scale = scale
self.conv1 = nn.Conv2d(3, out_channels=make_divisible(16 * self.scale),
kernel_size=3, stride=2, padding=1, bias=False)
# k, in_c, out_c, s, use_se inp, oup, dw_size, stride, use_se=False
self.blocks2 = nn.Sequential(*[
DepthwiseSeparable(inp=make_divisible(in_c * self.scale),
oup=make_divisible(out_c * self.scale),
dw_size=k, stride=s, use_se=use_se)
for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks2"])
])
self.blocks3 = nn.Sequential(*[
DepthwiseSeparable(inp=make_divisible(in_c * self.scale),
oup=make_divisible(out_c * self.scale),
dw_size=k, stride=s, use_se=use_se)
for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks3"])
])
self.blocks4 = nn.Sequential(*[
DepthwiseSeparable(inp=make_divisible(in_c * self.scale),
oup=make_divisible(out_c * self.scale),
dw_size=k, stride=s, use_se=use_se)
for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks4"])
])
# k, in_c, out_c, s, use_se inp, oup, dw_size, stride, use_se=False
self.blocks5 = nn.Sequential(*[
DepthwiseSeparable(inp=make_divisible(in_c * self.scale),
oup=make_divisible(out_c * self.scale),
dw_size=k, stride=s, use_se=use_se)
for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks5"])
])
self.blocks6 = nn.Sequential(*[
DepthwiseSeparable(inp=make_divisible(in_c * self.scale),
oup=make_divisible(out_c * self.scale),
dw_size=k, stride=s, use_se=use_se)
for i, (k, in_c, out_c, s, use_se) in enumerate(NET_CONFIG["blocks6"])
])
self.GAP = nn.AdaptiveAvgPool2d(1)
self.last_conv = nn.Conv2d(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale),
out_channels=class_expand,
kernel_size=1, stride=1, padding=0, bias=False)
self.hardswish = HardSwish()
self.dropout = nn.Dropout(p=dropout_prob)
self.fc = nn.Linear(class_expand, class_num)
def forward(self, x):
x = self.conv1(x)
# print(x.shape)
x = self.blocks2(x)
# print(x.shape)
x = self.blocks3(x)
# print(x.shape)
x = self.blocks4(x)
# print(x.shape)
x = self.blocks5(x)
# print(x.shape)
x = self.blocks6(x)
# print(x.shape)
x = self.GAP(x)
x = self.last_conv(x)
x = self.hardswish(x)
x = self.dropout(x)
x = torch.flatten(x, start_dim=1, end_dim=-1)
x = self.fc(x)
return x
def PPLCNET_x0_25(**kwargs):
model = PP_LCNet(scale=0.25, **kwargs)
return model
def PPLCNET_x0_35(**kwargs):
model = PP_LCNet(scale=0.35, **kwargs)
return model
def PPLCNET_x0_5(**kwargs):
model = PP_LCNet(scale=0.5, **kwargs)
return model
def PPLCNET_x0_75(**kwargs):
model = PP_LCNet(scale=0.75, **kwargs)
return model
def PPLCNET_x1_0(**kwargs):
model = PP_LCNet(scale=1.0, **kwargs)
return model
def PPLCNET_x1_5(**kwargs):
model = PP_LCNet(scale=1.5, **kwargs)
return model
def PPLCNET_x2_0(**kwargs):
model = PP_LCNet(scale=2.0, **kwargs)
return model
def PPLCNET_x2_5(**kwargs):
model = PP_LCNet(scale=2.5, **kwargs)
return model
if __name__ == '__main__':
# input = torch.randn(1, 3, 640, 640)
# model = PPLCNET_x2_5()
# flops, params = thop.profile(model, inputs=(input,))
# print('flops:', flops / 1000000000)
# print('params:', params / 1000000)
model = PPLCNET_x1_0()
# model_1 = PW_Conv(3, 16)
input = torch.randn(2, 3, 256, 256)
print(input.shape)
output = model(input)
print(output.shape) # [1, num_class]

18
model/loss.py Normal file
@@ -0,0 +1,18 @@
import torch
import torch.nn as nn
class FocalLoss(nn.Module):
def __init__(self, gamma=2):
super().__init__()
self.gamma = gamma
self.ce = torch.nn.CrossEntropyLoss()
def forward(self, input, target):
#print(f'theta {input.shape, input[0]}, target {target.shape, target}')
logp = self.ce(input, target)
p = torch.exp(-logp)
loss = (1 - p) ** self.gamma * logp
return loss.mean()
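
Note that self.ce already averages over the batch, so the (1 - p) ** gamma factor modulates the mean loss rather than each sample (a per-sample variant would build CrossEntropyLoss(reduction='none')). A quick check that gamma=0 recovers plain cross-entropy:

import torch

logits = torch.randn(4, 10)
target = torch.randint(0, 10, (4,))
print(torch.allclose(FocalLoss(gamma=0)(logits, target),
                     torch.nn.CrossEntropyLoss()(logits, target)))  # True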

94
model/metric.py Normal file
@@ -0,0 +1,94 @@
# Definition of ArcFace loss and CosFace loss
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
class ArcFace(nn.Module):
def __init__(self, embedding_size, class_num, s=30.0, m=0.50):
"""ArcFace formula:
cos(m + theta) = cos(m)cos(theta) - sin(m)sin(theta)
Note that:
0 <= m + theta <= Pi
So if (m + theta) >= Pi, then theta >= Pi - m. In [0, Pi]
we have:
cos(theta) < cos(Pi - m)
So we can use cos(Pi - m) as a threshold to check whether
(m + theta) goes out of [0, Pi]
Args:
embedding_size: usually 128, 256, 512 ...
class_num: num of people when training
s: scale, see normface https://arxiv.org/abs/1704.06369
m: margin, see SphereFace, CosFace, and ArcFace paper
"""
super().__init__()
self.in_features = embedding_size
self.out_features = class_num
self.s = s
self.m = m
self.weight = nn.Parameter(torch.FloatTensor(class_num, embedding_size))
nn.init.xavier_uniform_(self.weight)
self.cos_m = math.cos(m)
self.sin_m = math.sin(m)
self.th = math.cos(math.pi - m)
self.mm = math.sin(math.pi - m) * m
def forward(self, input, label):
#print(f"embding {self.in_features}, class_num {self.out_features}, input {len(input)}, label {len(label)}")
cosine = F.linear(F.normalize(input), F.normalize(self.weight))
# print('F.normalize(input)',input.shape)
# print('F.normalize(self.weight)',F.normalize(self.weight).shape)
sine = ((1.0 - cosine.pow(2)).clamp(0, 1)).sqrt()
phi = cosine * self.cos_m - sine * self.sin_m
phi = torch.where(cosine > self.th, phi, cosine - self.mm) # drop to CosFace
#print(f'consine {cosine.shape, cosine}, sine {sine.shape, sine}, phi {phi.shape, phi}')
# update y_i by phi in cosine
output = cosine * 1.0 # make backward works
batch_size = len(output)
output[range(batch_size), label] = phi[range(batch_size), label]
# print(f'output {(output * self.s).shape}')
# print(f'phi[range(batch_size), label] {phi[range(batch_size), label]}')
return output * self.s
class CosFace(nn.Module):
def __init__(self, in_features, out_features, s=30.0, m=0.40):
"""
Args:
embedding_size: usually 128, 256, 512 ...
class_num: num of people when training
s: scale, see normface https://arxiv.org/abs/1704.06369
m: margin, see SphereFace, CosFace, and ArcFace paper
"""
super().__init__()
self.in_features = in_features
self.out_features = out_features
self.s = s
self.m = m
self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
nn.init.xavier_uniform_(self.weight)
def forward(self, input, label):
cosine = F.linear(F.normalize(input), F.normalize(self.weight))
phi = cosine - self.m
output = cosine * 1.0 # make backward works
batch_size = len(output)
output[range(batch_size), label] = phi[range(batch_size), label]
return output * self.s
class Distillation(nn.Module):
def __init__(self, in_features, out_features, T=1.0):
super(Distillation, self).__init__()
self.T = T
self.in_features = in_features
self.out_features = out_features
self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
nn.init.xavier_uniform_(self.weight)
def forward(self, input, labels):
pass
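
A usage sketch for these heads: during training the margin-adjusted logits feed a standard cross-entropy loss (random embeddings stand in for backbone features):

import torch
import torch.nn.functional as F

embeddings = torch.randn(8, 256)
labels = torch.randint(0, 100, (8,))
head = ArcFace(embedding_size=256, class_num=100, s=30.0, m=0.50)
logits = head(embeddings, labels)  # margin applied only at the label positions
loss = F.cross_entropy(logits, labels)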

274
model/mlp.py Normal file
@@ -0,0 +1,274 @@
import pdb
import torch
import torch.nn as nn
import torch.nn.init as init
from model.resnet_pre import resnet18, conv1x1, BasicBlock, load_state_dict_from_url, model_urls
class MLP(nn.Module):
def __init__(self, input_dim=256, output_dim=1):
super(MLP, self).__init__()
self.input_dim = input_dim
self.output_dim = output_dim
self.fc1 = nn.Linear(self.input_dim, 128) # 32
self.fc2 = nn.Linear(128, 64)
self.fc3 = nn.Linear(64, 32)
self.fc4 = nn.Linear(32, 16)
self.fc5 = nn.Linear(16, self.output_dim)
self.relu = nn.ReLU()
self.sigmoid = nn.Sigmoid()
self.dropout = nn.Dropout(0.5)
self.bn1 = nn.BatchNorm1d(128)
self.bn2 = nn.BatchNorm1d(64)
self.bn3 = nn.BatchNorm1d(32)
self.bn4 = nn.BatchNorm1d(16)
for m in self.modules():
if isinstance(m, nn.Linear):
init.kaiming_normal_(m.weight)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
x = self.fc1(x)
x = self.relu(self.bn1(x))
x = self.fc2(x)
x = self.relu(self.bn2(x))
x = self.fc3(x)
x = self.relu(self.bn3(x))
x = self.fc4(x)
x = self.relu(self.bn4(x))
x = self.sigmoid(self.fc5(x))
return x
class Net2(nn.Module): # risky to deploy: DNN inference of this network is problematic
def __init__(self, input_dim=960, output_dim=1):
super(Net2, self).__init__()
self.input_dim = input_dim
self.output_dim = output_dim
self.conv1 = nn.Conv1d(1, 16, kernel_size=3, stride=1, padding=1)
self.conv2 = nn.Conv1d(16, 32, kernel_size=3, stride=2, padding=1)
# self.conv3 = nn.Conv1d(32, 64, kernel_size=3, stride=2, padding=1)
# self.conv4 = nn.Conv1d(64, 64, kernel_size=5, stride=2, padding=1)
self.maxPool1 = nn.MaxPool1d(kernel_size=3, stride=2)
self.conv5 = nn.Conv1d(32, 64, kernel_size=5, stride=2, padding=1)
self.maxPool2 = nn.MaxPool1d(kernel_size=3, stride=2)
self.avgPool = nn.AdaptiveAvgPool1d(1)
self.MaxPool = nn.AdaptiveMaxPool1d(1)
self.relu = nn.ReLU()
self.sigmoid = nn.Sigmoid()
self.dropout = nn.Dropout(0.5)
self.flatten = nn.Flatten()
# self.conv6 = nn.Conv1d(128, 128, kernel_size=5, stride=2, padding=1)
self.fc1 = nn.Linear(960, 128)
self.fc21 = nn.Linear(960, 32)
self.fc22 = nn.Linear(32, 128)
self.fc3 = nn.Linear(128, 1)
self.bn1 = nn.BatchNorm1d(16)
self.bn2 = nn.BatchNorm1d(32)
self.bn3 = nn.BatchNorm1d(64)
self.bn4 = nn.BatchNorm1d(128)
for m in self.modules():
if isinstance(m, nn.Linear):
init.kaiming_normal_(m.weight)
if m.bias is not None:
init.constant_(m.bias, 0)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
def forward(self, x):
x = self.conv1(x) # 16
x = self.relu(x)
x = self.conv2(x) # 32
x = self.relu(x)
# x = self.conv3(x)
# x = self.relu(x)
# x = self.conv4(x) # 64
# x = self.relu(x)
# x = self.maxPool1(x)
x = self.conv5(x)
x = self.relu(x)
# x = self.conv6(x)
# x = self.relu(x)
# x = self.maxPool2(x)
# x = self.MaxPool(x)
x = x.view(x.size(0), -1)
x = self.dropout(x)
x = self.flatten(x)
# pdb.set_trace()
x1 = self.fc1(x)
x2 = self.fc22(self.fc21(x))
x = self.fc3(x1 + x2)
x = self.sigmoid(x)
return x
class Net3(nn.Module): # currently the more suitable structure; Net3's outputs are more accurate than Net2's
def __init__(self, pretrained=True, progress=True, num_classes=1, scale=0.75):
super(Net3, self).__init__()
self.resnet18 = resnet18(pretrained=pretrained, progress=progress)
# Remove the last three layers (layer3, layer4, avgpool, fc)
# self.resnet18.layer3 = nn.Identity()
# self.resnet18.layer4 = nn.Identity()
self.resnet18.avgpool = nn.Identity()
self.resnet18.fc = nn.Identity()
self.flatten = nn.Flatten()
# Calculate the output size after layer2
# Assuming input size is 224x224, layer2 will have output size of 56x56
# So, the flattened size will be 128 * scale * 56 * 56
self.flattened_size = int(128 * (56 * 56) * scale * scale)
# Add new layers for classification
self.classifier = nn.Sequential(
nn.AdaptiveAvgPool2d((1, 1)),
nn.Flatten(),
nn.Linear(384, num_classes), # layer1, layer2 in_features=96 # layer1 in_features=48 #layer3 in_features=192
# nn.ReLU(),
nn.Dropout(0.6),
# nn.Linear(256, num_classes),
nn.Sigmoid()
)
def forward(self, x):
x = self.resnet18.layer1(x)
x = self.resnet18.layer2(x)
x = self.resnet18.layer3(x)
x = self.resnet18.layer4(x)
# Debugging: Print the shape of the tensor before flattening
# print("Shape before flattening:", x.shape)
# Ensure the tensor is flattened correctly
# x = x.view(x.size(0), -1)
x = self.classifier(x)
return x
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=1, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None, scale=0.75):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, int(64 * scale), layers[0])
self.layer2 = self._make_layer(block, int(128 * scale), layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, int(256 * scale), layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, int(512 * scale), layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(int(512 * block.expansion * scale), num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
self.sigmoid = nn.Sigmoid()
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def _forward_impl(self, x):
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
x = self.fc(x)
x = self.sigmoid(x)
return x
def forward(self, x):
return self._forward_impl(x)
def Net4(arch, pretrained, progress, **kwargs):
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
src_state_dict = state_dict
target_state_dict = model.state_dict()
skip_keys = []
# skip mismatch size tensors in case of pretraining
for k in src_state_dict.keys():
if k not in target_state_dict:
continue
if src_state_dict[k].size() != target_state_dict[k].size():
skip_keys.append(k)
for k in skip_keys:
del src_state_dict[k]
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
return model
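# Hedged note on the partial load above: with the default scale=0.75 only the
# stem (conv1/bn1) keeps torchvision's tensor shapes, so most checkpoint keys
# land in skip_keys and load_state_dict(strict=False) restores just the stem.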
if __name__ == '__main__':
    '''
    net2 = Net2()
    input_tensor = torch.randn(10, 1, 64)
    # forward pass
    output_tensor = net2(input_tensor)
    # pdb.set_trace()
    print("input tensor shape:", input_tensor.shape)
    print("output tensor shape:", output_tensor.shape)
    '''
    # model = Net3(pretrained=True, num_classes=1)  # pretrain on intermediate ResNet features
    model = Net4('resnet18', True, True)
    input_tensor = torch.randn(1, 3, 224, 224)  # single-image batch
    output = model(input_tensor)
    print(output.shape)  # Should be [1, 1]

148
model/mobilenet_v1.py Normal file
View File

@ -0,0 +1,148 @@
# Copyright 2022 Dakewe Biotech Corporation. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from typing import Callable, Any, Optional
import torch
from torch import Tensor
from torch import nn
from torchvision.ops.misc import Conv2dNormActivation
from config import config as conf
__all__ = [
"MobileNetV1",
"DepthWiseSeparableConv2d",
"mobilenet_v1",
]
class MobileNetV1(nn.Module):
def __init__(
self,
num_classes: int = conf.embedding_size,
) -> None:
super(MobileNetV1, self).__init__()
self.features = nn.Sequential(
Conv2dNormActivation(3,
32,
kernel_size=3,
stride=2,
padding=1,
norm_layer=nn.BatchNorm2d,
activation_layer=nn.ReLU,
inplace=True,
bias=False,
),
DepthWiseSeparableConv2d(32, 64, 1),
DepthWiseSeparableConv2d(64, 128, 2),
DepthWiseSeparableConv2d(128, 128, 1),
DepthWiseSeparableConv2d(128, 256, 2),
DepthWiseSeparableConv2d(256, 256, 1),
DepthWiseSeparableConv2d(256, 512, 2),
DepthWiseSeparableConv2d(512, 512, 1),
DepthWiseSeparableConv2d(512, 512, 1),
DepthWiseSeparableConv2d(512, 512, 1),
DepthWiseSeparableConv2d(512, 512, 1),
DepthWiseSeparableConv2d(512, 512, 1),
DepthWiseSeparableConv2d(512, 1024, 2),
DepthWiseSeparableConv2d(1024, 1024, 1),
)
self.avgpool = nn.AvgPool2d((7, 7))
self.classifier = nn.Linear(1024, num_classes)
# Initialize neural network weights
self._initialize_weights()
def forward(self, x: Tensor) -> Tensor:
out = self._forward_impl(x)
return out
# Support torch.script function
def _forward_impl(self, x: Tensor) -> Tensor:
out = self.features(x)
out = self.avgpool(out)
out = torch.flatten(out, 1)
out = self.classifier(out)
return out
def _initialize_weights(self) -> None:
for module in self.modules():
if isinstance(module, nn.Conv2d):
nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
if module.bias is not None:
nn.init.zeros_(module.bias)
elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.ones_(module.weight)
nn.init.zeros_(module.bias)
elif isinstance(module, nn.Linear):
nn.init.normal_(module.weight, 0, 0.01)
nn.init.zeros_(module.bias)
class DepthWiseSeparableConv2d(nn.Module):
def __init__(
self,
in_channels: int,
out_channels: int,
stride: int,
norm_layer: Optional[Callable[..., nn.Module]] = None
) -> None:
super(DepthWiseSeparableConv2d, self).__init__()
self.stride = stride
if stride not in [1, 2]:
raise ValueError(f"stride should be 1 or 2 instead of {stride}")
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self.conv = nn.Sequential(
Conv2dNormActivation(in_channels,
in_channels,
kernel_size=3,
stride=stride,
padding=1,
groups=in_channels,
norm_layer=norm_layer,
activation_layer=nn.ReLU,
inplace=True,
bias=False,
),
Conv2dNormActivation(in_channels,
out_channels,
kernel_size=1,
stride=1,
padding=0,
norm_layer=norm_layer,
activation_layer=nn.ReLU,
inplace=True,
bias=False,
),
)
def forward(self, x: Tensor) -> Tensor:
out = self.conv(x)
return out
def mobilenet_v1(**kwargs: Any) -> MobileNetV1:
model = MobileNetV1(**kwargs)
return model
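# Hedged usage sketch (the num_classes value is illustrative): the fixed
# AvgPool2d((7, 7)) above assumes a 224x224 input, since the five stride-2
# stages reduce resolution by 32x (224 / 32 = 7).
if __name__ == "__main__":
    net = mobilenet_v1(num_classes=128)
    out = net(torch.randn(2, 3, 224, 224))
    print(out.shape)  # expected: torch.Size([2, 128])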

200
model/mobilenet_v2.py Normal file
View File

@ -0,0 +1,200 @@
from torch import nn
from .utils import load_state_dict_from_url
from config import config as conf
__all__ = ['MobileNetV2', 'mobilenet_v2']
model_urls = {
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
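# Worked examples (illustrative): _make_divisible(32 * 0.75, 8) == 24, but
# _make_divisible(9, 8) == 16 -- rounding 9 down to 8 would lose more than
# 10% of the channels, so the guard bumps the result up by one divisor.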
class ConvBNReLU(nn.Sequential):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=None):
padding = (kernel_size - 1) // 2
if norm_layer is None:
norm_layer = nn.BatchNorm2d
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
norm_layer(out_planes),
nn.ReLU6(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio, norm_layer=None):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
if norm_layer is None:
norm_layer = nn.BatchNorm2d
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
norm_layer(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
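# Note: the identity shortcut in InvertedResidual is only added when the block
# preserves both spatial size (stride == 1) and width (inp == oup); every
# other block is a plain expand -> depthwise -> project stack.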
class MobileNetV2(nn.Module):
def __init__(self,
num_classes=conf.embedding_size,
width_mult=1.0,
inverted_residual_setting=None,
round_nearest=8,
block=None,
norm_layer=None):
"""
MobileNet V2 main class
Args:
num_classes (int): Number of classes
width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
inverted_residual_setting: Network structure
round_nearest (int): Round the number of channels in each layer to be a multiple of this number
Set to 1 to turn off rounding
block: Module specifying inverted residual building block for mobilenet
norm_layer: Module specifying the normalization layer to use
"""
super(MobileNetV2, self).__init__()
if block is None:
block = InvertedResidual
if norm_layer is None:
norm_layer = nn.BatchNorm2d
input_channel = 32
last_channel = 1280
if inverted_residual_setting is None:
inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# only check the first element, assuming user knows t,c,n,s are required
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
raise ValueError("inverted_residual_setting should be non-empty "
"or a 4-element list, got {}".format(inverted_residual_setting))
# building first layer
input_channel = _make_divisible(input_channel * width_mult, round_nearest)
self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)]
# building inverted residual blocks
for t, c, n, s in inverted_residual_setting:
output_channel = _make_divisible(c * width_mult, round_nearest)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))
input_channel = output_channel
# building last several layers
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer))
# make it nn.Sequential
self.features = nn.Sequential(*features)
# building classifier
self.classifier = nn.Sequential(
nn.Dropout(0.2),
nn.Linear(self.last_channel, num_classes),
)
# weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.zeros_(m.bias)
def _forward_impl(self, x):
# This exists since TorchScript doesn't support inheritance, so the superclass method
# (this one) needs to have a name other than `forward` that can be accessed in a subclass
x = self.features(x)
# Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1)
x = self.classifier(x)
return x
def forward(self, x):
return self._forward_impl(x)
def mobilenet_v2(pretrained=True, progress=True, **kwargs):
"""
Constructs a MobileNetV2 architecture from
`"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
model = MobileNetV2(**kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'],
progress=progress)
src_state_dict = state_dict
target_state_dict = model.state_dict()
skip_keys = []
# skip mismatch size tensors in case of pretraining
for k in src_state_dict.keys():
if k not in target_state_dict:
continue
if src_state_dict[k].size() != target_state_dict[k].size():
skip_keys.append(k)
for k in skip_keys:
del src_state_dict[k]
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
#.load_state_dict(state_dict)
return model
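# Hedged usage sketch: with pretrained=True, the ImageNet classifier head
# (1000 outputs) mismatches conf.embedding_size, so the size filter above
# drops it and only backbone weights are restored.
if __name__ == "__main__":
    import torch
    net = mobilenet_v2(pretrained=False)  # set True to exercise the filter
    print(net(torch.randn(1, 3, 224, 224)).shape)  # (1, conf.embedding_size)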

200
model/mobilenet_v3.py Normal file
View File

@ -0,0 +1,200 @@
'''MobileNetV3 in PyTorch.
See the paper "Searching for MobileNetV3"
(https://arxiv.org/abs/1905.02244) for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init
from config import config as conf
class hswish(nn.Module):
def forward(self, x):
out = x * F.relu6(x + 3, inplace=True) / 6
return out
class hsigmoid(nn.Module):
def forward(self, x):
out = F.relu6(x + 3, inplace=True) / 6
return out
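# Illustrative values: hswish(3) = 3 * relu6(6) / 6 = 3 and hswish(-3) = 0;
# hsigmoid is the matching piecewise-linear sigmoid surrogate, clamped to
# [0, 1] outside the interval [-3, 3].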
class SeModule(nn.Module):
def __init__(self, in_size, reduction=4):
super(SeModule, self).__init__()
self.se = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_size, in_size // reduction, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(in_size // reduction),
nn.ReLU(inplace=True),
nn.Conv2d(in_size // reduction, in_size, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(in_size),
hsigmoid()
)
def forward(self, x):
return x * self.se(x)
class Block(nn.Module):
'''expand + depthwise + pointwise'''
def __init__(self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride):
super(Block, self).__init__()
self.stride = stride
self.se = semodule
self.conv1 = nn.Conv2d(in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(expand_size)
self.nolinear1 = nolinear
self.conv2 = nn.Conv2d(expand_size, expand_size, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=expand_size, bias=False)
self.bn2 = nn.BatchNorm2d(expand_size)
self.nolinear2 = nolinear
self.conv3 = nn.Conv2d(expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False)
self.bn3 = nn.BatchNorm2d(out_size)
self.shortcut = nn.Sequential()
if stride == 1 and in_size != out_size:
self.shortcut = nn.Sequential(
nn.Conv2d(in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(out_size),
)
def forward(self, x):
out = self.nolinear1(self.bn1(self.conv1(x)))
out = self.nolinear2(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
        if self.se is not None:
            out = self.se(out)
        out = out + self.shortcut(x) if self.stride == 1 else out
return out
class MobileNetV3_Large(nn.Module):
def __init__(self, num_classes=conf.embedding_size):
super(MobileNetV3_Large, self).__init__()
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.hs1 = hswish()
self.bneck = nn.Sequential(
Block(3, 16, 16, 16, nn.ReLU(inplace=True), None, 1),
Block(3, 16, 64, 24, nn.ReLU(inplace=True), None, 2),
Block(3, 24, 72, 24, nn.ReLU(inplace=True), None, 1),
Block(5, 24, 72, 40, nn.ReLU(inplace=True), SeModule(40), 2),
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
Block(3, 40, 240, 80, hswish(), None, 2),
Block(3, 80, 200, 80, hswish(), None, 1),
Block(3, 80, 184, 80, hswish(), None, 1),
Block(3, 80, 184, 80, hswish(), None, 1),
Block(3, 80, 480, 112, hswish(), SeModule(112), 1),
Block(3, 112, 672, 112, hswish(), SeModule(112), 1),
Block(5, 112, 672, 160, hswish(), SeModule(160), 1),
Block(5, 160, 672, 160, hswish(), SeModule(160), 2),
Block(5, 160, 960, 160, hswish(), SeModule(160), 1),
)
self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(960)
self.hs2 = hswish()
self.linear3 = nn.Linear(960, 1280)
self.bn3 = nn.BatchNorm1d(1280)
self.hs3 = hswish()
self.linear4 = nn.Linear(1280, num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
out = self.hs1(self.bn1(self.conv1(x)))
out = self.bneck(out)
out = self.hs2(self.bn2(self.conv2(out)))
out = F.avg_pool2d(out, conf.img_size // 32)
out = out.view(out.size(0), -1)
out = self.hs3(self.bn3(self.linear3(out)))
out = self.linear4(out)
return out
class MobileNetV3_Small(nn.Module):
def __init__(self, num_classes=conf.embedding_size):
super(MobileNetV3_Small, self).__init__()
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.hs1 = hswish()
self.bneck = nn.Sequential(
Block(3, 16, 16, 16, nn.ReLU(inplace=True), SeModule(16), 2),
Block(3, 16, 72, 24, nn.ReLU(inplace=True), None, 2),
Block(3, 24, 88, 24, nn.ReLU(inplace=True), None, 1),
Block(5, 24, 96, 40, hswish(), SeModule(40), 2),
Block(5, 40, 240, 40, hswish(), SeModule(40), 1),
Block(5, 40, 240, 40, hswish(), SeModule(40), 1),
Block(5, 40, 120, 48, hswish(), SeModule(48), 1),
Block(5, 48, 144, 48, hswish(), SeModule(48), 1),
Block(5, 48, 288, 96, hswish(), SeModule(96), 2),
Block(5, 96, 576, 96, hswish(), SeModule(96), 1),
Block(5, 96, 576, 96, hswish(), SeModule(96), 1),
)
self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(576)
self.hs2 = hswish()
self.linear3 = nn.Linear(576, 1280)
self.bn3 = nn.BatchNorm1d(1280)
self.hs3 = hswish()
self.linear4 = nn.Linear(1280, num_classes)
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
out = self.hs1(self.bn1(self.conv1(x)))
out = self.bneck(out)
out = self.hs2(self.bn2(self.conv2(out)))
out = F.avg_pool2d(out, conf.img_size // 32)
out = out.view(out.size(0), -1)
out = self.hs3(self.bn3(self.linear3(out)))
out = self.linear4(out)
return out
def test():
net = MobileNetV3_Small()
x = torch.randn(2,3,224,224)
y = net(x)
print(y.size())
# test()

265
model/mobilevit.py Normal file
View File

@ -0,0 +1,265 @@
import torch
import torch.nn as nn
from einops import rearrange
from config import config as conf
def conv_1x1_bn(inp, oup):
return nn.Sequential(
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
nn.SiLU()
)
def conv_nxn_bn(inp, oup, kernel_size=3, stride=1):
    return nn.Sequential(
        nn.Conv2d(inp, oup, kernel_size, stride, 1, bias=False),
nn.BatchNorm2d(oup),
nn.SiLU()
)
class PreNorm(nn.Module):
def __init__(self, dim, fn):
super().__init__()
self.norm = nn.LayerNorm(dim)
self.fn = fn
def forward(self, x, **kwargs):
return self.fn(self.norm(x), **kwargs)
class FeedForward(nn.Module):
def __init__(self, dim, hidden_dim, dropout=0.):
super().__init__()
self.net = nn.Sequential(
nn.Linear(dim, hidden_dim),
nn.SiLU(),
nn.Dropout(dropout),
nn.Linear(hidden_dim, dim),
nn.Dropout(dropout)
)
def forward(self, x):
return self.net(x)
class Attention(nn.Module):
def __init__(self, dim, heads=8, dim_head=64, dropout=0.):
super().__init__()
inner_dim = dim_head * heads
project_out = not (heads == 1 and dim_head == dim)
self.heads = heads
self.scale = dim_head ** -0.5
self.attend = nn.Softmax(dim=-1)
self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)
self.to_out = nn.Sequential(
nn.Linear(inner_dim, dim),
nn.Dropout(dropout)
) if project_out else nn.Identity()
def forward(self, x):
qkv = self.to_qkv(x).chunk(3, dim=-1)
q, k, v = map(lambda t: rearrange(t, 'b p n (h d) -> b p h n d', h=self.heads), qkv)
dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
attn = self.attend(dots)
out = torch.matmul(attn, v)
out = rearrange(out, 'b p h n d -> b p n (h d)')
return self.to_out(out)
class Transformer(nn.Module):
def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
super().__init__()
self.layers = nn.ModuleList([])
for _ in range(depth):
self.layers.append(nn.ModuleList([
PreNorm(dim, Attention(dim, heads, dim_head, dropout)),
PreNorm(dim, FeedForward(dim, mlp_dim, dropout))
]))
def forward(self, x):
for attn, ff in self.layers:
x = attn(x) + x
x = ff(x) + x
return x
class MV2Block(nn.Module):
def __init__(self, inp, oup, stride=1, expansion=4):
super().__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(inp * expansion)
self.use_res_connect = self.stride == 1 and inp == oup
if expansion == 1:
self.conv = nn.Sequential(
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.SiLU(),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
else:
self.conv = nn.Sequential(
# pw
nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.SiLU(),
# dw
nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.SiLU(),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileViTBlock(nn.Module):
def __init__(self, dim, depth, channel, kernel_size, patch_size, mlp_dim, dropout=0.):
super().__init__()
self.ph, self.pw = patch_size
self.conv1 = conv_nxn_bn(channel, channel, kernel_size)
self.conv2 = conv_1x1_bn(channel, dim)
self.transformer = Transformer(dim, depth, 4, 8, mlp_dim, dropout)
self.conv3 = conv_1x1_bn(dim, channel)
self.conv4 = conv_nxn_bn(2 * channel, channel, kernel_size)
def forward(self, x):
y = x.clone()
# Local representations
x = self.conv1(x)
x = self.conv2(x)
# Global representations
_, _, h, w = x.shape
x = rearrange(x, 'b d (h ph) (w pw) -> b (ph pw) (h w) d', ph=self.ph, pw=self.pw)
x = self.transformer(x)
x = rearrange(x, 'b (ph pw) (h w) d -> b d (h ph) (w pw)', h=h // self.ph, w=w // self.pw, ph=self.ph,
pw=self.pw)
# Fusion
x = self.conv3(x)
x = torch.cat((x, y), 1)
x = self.conv4(x)
return x
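# Illustrative shape trace for MobileViTBlock with patch_size=(2, 2):
# (B, C, H, W) -> conv1/conv2 -> (B, dim, H, W) -> rearrange ->
# (B, ph*pw, (H//2)*(W//2), dim), so attention mixes all patches for each
# intra-patch position (a global receptive field at low cost); the inverse
# rearrange then restores (B, dim, H, W) before fusion with the residual y.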
class MobileViT(nn.Module):
def __init__(self, image_size, dims, channels, num_classes, expansion=4, kernel_size=3, patch_size=(2, 2)):
super().__init__()
ih, iw = image_size
ph, pw = patch_size
assert ih % ph == 0 and iw % pw == 0
L = [2, 4, 3]
self.conv1 = conv_nxn_bn(3, channels[0], stride=2)
self.mv2 = nn.ModuleList([])
self.mv2.append(MV2Block(channels[0], channels[1], 1, expansion))
self.mv2.append(MV2Block(channels[1], channels[2], 2, expansion))
self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion))
self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion)) # Repeat
self.mv2.append(MV2Block(channels[3], channels[4], 2, expansion))
self.mv2.append(MV2Block(channels[5], channels[6], 2, expansion))
self.mv2.append(MV2Block(channels[7], channels[8], 2, expansion))
self.mvit = nn.ModuleList([])
self.mvit.append(MobileViTBlock(dims[0], L[0], channels[5], kernel_size, patch_size, int(dims[0] * 2)))
self.mvit.append(MobileViTBlock(dims[1], L[1], channels[7], kernel_size, patch_size, int(dims[1] * 4)))
self.mvit.append(MobileViTBlock(dims[2], L[2], channels[9], kernel_size, patch_size, int(dims[2] * 4)))
self.conv2 = conv_1x1_bn(channels[-2], channels[-1])
self.pool = nn.AvgPool2d(ih // 32, 1)
self.fc = nn.Linear(channels[-1], num_classes, bias=False)
def forward(self, x):
#print('x',x.shape)
x = self.conv1(x)
x = self.mv2[0](x)
x = self.mv2[1](x)
x = self.mv2[2](x)
x = self.mv2[3](x) # Repeat
x = self.mv2[4](x)
x = self.mvit[0](x)
x = self.mv2[5](x)
x = self.mvit[1](x)
x = self.mv2[6](x)
x = self.mvit[2](x)
x = self.conv2(x)
#print('pool_before',x.shape)
x = self.pool(x).view(-1, x.shape[1])
#print('self_pool',self.pool)
#print('pool_after',x.shape)
x = self.fc(x)
return x
def mobilevit_xxs():
dims = [64, 80, 96]
channels = [16, 16, 24, 24, 48, 48, 64, 64, 80, 80, 320]
return MobileViT((256, 256), dims, channels, num_classes=1000, expansion=2)
def mobilevit_xs():
dims = [96, 120, 144]
channels = [16, 32, 48, 48, 64, 64, 80, 80, 96, 96, 384]
return MobileViT((256, 256), dims, channels, num_classes=1000)
def mobilevit_s():
dims = [144, 192, 240]
channels = [16, 32, 64, 64, 96, 96, 128, 128, 160, 160, 640]
return MobileViT((conf.img_size, conf.img_size), dims, channels, num_classes=conf.embedding_size)
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
if __name__ == '__main__':
img = torch.randn(5, 3, 256, 256)
vit = mobilevit_xxs()
out = vit(img)
print(out.shape)
print(count_parameters(vit))
vit = mobilevit_xs()
out = vit(img)
print(out.shape)
print(count_parameters(vit))
vit = mobilevit_s()
out = vit(img)
print(out.shape)
print(count_parameters(vit))

412
model/quant_test_resnet.py Normal file
View File

@ -0,0 +1,412 @@
import torch
from torch import Tensor
import torch.nn as nn
from .utils import load_state_dict_from_url
from typing import Type, Any, Callable, Union, List, Optional
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
'wide_resnet50_2', 'wide_resnet101_2']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}
def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
expansion: int = 1
def __init__(
self,
inplanes: int,
planes: int,
stride: int = 1,
downsample: Optional[nn.Module] = None,
groups: int = 1,
base_width: int = 64,
dilation: int = 1,
norm_layer: Optional[Callable[..., nn.Module]] = None
) -> None:
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x: Tensor) -> Tensor:
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class QuantizableBasicBlock(BasicBlock):
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
self.add_relu = torch.nn.quantized.FloatFunctional()
def forward(self, x: Tensor) -> Tensor:
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out = self.add_relu.add_relu(out, identity)
return out
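# Hedged note: FloatFunctional.add_relu makes the residual add + ReLU visible
# to PyTorch's quantization observers, so the pair can be fused into a single
# quantized op; an in-place `out += identity` would be opaque to the quantizer.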
class Bottleneck(nn.Module):
# Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
# while original implementation places the stride at the first 1x1 convolution(self.conv1)
# according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
# This variant is also known as ResNet V1.5 and improves accuracy according to
# https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
expansion: int = 4
def __init__(
self,
inplanes: int,
planes: int,
stride: int = 1,
downsample: Optional[nn.Module] = None,
groups: int = 1,
base_width: int = 64,
dilation: int = 1,
norm_layer: Optional[Callable[..., nn.Module]] = None
) -> None:
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x: Tensor) -> Tensor:
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(
self,
block: Type[Union[BasicBlock, Bottleneck]],
layers: List[int],
num_classes: int = 1000,
zero_init_residual: bool = False,
groups: int = 1,
width_per_group: int = 64,
replace_stride_with_dilation: Optional[List[bool]] = None,
norm_layer: Optional[Callable[..., nn.Module]] = None
) -> None:
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0) # type: ignore[arg-type]
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0) # type: ignore[arg-type]
def _make_layer(self, block: Type[Union[BasicBlock, Bottleneck]], planes: int, blocks: int,
stride: int = 1, dilate: bool = False) -> nn.Sequential:
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def _forward_impl(self, x: Tensor) -> Tensor:
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
x = self.fc(x)
return x
def forward(self, x: Tensor) -> Tensor:
return self._forward_impl(x)
def _resnet(
arch: str,
block: Type[Union[BasicBlock, Bottleneck]],
layers: List[int],
pretrained: bool,
progress: bool,
**kwargs: Any
) -> ResNet:
model = ResNet(block, layers, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls[arch],
progress=progress)
model.load_state_dict(state_dict)
return model
def resnet18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNet-18 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
# return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, **kwargs)
return _resnet('resnet18', QuantizableBasicBlock, [2, 2, 2, 2], pretrained, progress, **kwargs)
def resnet34(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNet-34 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet50(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNet-50 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet101(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNet-101 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
**kwargs)
def resnet152(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNet-152 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
**kwargs)
def resnext50_32x4d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNeXt-50 32x4d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 4
return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def resnext101_32x8d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNeXt-101 32x8d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 8
return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)
def wide_resnet50_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""Wide ResNet-50-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_.
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def wide_resnet101_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""Wide ResNet-101-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_.
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)

142
model/resbam.py Normal file
View File

@ -0,0 +1,142 @@
from model.CBAM import CBAM
import torch
import torch.nn as nn
from model.Tool import GeM as gem
class Bottleneck(nn.Module):
expansion = 4
    def __init__(self, inchannel, outchannel, stride=1, downsample=None):
        # super(Bottleneck, self).__init__()
        super().__init__()
self.conv1 = nn.Conv2d(in_channels=inchannel, out_channels=outchannel, kernel_size=1, stride=1, bias=False)
self.bn1 = nn.BatchNorm2d(outchannel)
self.conv2 = nn.Conv2d(in_channels=outchannel, out_channels=outchannel, kernel_size=3, bias=False,
stride=stride, padding=1)
self.bn2 = nn.BatchNorm2d(outchannel)
self.conv3 = nn.Conv2d(in_channels=outchannel, out_channels=outchannel * self.expansion, stride=1, bias=False,
kernel_size=1)
self.bn3 = nn.BatchNorm2d(outchannel * self.expansion)
self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
    def forward(self, x):
        identity = x
        # print('>>>>>>>>', type(x))
        if self.downsample is not None:
            # print('>>>>downsample>>>>', type(self.downsample))
            identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
        # print('>>>>out>>>identity', out.size(), identity.size())
        out = out + identity
out = self.relu(out)
return out
class resnet(nn.Module):
def __init__(self, block=Bottleneck, block_num=[3, 4, 6, 3], num_class=1000):
super().__init__()
self.in_channel = 64
self.conv1 = nn.Conv2d(in_channels=3,
out_channels=self.in_channel,
stride=2,
kernel_size=7,
padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(self.in_channel)
self.relu = nn.ReLU(inplace=True)
self.cbam = CBAM(self.in_channel)
self.cbam1 = CBAM(2048)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, block_num[0], stride=1)
self.layer2 = self._make_layer(block, 128, block_num[1], stride=2)
self.layer3 = self._make_layer(block, 256, block_num[2], stride=2)
self.layer4 = self._make_layer(block, 512, block_num[3], stride=2)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.gem = gem()
self.fc = nn.Linear(512 * block.expansion, num_class)
for m in self.modules():
if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
if isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1.0)
nn.init.constant_(m.bias, 1.0)
def _make_layer(self, block, channel, block_num, stride=1):
downsample = None
if stride != 1 or self.in_channel != channel * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(channel * block.expansion))
layer = []
layer.append(block(self.in_channel, channel, stride, downsample))
self.in_channel = channel * block.expansion
for _ in range(1, block_num):
layer.append(block(self.in_channel, channel))
return nn.Sequential(*layer)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.cbam(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.cbam1(x)
# x = self.avgpool(x)
x = self.gem(x)
x = torch.flatten(x, 1)
x = self.fc(x)
return x
class TripletNet(nn.Module):
def __init__(self, num_class, flag=True):
super(TripletNet, self).__init__()
self.initnet = rescbam(num_class)
self.flag = flag
def forward(self, x1, x2=None, x3=None):
if self.flag:
output1 = self.initnet(x1)
output2 = self.initnet(x2)
output3 = self.initnet(x3)
return output1, output2, output3
else:
output = self.initnet(x1)
return output
def rescbam(num_class):
return resnet(block=Bottleneck, block_num=[3, 4, 6, 3], num_class=num_class)
if __name__ == '__main__':
input1 = torch.randn(4, 3, 640, 640)
input2 = torch.randn(4, 3, 640, 640)
input3 = torch.randn(4, 3, 640, 640)
    # rescbam test
    # Resnet50 = rescbam(512)
    # output = Resnet50.forward(input1)
    # print(Resnet50)
    # TripletNet test
trnet = TripletNet(512)
output = trnet(input1, input2, input3)
print(output)

189
model/resnet.py Normal file
View File

@ -0,0 +1,189 @@
"""resnet in pytorch
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
Deep Residual Learning for Image Recognition
https://arxiv.org/abs/1512.03385v1
"""
import torch
import torch.nn as nn
from config import config as conf
from model.CBAM import CBAM
class BasicBlock(nn.Module):
"""Basic Block for resnet 18 and resnet 34
"""
#BasicBlock and BottleNeck block
#have different output size
#we use class attribute expansion
#to distinct
expansion = 1
def __init__(self, in_channels, out_channels, stride=1):
super().__init__()
#residual function
self.residual_function = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * BasicBlock.expansion, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
)
#shortcut
self.shortcut = nn.Sequential()
#the shortcut output dimension is not the same with residual function
#use 1*1 convolution to match the dimension
if stride != 1 or in_channels != BasicBlock.expansion * out_channels:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels * BasicBlock.expansion)
)
def forward(self, x):
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
class BottleNeck(nn.Module):
"""Residual block for resnet over 50 layers
"""
expansion = 4
def __init__(self, in_channels, out_channels, stride=1):
super().__init__()
self.residual_function = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels, stride=stride, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels * BottleNeck.expansion, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels * BottleNeck.expansion),
)
self.shortcut = nn.Sequential()
if stride != 1 or in_channels != out_channels * BottleNeck.expansion:
self.shortcut = nn.Sequential(
nn.Conv2d(in_channels, out_channels * BottleNeck.expansion, stride=stride, kernel_size=1, bias=False),
nn.BatchNorm2d(out_channels * BottleNeck.expansion)
)
def forward(self, x):
return nn.ReLU(inplace=True)(self.residual_function(x) + self.shortcut(x))
class ResNet(nn.Module):
    def __init__(self, block, num_block, cbam=False, num_classes=conf.embedding_size):
        super().__init__()
        self.in_channels = 64
        self.use_cbam = cbam
# self.conv1 = nn.Sequential(
# nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False),
# nn.BatchNorm2d(64),
# nn.ReLU(inplace=True))
self.conv1 = nn.Sequential(
nn.Conv2d(3, 64,stride=2,kernel_size=7,padding=3,bias=False),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
self.cbam = CBAM(self.in_channels)
        # we use a different input size than the original paper,
        # so conv2_x's stride is 1
self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
self.conv5_x = self._make_layer(block, 512, num_block[3], 2)
self.cbam1 = CBAM(self.in_channels)
self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
if isinstance(m, (nn.BatchNorm2d)):
nn.init.constant_(m.weight, 1.0)
nn.init.constant_(m.bias, 1.0)
def _make_layer(self, block, out_channels, num_blocks, stride):
"""make resnet layers(by layer i didnt mean this 'layer' was the
same as a neuron netowork layer, ex. conv layer), one layer may
contain more than one residual block
Args:
block: block type, basic block or bottle neck block
out_channels: output depth channel number of this layer
num_blocks: how many blocks per layer
stride: the stride of the first block of this layer
Return:
return a resnet layer
"""
        # the first block of a layer may use stride 1 or 2;
        # all remaining blocks always use stride 1
strides = [stride] + [1] * (num_blocks - 1)
layers = []
for stride in strides:
layers.append(block(self.in_channels, out_channels, stride))
self.in_channels = out_channels * block.expansion
return nn.Sequential(*layers)
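    # Illustrative: _make_layer(block, 128, num_blocks=2, stride=2) yields
    # strides [2, 1], so only the first block downsamples (via its 1x1
    # shortcut) and self.in_channels advances to 128 * block.expansion.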
    def forward(self, x):
        output = self.conv1(x)
        if self.use_cbam:
            output = self.cbam(output)
        output = self.conv2_x(output)
        output = self.conv3_x(output)
        output = self.conv4_x(output)
        output = self.conv5_x(output)
        if self.use_cbam:
            output = self.cbam1(output)
        # print('poolBefore', output.shape)
        output = self.avg_pool(output)
        # print('poolAfter', output.shape)
        output = output.view(output.size(0), -1)
        # print('fcBefore', output.shape)
        output = self.fc(output)
        return output
def resnet18(cbam = False):
""" return a ResNet 18 object
"""
return ResNet(BasicBlock, [2, 2, 2, 2], cbam)
def resnet34():
""" return a ResNet 34 object
"""
return ResNet(BasicBlock, [3, 4, 6, 3])
def resnet50():
""" return a ResNet 50 object
"""
return ResNet(BottleNeck, [3, 4, 6, 3])
def resnet101():
""" return a ResNet 101 object
"""
return ResNet(BottleNeck, [3, 4, 23, 3])
def resnet152():
""" return a ResNet 152 object
"""
return ResNet(BottleNeck, [3, 8, 36, 3])

271
model/resnet_attention.py Normal file
View File

@ -0,0 +1,271 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class ChannelAttention(nn.Module):
"""通道注意力模块通过全局平均池化和最大池化提取特征经过MLP生成通道权重"""
def __init__(self, in_channels, reduction_ratio=16):
super(ChannelAttention, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.max_pool = nn.AdaptiveMaxPool2d(1)
        # shared MLP layers
self.fc = nn.Sequential(
nn.Conv2d(in_channels, in_channels // reduction_ratio, 1, bias=False),
nn.ReLU(),
nn.Conv2d(in_channels // reduction_ratio, in_channels, 1, bias=False)
)
def forward(self, x):
avg_out = self.fc(self.avg_pool(x))
max_out = self.fc(self.max_pool(x))
out = avg_out + max_out
return torch.sigmoid(out)
class SpatialAttention(nn.Module):
"""空间注意力模块,通过通道维度的平均和最大值操作,生成空间权重"""
def __init__(self, kernel_size=7):
super(SpatialAttention, self).__init__()
self.conv = nn.Conv2d(2, 1, kernel_size, padding=kernel_size // 2, bias=False)
def forward(self, x):
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
out = torch.cat([avg_out, max_out], dim=1)
out = self.conv(out)
return torch.sigmoid(out)
class CBAM(nn.Module):
"""CBAM注意力模块串联通道注意力和空间注意力"""
def __init__(self, in_channels, reduction_ratio=16, kernel_size=7):
super(CBAM, self).__init__()
self.channel_att = ChannelAttention(in_channels, reduction_ratio)
self.spatial_att = SpatialAttention(kernel_size)
def forward(self, x):
x = x * self.channel_att(x)
x = x * self.spatial_att(x)
return x
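# Minimal sketch (illustrative): CBAM is a shape-preserving reweighting, e.g.
# CBAM(64)(torch.randn(1, 64, 32, 32)) returns a tensor of the same
# (1, 64, 32, 32) shape, with each channel and spatial position scaled by a
# learned weight in [0, 1].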
class BasicBlock(nn.Module):
"""ResNet基础残差块适用于ResNet18和ResNet34"""
expansion = 1
def __init__(self, in_channels, out_channels, stride=1, downsample=None, use_cbam=False):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channels)
self.downsample = downsample
self.stride = stride
        # whether to apply CBAM attention
self.use_cbam = use_cbam
if use_cbam:
self.cbam = CBAM(out_channels)
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
        # apply CBAM if attention is enabled
if self.use_cbam:
out = self.cbam(out)
        # if downsampling is needed, adjust the shortcut connection
if self.downsample is not None:
identity = self.downsample(x)
        # residual connection
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
"""ResNet瓶颈残差块适用于ResNet50及更深的网络"""
expansion = 4
def __init__(self, in_channels, out_channels, stride=1, downsample=None, use_cbam=False):
super(Bottleneck, self).__init__()
        # 1x1 convolution to reduce channels
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channels)
        # 3x3 convolution
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channels)
        # 1x1 convolution to expand channels
self.conv3 = nn.Conv2d(out_channels, out_channels * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
        # whether to apply CBAM attention
self.use_cbam = use_cbam
if use_cbam:
self.cbam = CBAM(out_channels * self.expansion)
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
        # apply CBAM if attention is enabled
if self.use_cbam:
out = self.cbam(out)
        # if downsampling is needed, adjust the shortcut connection
if self.downsample is not None:
identity = self.downsample(x)
        # residual connection
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
"""集成了CBAM注意力机制的ResNet模型"""
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, use_cbam=True):
super(ResNet, self).__init__()
self.in_channels = 64
self.use_cbam = use_cbam
        # stem convolution
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
self.cbam1 = CBAM(64)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # residual stages
self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
self.cbam2 = CBAM(512)
        # global average pooling and classifier
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
        # weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
        # zero-init the last BN in each residual branch so it starts as identity
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, out_channels, blocks, stride=1):
downsample = None
        # create a downsample layer when stride or channel count changes
if stride != 1 or self.in_channels != out_channels * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels * block.expansion),
)
layers = []
        # the first block may downsample
layers.append(block(self.in_channels, out_channels, stride, downsample, use_cbam=self.use_cbam))
self.in_channels = out_channels * block.expansion
        # append the remaining blocks
for _ in range(1, blocks):
layers.append(block(self.in_channels, out_channels, use_cbam=self.use_cbam))
return nn.Sequential(*layers)
def forward(self, x):
        # feature extraction
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
# if self.use_cbam:
# x = self.cbam1(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
# if self.use_cbam:
# x = self.cbam2(x)
        # classification head
x = self.avgpool(x)
x = torch.flatten(x, 1)
x = self.fc(x)
return x
# factory functions for ResNets of different depths
def resnet18_cbam(pretrained=False, **kwargs):
return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
def resnet34_cbam(pretrained=False, **kwargs):
return ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
def resnet50_cbam(pretrained=False, **kwargs):
return ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
def resnet101_cbam(pretrained=False, **kwargs):
return ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
def resnet152_cbam(pretrained=False, **kwargs):
return ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
# model test
if __name__ == "__main__":
    # build a ResNet-50 with CBAM attention
    model = resnet50_cbam(num_classes=10)
    # test input
    x = torch.randn(1, 3, 224, 224)
    y = model(x)
    print(f"input shape: {x.shape}")
    print(f"output shape: {y.shape}")

480
model/resnet_pre.py Normal file
View File

@ -0,0 +1,480 @@
import torch
import torch.nn as nn
from config import config as conf
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url
# from .utils import load_state_dict_from_url
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
'wide_resnet50_2', 'wide_resnet101_2']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class SpatialAttention(nn.Module):
def __init__(self, kernel_size=7):
super(SpatialAttention, self).__init__()
assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
padding = 3 if kernel_size == 7 else 1
self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
x = torch.cat([avg_out, max_out], dim=1)
x = self.conv1(x)
return self.sigmoid(x)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None, cam=False, bam=False):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
self.cam = cam
self.bam = bam
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
if self.cam:
if planes == 64:
self.globalAvgPool = nn.AvgPool2d(56, stride=1)
elif planes == 128:
self.globalAvgPool = nn.AvgPool2d(28, stride=1)
elif planes == 256:
self.globalAvgPool = nn.AvgPool2d(14, stride=1)
elif planes == 512:
self.globalAvgPool = nn.AvgPool2d(7, stride=1)
self.fc1 = nn.Linear(in_features=planes, out_features=round(planes / 16))
self.fc2 = nn.Linear(in_features=round(planes / 16), out_features=planes)
self.sigmoid = nn.Sigmoid()
if self.bam:
# the flag is replaced by the module itself; an nn.Module is truthy, so
# the `if self.bam:` check in forward() still behaves as a flag
self.bam = SpatialAttention()
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
if self.cam:
# squeeze-and-excitation style channel attention: keep the full feature
# map, pool, run the bottleneck MLP, then re-weight the original map
ori_out = out
out = self.globalAvgPool(out)
out = out.view(out.size(0), -1)
out = self.fc1(out)
out = self.relu(out)
out = self.fc2(out)
out = self.sigmoid(out)
out = out.view(out.size(0), out.size(1), 1, 1)
out = out * ori_out
if self.bam:
out = out * self.bam(out)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
# Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
# while original implementation places the stride at the first 1x1 convolution(self.conv1)
# according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
# This variant is also known as ResNet V1.5 and improves accuracy according to
# https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None, cam=False, bam=False):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
self.cam = cam
self.bam = bam
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
if self.cam:
if planes == 64:
self.globalAvgPool = nn.AvgPool2d(56, stride=1)
elif planes == 128:
self.globalAvgPool = nn.AvgPool2d(28, stride=1)
elif planes == 256:
self.globalAvgPool = nn.AvgPool2d(14, stride=1)
elif planes == 512:
self.globalAvgPool = nn.AvgPool2d(7, stride=1)
self.fc1 = nn.Linear(planes * self.expansion, round(planes / 4))
self.fc2 = nn.Linear(round(planes / 4), planes * self.expansion)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
if self.cam:
# channel attention: pool first, then re-weight the original feature map
ori_out = out
out = self.globalAvgPool(out)
out = out.view(out.size(0), -1)
out = self.fc1(out)
out = self.relu(out)
out = self.fc2(out)
out = self.sigmoid(out)
out = out.view(out.size(0), out.size(1), 1, 1)
out = out * ori_out
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=conf.embedding_size, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None, scale=conf.channel_ratio):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
print("ResNet scale: >>>>>>>>>> ", scale)
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.adaptiveMaxPool = nn.AdaptiveMaxPool2d((1, 1))
self.maxpool2 = nn.Sequential(
nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
nn.MaxPool2d(kernel_size=2, stride=1, padding=0)
)
self.layer1 = self._make_layer(block, int(64 * scale), layers[0])
self.layer2 = self._make_layer(block, int(128 * scale), layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, int(256 * scale), layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, int(512 * scale), layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(int(512 * block.expansion * scale), num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def _forward_impl(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
x = self.fc(x)
return x
def forward(self, x):
return self._forward_impl(x)
# def _resnet(arch, block, layers, pretrained, progress, **kwargs):
# model = ResNet(block, layers, **kwargs)
# if pretrained:
# state_dict = load_state_dict_from_url(model_urls[arch],
# progress=progress)
# model.load_state_dict(state_dict, strict=False)
# return model
class CustomResNet18(nn.Module):
def __init__(self, model, num_classes=conf.custom_num_classes):
super(CustomResNet18, self).__init__()
self.custom_model = nn.Sequential(*list(model.children())[:-1])
self.fc = nn.Linear(model.fc.in_features, num_classes)
def forward(self, x):
x = self.custom_model(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
model = ResNet(block, layers, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls[arch],
progress=progress)
src_state_dict = state_dict
target_state_dict = model.state_dict()
skip_keys = []
# skip tensors whose shapes don't match the checkpoint (e.g. when scale != 1.0)
for k in src_state_dict.keys():
if k not in target_state_dict:
continue
if src_state_dict[k].size() != target_state_dict[k].size():
skip_keys.append(k)
for k in skip_keys:
del src_state_dict[k]
missing_keys, unexpected_keys = model.load_state_dict(src_state_dict, strict=False)
return model
def resnet14(pretrained=True, progress=True, **kwargs):
r"""ResNet-14 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 1, 1, 2], pretrained, progress,
**kwargs)
def resnet18(pretrained=True, progress=True, **kwargs):
r"""ResNet-18 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
**kwargs: Additional arguments passed to ResNet, including:
scale (float): Channel scaling ratio (default: conf.channel_ratio)
"""
return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
**kwargs)
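# Usage sketch: with scale != 1.0 the conv/fc shapes no longer match the ImageNet
# checkpoint, so _resnet() drops the mismatched tensors and loads the rest with
# strict=False (assumes the torchvision weights are downloadable):
def _scaled_resnet18_demo():
    model = resnet18(pretrained=True, scale=0.75)
    return model(torch.randn(1, 3, 224, 224)).shape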
def resnet34(pretrained=False, progress=True, **kwargs):
r"""ResNet-34 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet50(pretrained=False, progress=True, **kwargs):
r"""ResNet-50 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet101(pretrained=False, progress=True, **kwargs):
r"""ResNet-101 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
**kwargs)
def resnet152(pretrained=False, progress=True, **kwargs):
r"""ResNet-152 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
**kwargs)
def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-50 32x4d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 4
return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
r"""ResNeXt-101 32x8d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 8
return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)
def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-50-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
r"""Wide ResNet-101-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)

4
model/utils.py Normal file
View File

@ -0,0 +1,4 @@
try:
from torch.hub import load_state_dict_from_url
except ImportError:
from torch.utils.model_zoo import load_url as load_state_dict_from_url

42
model/vit.py Normal file
View File

@ -0,0 +1,42 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import torch.nn as nn
from functools import partial
from timm.models.vision_transformer import VisionTransformer, _cfg
__all__ = [
'vit_small',
'vit_base',
]
def vit_small(**kwargs):
model = VisionTransformer(
patch_size=16, embed_dim=384, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, num_classes=256,
norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
# model.default_cfg = _cfg()
return model
def vit_base(**kwargs):
model = VisionTransformer(
patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, num_classes=256,
norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs)
model.default_cfg = _cfg(num_classes=256)
return model
if __name__ == '__main__':
img = torch.randn(8, 3, 224, 224)
vit = vit_base()
out = vit(img)
print(out.shape)
# print(count_parameters(vit))

331
test_ori.py Normal file
View File

@ -0,0 +1,331 @@
# -*- coding: utf-8 -*-
import os.path as osp
from typing import Dict, List, Set, Tuple
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
import json
import matplotlib.pyplot as plt
# from config import config as conf
from tools.dataset import get_transform
from configs import trainer_tools
import yaml
with open('configs/test.yml', 'r') as f:
conf = yaml.load(f, Loader=yaml.FullLoader)
# Constants from config
embedding_size = conf["base"]["embedding_size"]
img_size = conf["transform"]["img_size"]
device = conf["base"]["device"]
def unique_image(pair_list: str) -> Set[str]:
unique_images = set()
try:
with open(pair_list, 'r') as f:
for line in f:
line = line.strip()
if not line:
continue
try:
img1, img2, _ = line.split()
unique_images.update([img1, img2])
except ValueError as e:
print(f"Skipping malformed line: {line}")
except IOError as e:
print(f"Error reading pair list file: {e}")
raise
return unique_images
def group_image(images: Set[str], batch_size: int) -> List[List[str]]:
"""
Group image paths into batches of specified size.
Args:
images: Set of image paths to group
batch_size: Number of images per batch
Returns:
List of batches, where each batch is a list of image paths
"""
image_list = list(images)
num_images = len(image_list)
batches = []
for i in range(0, num_images, batch_size):
batch_end = min(i + batch_size, num_images)
batches.append(image_list[i:batch_end])
return batches
def _preprocess(images: list, transform) -> torch.Tensor:
res = []
for img in images:
im = Image.open(img)
im = transform(im)
res.append(im)
# data = torch.cat(res, dim=0) # shape: (batch, 128, 128)
# data = data[:, None, :, :] # shape: (batch, 1, 128, 128)
data = torch.stack(res)
return data
def test_preprocess(images: list, transform) -> torch.Tensor:
res = []
for img in images:
im = Image.open(img)
if im.mode == 'RGBA':
im = im.convert('RGB')
im = transform(im)
res.append(im)
data = torch.stack(res)
return data
def featurize(
images: List[str],
transform: callable,
net: nn.Module,
device: torch.device,
train: bool = False
) -> Dict[str, torch.Tensor]:
try:
# Select appropriate preprocessing
preprocess_fn = _preprocess if train else test_preprocess
# Preprocess and move to device
data = preprocess_fn(images, transform)
data = data.to(device)
net = net.to(device)
# Extract features without gradients (inputs cast to fp16 when models.half is set)
with torch.no_grad():
if conf['models']['half']:
data = data.half()
features = net(data)
# Create path-to-feature mapping
return {img: feature for img, feature in zip(images, features)}
except Exception as e:
print(f"Error in feature extraction: {e}")
raise
def cosin_metric(x1, x2):
return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
def threshold_search(y_score, y_true):
y_score = np.asarray(y_score)
y_true = np.asarray(y_true)
best_acc = 0
best_th = 0
for i in range(len(y_score)):
th = y_score[i]
y_test = (y_score >= th)
acc = np.mean((y_test == y_true).astype(int))
if acc > best_acc:
best_acc = acc
best_th = th
return best_acc, best_th
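# Tiny worked example: scores [0.9, 0.8, 0.3, 0.2] with labels [1, 1, 0, 0] are
# perfectly separable, so the sweep returns (1.0, 0.8):
def _threshold_search_demo():
    return threshold_search([0.9, 0.8, 0.3, 0.2], [1, 1, 0, 0])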
def showgrid(recall, recall_TN, PrecisePos, PreciseNeg, Correct):
x = np.linspace(start=0, stop=1.0, num=50, endpoint=True).tolist()
plt.figure(figsize=(10, 6))
plt.plot(x, recall, color='red', label='recall: TP/(TP+FN)')
plt.plot(x, recall_TN, color='black', label='recall_TN: TN/(TN+FP)')
plt.plot(x, PrecisePos, color='blue', label='PrecisePos: TP/(TP+FP)')
plt.plot(x, PreciseNeg, color='green', label='PreciseNeg: TN/(TN+FN)')
plt.plot(x, Correct, color='m', label='Correct: (TP+TN)/(TP+FP+TN+FN)')
plt.legend()
plt.xlabel('threshold')
# plt.ylabel('Similarity')
plt.grid(True, linestyle='--', alpha=0.5)
plt.savefig('grid.png')
plt.show()
plt.close()
def showHist(same, cross):
Same = np.array(same)
Cross = np.array(cross)
fig, axs = plt.subplots(2, 1)
axs[0].hist(Same, bins=50, edgecolor='black')
axs[0].set_xlim([-0.1, 1])
axs[0].set_title('Same Barcode')
axs[1].hist(Cross, bins=50, edgecolor='black')
axs[1].set_xlim([-0.1, 1])
axs[1].set_title('Cross Barcode')
plt.savefig('plot.png')
def compute_accuracy_recall(score, labels):
sequence = np.linspace(-1, 1, num=50)
recall, PrecisePos, PreciseNeg, recall_TN, Correct = [], [], [], [], []
Same = score[:len(score) // 2]
Cross = score[len(score) // 2:]
for th in sequence:
t_score = (score > th)
t_labels = (labels == 1)
TP = np.sum(np.logical_and(t_score, t_labels))
FN = np.sum(np.logical_and(np.logical_not(t_score), t_labels))
f_score = (score < th)
f_labels = (labels == 0)
TN = np.sum(np.logical_and(f_score, f_labels))
FP = np.sum(np.logical_and(np.logical_not(f_score), f_labels))
print("Threshold:{} TP:{},FP:{},TN:{},FN:{}".format(th, TP, FP, TN, FN))
# guard empty denominators instead of comparing a float to the string 'nan'
PrecisePos.append(0 if (TP + FP) == 0 else TP / (TP + FP))
PreciseNeg.append(0 if (TN + FN) == 0 else TN / (TN + FN))
recall.append(0 if (TP + FN) == 0 else TP / (TP + FN))
recall_TN.append(0 if (TN + FP) == 0 else TN / (TN + FP))
Correct.append(0 if (TP + FP + TN + FN) == 0 else (TP + TN) / (TP + FP + TN + FN))
showHist(Same, Cross)
showgrid(recall, recall_TN, PrecisePos, PreciseNeg, Correct)
def compute_accuracy(
feature_dict: Dict[str, torch.Tensor],
pair_list: str,
test_root: str
) -> Tuple[float, float]:
try:
with open(pair_list, 'r') as f:
pairs = f.readlines()
except IOError as e:
print(f"Error reading pair list: {e}")
raise
similarities = []
labels = []
for pair in pairs:
pair = pair.strip()
if not pair:
continue
try:
img1, img2, label = pair.split()
img1_path = osp.join(test_root, img1)
img2_path = osp.join(test_root, img2)
# Verify features exist
if img1_path not in feature_dict or img2_path not in feature_dict:
raise ValueError(f"Missing features for image pair: {img1_path}, {img2_path}")
# Get features and compute similarity
feat1 = feature_dict[img1_path].cpu().numpy()
feat2 = feature_dict[img2_path].cpu().numpy()
similarity = cosin_metric(feat1, feat2)
similarities.append(similarity)
labels.append(int(label))
except Exception as e:
print(f"Skipping invalid pair: {pair}. Error: {e}")
continue
# Find optimal threshold and accuracy
accuracy, threshold = threshold_search(similarities, labels)
compute_accuracy_recall(np.array(similarities), np.array(labels))
return accuracy, threshold
def deal_group_pair(pairList1, pairList2):
allsimilarity = []
for pair1 in pairList1:
one_similarity = []  # reset per image, otherwise the max accumulates across images
for pair2 in pairList2:
similarity = cosin_metric(pair1.cpu().numpy(), pair2.cpu().numpy())
one_similarity.append(similarity)
allsimilarity.append(max(one_similarity))  # max
# allsimilarity.append(sum(one_similarity) / len(one_similarity))  # mean
# allsimilarity.append(statistics.median(one_similarity))  # median
return allsimilarity
def compute_group_accuracy(content_list_read):
allSimilarity, allLabel = [], []
Same, Cross = [], []
for data_loaded in content_list_read:
print(data_loaded)
one_group_list = []
# assumes the same yaml layout as __main__ (conf['data'], conf['base'])
_, test_transform = get_transform(conf)
try:
for i in range(2):
images = [osp.join(conf['data']['test_val'], img) for img in data_loaded[i]]
group = group_image(images, conf['data']['test_batch_size'])
d = featurize(group[0], test_transform, model, conf['base']['device'])
one_group_list.append(d.values())
if data_loaded[-1] == '1':
similarity = deal_group_pair(one_group_list[0], one_group_list[1])
Same.append(similarity)
else:
similarity = deal_group_pair(one_group_list[0], one_group_list[1])
Cross.append(similarity)
allLabel.append(data_loaded[-1])
allSimilarity.extend(similarity)
except Exception as e:
continue
# print(allSimilarity)
# print(allLabel)
return allSimilarity, allLabel
def init_model():
tr_tools = trainer_tools(conf)
backbone_mapping = tr_tools.get_backbone()
if conf['models']['backbone'] in backbone_mapping:
model = backbone_mapping[conf['models']['backbone']]().to(conf['base']['device'])
else:
raise ValueError('unsupported backbone: {}'.format(conf['models']['backbone']))
print('load model {} '.format(conf['models']['backbone']))
if torch.cuda.device_count() > 1 and conf['base']['distributed']:
model = nn.DataParallel(model).to(conf['base']['device'])
model.load_state_dict(torch.load(conf['models']['model_path'], map_location=conf['base']['device']))
if conf['models']['half']:
model.half()
first_param_dtype = next(model.parameters()).dtype
print("dtype of the first model parameter: {}".format(first_param_dtype))
else:
# single-card path: same 'models' section of the yaml config
model.load_state_dict(torch.load(conf['models']['model_path'], map_location=conf['base']['device']))
if conf['models']['half']:
model.half()
first_param_dtype = next(model.parameters()).dtype
print("dtype of the first model parameter: {}".format(first_param_dtype))
return model
if __name__ == '__main__':
model = init_model()
model.eval()
if not conf['data']['group_test']:
images = unique_image(conf['data']['test_list'])
images = [osp.join(conf['data']['test_dir'], img) for img in images]
groups = group_image(images, conf['data']['test_batch_size'])  # batch the image paths
feature_dict = dict()
_, test_transform = get_transform(conf)
for group in groups:
d = featurize(group, test_transform, model, conf['base']['device'])
feature_dict.update(d)
accuracy, threshold = compute_accuracy(feature_dict, conf['data']['test_list'], conf['data']['test_dir'])
print(
"Test Model: {} Accuracy: {} Threshold: {}".format(conf['models']['model_path'], accuracy, threshold)
)
elif conf['data']['group_test']:
filename = conf['data']['test_group_json']
with open(filename, 'r', encoding='utf-8') as file:
content_list_read = json.load(file)
Similarity, Label = compute_group_accuracy(content_list_read)
compute_accuracy_recall(np.array(Similarity), np.array(Label))
# compute_group_accuracy(data_loaded)

0
tools/__init__.py Normal file
View File

68
tools/dataset.py Normal file
View File

@ -0,0 +1,68 @@
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms.functional as F
import torchvision.transforms as T
# from config import config as conf
import torch
def pad_to_square(img):
w, h = img.size
max_wh = max(w, h)
# give the remainder to the right/bottom so odd differences still yield a square
pad_w, pad_h = max_wh - w, max_wh - h
padding = [pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2]  # (left, top, right, bottom)
return F.pad(img, padding, fill=0, padding_mode='constant')
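# Quick check: a 300x200 RGB image pads to exactly 300x300.
def _pad_demo():
    from PIL import Image
    return pad_to_square(Image.new('RGB', (300, 200))).size  # (300, 300)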
def get_transform(cfg):
train_transform = T.Compose([
T.Lambda(pad_to_square),  # pad to square
T.ToTensor(),
T.Resize((cfg['transform']['img_size'], cfg['transform']['img_size']), antialias=True),
# T.RandomCrop(img_size * 4 // 5),
T.RandomHorizontalFlip(p=cfg['transform']['RandomHorizontalFlip']),
T.RandomRotation(cfg['transform']['RandomRotation']),
T.ColorJitter(brightness=cfg['transform']['ColorJitter']),
T.ConvertImageDtype(torch.float32),
T.Normalize(mean=[cfg['transform']['img_mean']], std=[cfg['transform']['img_std']]),
])
test_transform = T.Compose([
# T.Lambda(pad_to_square),  # pad to square
T.ToTensor(),
T.Resize((cfg['transform']['img_size'], cfg['transform']['img_size']), antialias=True),
T.ConvertImageDtype(torch.float32),
T.Normalize(mean=[cfg['transform']['img_mean']], std=[cfg['transform']['img_std']]),
])
return train_transform, test_transform
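# Minimal usage sketch, assuming a cfg dict shaped like configs/test.yml with a
# 'transform' section (img_size, flip/rotation/jitter strengths, mean/std):
def _transform_demo():
    from PIL import Image
    cfg = {'transform': {'img_size': 224, 'RandomHorizontalFlip': 0.5,
                         'RandomRotation': 10, 'ColorJitter': 0.2,
                         'img_mean': 0.5, 'img_std': 0.5}}
    _, test_t = get_transform(cfg)
    return test_t(Image.new('RGB', (300, 200))).shape  # torch.Size([3, 224, 224])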
def load_data(training=True, cfg=None):
train_transform, test_transform = get_transform(cfg)
if training:
dataroot = cfg['data']['data_train_dir']
transform = train_transform
# transform = conf.train_transform
batch_size = cfg['data']['train_batch_size']
else:
dataroot = cfg['data']['data_val_dir']
# transform = conf.test_transform
transform = test_transform
batch_size = cfg['data']['val_batch_size']
data = ImageFolder(dataroot, transform=transform)
class_num = len(data.classes)
loader = DataLoader(data,
batch_size=batch_size,
shuffle=True,
pin_memory=cfg['base']['pin_memory'],
num_workers=cfg['data']['num_workers'],
drop_last=True)
return loader, class_num
# def load_gift_data(action):
# train_data = ImageFolder(conf.train_gift_root, transform=conf.train_transform)
# train_dataset = DataLoader(train_data, batch_size=conf.train_gift_batchsize, shuffle=True,
# pin_memory=conf.pin_memory, num_workers=conf.num_workers, drop_last=True)
# val_data = ImageFolder(conf.test_gift_root, transform=conf.test_transform)
# val_dataset = DataLoader(val_data, batch_size=conf.val_gift_batchsize, shuffle=True,
# pin_memory=conf.pin_memory, num_workers=conf.num_workers, drop_last=True)
# test_data = ImageFolder(conf.test_gift_root, transform=conf.test_transform)
# test_dataset = DataLoader(test_data, batch_size=conf.test_gift_batchsize, shuffle=True,
# pin_memory=conf.pin_memory, num_workers=conf.num_workers, drop_last=True)
# return train_dataset, val_dataset, test_dataset

10
tools/dataset.txt Normal file
View File

@ -0,0 +1,10 @@
./quant_imgs/20179457_20240924-110903_back_addGood_b82d2842766e_80_15583929052_tid-8_fid-72_bid-3.jpg
./quant_imgs/6928926002103_20240309-195044_front_returnGood_70f75407ef0e_225_18120111822_14_01.jpg
./quant_imgs/6928926002103_20240309-212145_front_returnGood_70f75407ef0e_225_18120111822_11_01.jpg
./quant_imgs/6928947479083_20241017-133830_front_returnGood_5478c9a48b7e_10_13799009402_tid-1_fid-20_bid-1.jpg
./quant_imgs/6928947479083_20241018-110450_front_addGood_5478c9a48c28_165_13773168720_tid-6_fid-36_bid-1.jpg
./quant_imgs/6930044166421_20240117-141516_c6a23f41-5b16-44c6-a03e-c32c25763442_back_returnGood_6930044166421_17_01.jpg
./quant_imgs/6930044166421_20240308-150916_back_returnGood_70f75407ef0e_175_13815402763_7_01.jpg
./quant_imgs/6930044168920_20240117-165633_3303629b-5fbd-423b-913d-8a64c1aa51dc_front_addGood_6930044168920_26_01.jpg
./quant_imgs/6930058201507_20240305-175434_front_addGood_70f75407ef0e_95_18120111822_28_01.jpg
./quant_imgs/6930639267885_20241014-120446_back_addGood_5478c9a48c3e_135_13773168720_tid-5_fid-99_bid-0.jpg

112
tools/fp32comparefp16.py Normal file
View File

@ -0,0 +1,112 @@
import os
import os.path as osp
import json
import numpy as np
import torch
from test_ori import group_image, init_model, featurize
from config import config as conf
def compare_fp16_fp32(values_pf16, values_pf32, dataTest):
if dataTest:
norm_values_pf16 = torch.norm(values_pf16.float(), p=2)
norm_values_pf32 = torch.norm(values_pf32, p=2)
# distance between the two feature vectors themselves, not between their norms
euclidean_distance = torch.norm(values_pf16.float() - values_pf32, p=2)
print(f"Euclidean distance: {euclidean_distance}")
cosine_sim = torch.dot(values_pf16.float(), values_pf32) / (norm_values_pf16 * norm_values_pf32)
print(f"cosine similarity: {cosine_sim}")
else:
pass
def cosin_metric(x1, x2, fp32=True):
if fp32:
return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
else:
x1_fp16 = x1.astype(np.float16)
x2_fp16 = x2.astype(np.float16)
return np.dot(x1_fp16, x2_fp16) / (np.linalg.norm(x1_fp16) * np.linalg.norm(x2_fp16))
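# Sanity sketch: on random 256-d vectors the fp16 and fp32 cosine values
# typically agree to about three decimal places:
def _fp16_cosine_demo():
    v1, v2 = np.random.rand(256), np.random.rand(256)
    return cosin_metric(v1, v2, True), cosin_metric(v1, v2, False)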
def deal_group_pair(pairList1, pairList2):
one_similarity_fp16, one_similarity_fp32, allsimilarity_fp32, allsimilarity_fp16 = [], [], [], []
for pair1 in pairList1:
for pair2 in pairList2:
# similarity = cosin_metric(pair1.cpu().numpy(), pair2.cpu().numpy())
one_similarity_fp32.append(cosin_metric(pair1.cpu().numpy(), pair2.cpu().numpy(), True))
one_similarity_fp16.append(cosin_metric(pair1.cpu().numpy(), pair2.cpu().numpy(), False))
allsimilarity_fp32.append(one_similarity_fp32)
allsimilarity_fp16.append(one_similarity_fp16)
one_similarity_fp16, one_similarity_fp32 = [], []
return np.array(allsimilarity_fp32), np.array(allsimilarity_fp16)
def compute_group_accuracy(content_list_read, model):
allSimilarity, allLabel = [], []
Same, Cross = [], []
flag_same = True
flag_diff = True
for data_loaded in content_list_read:
one_group_list = []
try:
if (flag_same and str(data_loaded[-1]) == '1') or (flag_diff and str(data_loaded[-1]) == '0'):
for i in range(2):
images = [osp.join(conf.test_val, img) for img in data_loaded[i]]
group = group_image(images, conf.test_batch_size)
d = featurize(group[0], conf.test_transform, model, conf.device)
one_group_list.append(d.values())
if str(data_loaded[-1]) == '1':
flag_same = False
allsimilarity_fp32, allsimilarity_fp16 = deal_group_pair(one_group_list[0], one_group_list[1])
print('fp32 same-- >', allsimilarity_fp32)
print('fp16 same-- >', allsimilarity_fp16)
else:
flag_diff = False
allsimilarity_fp32, allsimilarity_fp16 = deal_group_pair(one_group_list[0], one_group_list[1])
print('fp32 diff-- >', allsimilarity_fp32)
print('fp16 diff-- >', allsimilarity_fp16)
except Exception as e:
continue
# print(allSimilarity)
# print(allLabel)
return allSimilarity, allLabel
def get_feature_list(imgPth):
imgs = get_files(imgPth)
group = group_image(imgs, conf.test_batch_size)
model = init_model()
model.eval()
fe = featurize(group[0], conf.test_transform, model, conf.device)
return fe
def get_files(imgPth):
imgsList = []
for img in os.walk(imgPth):
for img_name in img[2]:
img_path = os.sep.join([img[0], img_name])
imgsList.append(img_path)
return imgsList
def compare(imgPth, group=False):
model = init_model()
model.eval()
if not group:
values_pf16, values_pf32 = [], []
fe = get_feature_list(imgPth)
# pdb.set_trace()
values_pf32 += [value.cpu() for value in fe.values()]
values_pf16 += [value.cpu().half() for value in fe.values()]
for value_pf16, value_pf32 in zip(values_pf16, values_pf32):
compare_fp16_fp32(value_pf16, value_pf32, dataTest=True)
else:
filename = conf.test_group_json
with open(filename, 'r', encoding='utf-8') as file:
content_list_read = json.load(file)
compute_group_accuracy(content_list_read, model)
if __name__ == '__main__':
imgPth = './data/test/inner/3701375401900'
compare(imgPth)

369
tools/gift_assessment.py Normal file
View File

@ -0,0 +1,369 @@
import os
import shutil
import sys
sys.path.append('../model')
import matplotlib.pyplot as plt
import numpy as np
from model.mlp import Net2, Net3, Net4
from model import resnet18
import torch
from gift_data_pretreatment import getFeatureList
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def init_model(pkl_flag):
res_pth = r"../checkpoints/resnet18_1009/best.pth"
if pkl_flag:
gift_pth = r'../checkpoints/gift_model/action2/gift_v11.pth'
gift_model = Net3(pretrained=True, num_classes=1)
gift_model.load_state_dict(torch.load(gift_pth))
else:
gift_pth = r'../checkpoints/gift_model/action3/best.pth'
gift_model = Net4('resnet18', True, True)  # pretrained backbone
try:
print('>>multiple_cards load pre model <<')
gift_model.load_state_dict({k.replace('module.', ''): v for k, v in
torch.load(gift_pth,
map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu')).items()})
except Exception as e:
print('>> load pre model <<')
gift_model.load_state_dict(torch.load(gift_pth,
map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu')))
res_model = resnet18()
res_model.load_state_dict({k.replace('module.', ''): v for k, v in
torch.load(res_pth, map_location=torch.device(device)).items()})
return res_model, gift_model
def showHist(nongifts, gifts):
# Same = filtered_data[:, 1].astype(np.float32)
# Cross = filtered_data[:, 2].astype(np.float32)
fig, axs = plt.subplots(2, 1)
axs[0].hist(nongifts, bins=50, edgecolor='blue')
axs[0].set_xlim([-0.1, 1])
axs[0].set_title('nongifts')
axs[1].hist(gifts, bins=50, edgecolor='green')
axs[1].set_xlim([-0.1, 1])
axs[1].set_title('gifts')
# plt.savefig('plot.png')
plt.show()
def calculate_precision_recall(nongift, gift, points):
precision, recall = [], []
for point in points:
TP = np.sum(gift > point)
FN = np.sum(gift < point)
FP = np.sum(nongift > point)
TN = np.sum(nongift < point)
if TP == 0:
precision.append(0)
recall.append(0)
else:
precision.append(TP / (TP + FP))
recall.append(TP / (TP + FN))
print("point >> {} TP>>{}, FP>>{}, TN>>{}, FN>>{}".format(point, TP, FP, TN, FN))
if point == 0.5:
print("point >> {} TP>>{}, FP>>{}, TN>>{}, FN>>{}".format(point, TP, FP, TN, FN))
return precision, recall
def showgrid(all_prec, all_recall, points):
plt.figure(figsize=(10, 6))
plt.plot(points[:-1], all_prec[:-1], color='blue', label='precision')
plt.plot(points[:-1], all_recall[:-1], color='red', label='recall')
plt.legend()
plt.xlabel('threshold')
# plt.ylabel('Similarity')
plt.grid(True, linestyle='--', alpha=0.5)
# plt.savefig('grid.png')
plt.show()
plt.close()
def discriminate_action(roots):  # decide whether a session is an add or a return event
pth = os.sep.join([roots, 'process.data'])
with open(pth, 'r') as f:
lines = f.readlines()
for line in lines:
content = line.strip()
if 'weightValue' in content:
# print(content.split(":")[-1].split(',')[0])
if int(content.split(":")[-1].split(',')[0]) > 0:
return 'add'
else:
return 'return'
def median(lst):
sorted_lst = sorted(lst)
n = len(sorted_lst)
if n % 2 == 1:
# odd length: the middle element
return sorted_lst[n // 2]
else:
# even length: the mean of the two middle elements
mid1 = sorted_lst[(n // 2) - 1]
mid2 = sorted_lst[n // 2]
return (mid1 + mid2) / 2
def get_special_data(data, p):
# print(data)
length = len(data)
if length > 5:
if p == 'max':
return max(data[:round(length * 0.5)])
elif p == 'average':
return sum(data[:round(length * 0.5)]) / len(data[:round(length * 0.5)])
elif p == 'median':
return median(data[:round(length * 0.5)])
else:
return sum(data) / len(data)
def read_data_file(pth):
result = []
with open(pth, 'r') as data_file:
lines = data_file.readlines()
for line in lines:
if line.split(':')[0] == 'free_gift__result':
if '0_tracking_output.data' in pth:
result = line.split(':')[1].split(',')[:-1]
else:
result = line.split(':')[1].split(',')[:-2]
result = [float(i) for i in result]
return result
def get_tracking_data(pth):
result = []
with open(pth, 'r') as data_file:
lines = data_file.readlines()
for line in lines:
if len(line.split(',')) == 65:
result.append([float(item) for item in line.split(',')[:-1]])
return result
def clean_return_data(pth):
for roots, dirs, files in os.walk(pth):
# print(roots, dirs, files)
if len(dirs) == 0:
flag = discriminate_action(roots)
if flag == 'return':
shutil.rmtree(roots)
def get_gift_files(pth):  # analyze the result files produced by a test run
add_special_output_0, return_special_output_0, return_special_output_1, add_special_output_1 = [], [], [], []
add_tracking_output_0, return_tracking_output_0, add_tracking_output_1, return_tracking_output_1 = [], [], [], []
for roots, dirs, files in os.walk(pth):
# print(roots, dirs, files)
if len(dirs) == 0:
flag = discriminate_action(roots)
for file in files:
if file == '0_tracking_output.data':
result = read_data_file(os.path.join(roots, file))
if not len(result) == 0:
if flag == 'add':
add_special_output_0.append(get_special_data(result, 'average'))  # add, rear camera
else:
return_special_output_0.append(get_special_data(result, 'average'))  # return, rear camera
if flag == 'add':
add_tracking_output_0 += read_data_file(os.path.join(roots, file))
else:
return_tracking_output_0 += read_data_file(os.path.join(roots, file))
elif file == '1_tracking_output.data':
result = read_data_file(os.path.join(roots, file))
if not len(result) == 0:
if flag == 'add':
add_special_output_1.append(get_special_data(result, 'average'))  # add, front camera
else:
return_special_output_1.append(get_special_data(result, 'average'))  # return, front camera
if flag == 'add':
add_tracking_output_1 += read_data_file(os.path.join(roots, file))
else:
return_tracking_output_1 += read_data_file(os.path.join(roots, file))
comprehensive_dicts = {"add_special_output_0": add_special_output_0,
"return_special_output_0": return_special_output_0,
"add_tracking_output_0": add_tracking_output_0,
"return_tracking_output_0": return_tracking_output_0,
"add_special_output_1": add_special_output_1,
"return_special_output_1": return_special_output_1,
"add_tracking_output_1": add_tracking_output_1,
"return_tracking_output_1": return_tracking_output_1,
}
# print(tracking_output_0, tracking_output_1)
showHist(np.array(comprehensive_dicts['add_tracking_output_0']),
np.array(comprehensive_dicts['add_tracking_output_1']))
# showHist(np.array(comprehensive_dicts['add_special_output_0']),
# np.array(comprehensive_dicts['add_special_output_1']))
return comprehensive_dicts
def get_feature_array(img_pth_lists, res_model, gift_model, pkl_flag=True):
features_np = []
if pkl_flag:
for img_lists in img_pth_lists:
# print(img_lists)
fe_nps = getFeatureList(None, img_lists, res_model)
# fe_nps.squeeze()
try:
fe_nps = fe_nps[0][:, 256:]
except Exception as e:
print(e)
continue
fe_nps = torch.from_numpy(fe_nps)
fe_nps = fe_nps.view(fe_nps.shape[0], 64, 13, 13)
if len(fe_nps):
fe_np = gift_model(fe_nps)
fe_np = np.squeeze(fe_np.detach().numpy())
features_np.append(fe_np)
else:
for img_lists in img_pth_lists:
fe_nps = getFeatureList(None, img_lists, gift_model)
if len(fe_nps) > 0:
fe_nps = np.concatenate(fe_nps)
features_np.append(fe_nps)
return features_np
import pickle
def create_gift_subimg_np(data_pth, pkl_flag):
gift_array_pth = os.path.join(data_pth, 'gift.pkl')
nongift_array_pth = os.path.join(data_pth, 'nongift.pkl')
res_model, gift_model = init_model(pkl_flag)
res_model = res_model.eval()
gift_model = gift_model.eval()
gift_img_pth_list, gift_lists, nongift_img_pth_list, nongift_lists = [], [], [], []
for root, dirs, files in os.walk(data_pth):
if ('commodity' in root and 'subimg' in root):
print("commodity >> {}".format(root))
for file in files:
nongift_img_pth_list.append(os.sep.join([root, file]))
nongift_lists.append(nongift_img_pth_list)
nongift_img_pth_list = []
elif ('Havegift' in root and 'subimg' in root):
print("Havegift >> {}".format(root))
for file in files:
gift_img_pth_list.append(os.sep.join([root, file]))
gift_lists.append(gift_img_pth_list)
gift_img_pth_list = []
nongift = get_feature_array(nongift_lists, res_model, gift_model, pkl_flag)
gift = get_feature_array(gift_lists, res_model, gift_model, pkl_flag)
with open(nongift_array_pth, 'wb') as file:
pickle.dump(nongift, file)
with open(gift_array_pth, 'wb') as file:
pickle.dump(gift, file)
def top_25_percent_mean(arr):
# sort descending and return the top 25% slice; callers apply np.mean themselves
sorted_arr = np.sort(arr)[::-1]
top_25_percent_length = int(len(sorted_arr) * 0.25)
return sorted_arr[:top_25_percent_length]
def assess_gift_subimg(data_pth, pkl_flag=False):  # evaluate the segmented sub-images
points = (np.linspace(1, 100, 100)) / 100
gift_pkl_pth = os.path.join(data_pth, 'gift.pkl')
nongift_pkl_pth = os.path.join(data_pth, 'nongift.pkl')
if not os.path.exists(gift_pkl_pth):
create_gift_subimg_np(data_pth, pkl_flag)
with open(nongift_pkl_pth, 'rb') as f:
nongift = pickle.load(f)
with open(gift_pkl_pth, 'rb') as f:
gift = pickle.load(f)
# showHist(nongift.flatten(), gift.flatten())
'''
top-quartile mean
'''
nongift_mean = [np.mean(top_25_percent_mean(items)) for items in nongift]
gift_mean = [np.mean(top_25_percent_mean(items)) for items in gift]
'''
median
'''
# nongift_mean = [np.median(items) for items in nongift]
# gift_mean = [np.median(items) for items in gift]
'''
all raw values
'''
# nongifts = [items for items in nongift]
# gifts = [items for items in gift]
# showHist(nongifts, gifts)
'''
mean
'''
# nongift_mean = [np.mean(items) for items in nongift]
# gift_mean = [np.mean(items) for items in gift]
showHist(np.array(nongift_mean), np.array(gift_mean))
precision, recall = calculate_precision_recall(np.array(nongift_mean),
np.array(gift_mean),
points)
showgrid(precision, recall, points)
def get_comprehensive_dicts(data_pth):
gift_pth = r'../checkpoints/gift_model/action2/best.pth'
g_model = Net3(pretrained=True, num_classes=1)
g_model.load_state_dict(torch.load(gift_pth))
g_model.eval()
result = []
file_name = ['0_tracking_output.data',
'1_tracking_output.data']
for root, dirs, files in os.walk(data_pth):
if not len(dirs):
for file in files:
if file in file_name:
print(os.path.join(root, file))
result += get_tracking_data(os.path.join(root, file))
result = torch.from_numpy(np.array(result))
inputs = result.view(result.shape[0], 64, 1, 1)
inputs = inputs.to('cpu').to(torch.float32)
scores = g_model(inputs)
print(scores)
if __name__ == '__main__':
# pth = r'\\192.168.1.28\\share\\测试视频数据以及日志\\各模块测试记录\\赠品测试\\20241203赠品测试数据\\赠品\\images'
# pth = r'\\192.168.1.28\\share\\测试视频数据以及日志\\各模块测试记录\\赠品测试\\20241203赠品测试数据\\没有赠品的商品\\images'
# pth = r'\\192.168.1.28\\share\\测试视频数据以及日志\\各模块测试记录\\赠品测试\\20241203赠品测试数据\\同样的商品没有捆绑赠品\\images'
# pth = r'\\192.168.1.28\\share\\测试视频数据以及日志\\各模块测试记录\\赠品测试\\20241213赠品测试数据\\赠品'
# pth = r'C:\Users\HP\Desktop\zengpin\1227'
# get_gift_files(pth)
# analyze from segmented sub-images
pth = r'D:\Project\contrast_nettest\data\gift_test'
assess_gift_subimg(pth)
# analyze from full test-run datasets
# pth = r'C:\Users\HP\Desktop\zengpin\1231'
# get_comprehensive_dicts(pth)
# delete return-action sessions
# pth = r'C:\Users\HP\Desktop\gift_test\20241213\非赠品'
# clean_return_data(pth)

View File

@ -0,0 +1,92 @@
import torch
from config import config as conf
from PIL import Image
import numpy as np
def convert_rgba_to_rgb(image_path, output_path=None):
"""
Convert a 4-channel RGBA PNG to 3-channel RGB and save it.
:param image_path: path of the input image
:param output_path: where to save the result (defaults to overwriting image_path)
"""
img = Image.open(image_path)
# .convert('RGB') drops the alpha channel
if img.mode == 'RGBA':
img_rgb = img.convert('RGB')
img_rgb.save(output_path or image_path)
# print(f"Image converted from RGBA to RGB and saved to {output_path or image_path}")
# else:
# # already RGB (or another mode): nothing to do
def test_preprocess(images: list, actionModel=False) -> torch.Tensor:
res = []
for img in images:
try:
# print(img)
im = conf.test_transform(img) if actionModel else conf.test_transform(Image.open(img))
res.append(im)
except Exception:
continue
data = torch.stack(res)
return data
def inference(images, model, actionModel=False):
data = test_preprocess(images, actionModel)
if torch.cuda.is_available():
data = data.to(conf.device)
features = model(data)
return features
def group_image(images, batch=64) -> list:
"""Group image paths by batch size"""
size = len(images)
res = []
for i in range(0, size, batch):
end = min(batch + i, size)
res.append(images[i:end])
return res
def normalize(queFeatList):
for num1 in range(len(queFeatList)):
for num2 in range(len(queFeatList[num1])):
queFeatList[num1][num2] = queFeatList[num1][num2] / np.linalg.norm(queFeatList[num1][num2])
return queFeatList
def getFeatureList(barList, imgList, model):
# featList = [[] for i in range(len(barList))]
# for index, feat in enumerate(imgList):
fe_nps = []
groups = group_image(imgList)
for group in groups:
feat_tensor = inference(group, model)
# for fe in feat_tensor:
if feat_tensor.device.type == 'cpu':
fe_np = feat_tensor.squeeze().detach().numpy()
# fe_np = fe_np[:, 256:]
# fe_np = fe_np.reshape(fe_np.shape[0], fe_np.shape[1], 1, 1)
else:
fe_np = feat_tensor.squeeze().detach().cpu().numpy()
# fe_np = fe_np[:, 256:]
# fe_np = fe_np[256:]
# fe_np = fe_np.reshape(fe_np.shape[0], fe_np.shape[1], 1, 1)
# fe_np = fe_np.reshape(1, fe_np.shape[0], 1, 1)
# print(fe_np)
fe_nps.append(fe_np)
# if fe_nps:
# merged_fe_np = np.concatenate(fe_nps, axis=0)
# else:
# merged_fe_np = np.array([]) #
# fe_list = normalize(fe_nps)
return fe_nps

118
tools/json_contrast.py Normal file
View File

@ -0,0 +1,118 @@
import json
import random
import numpy as np
import matplotlib.pyplot as plt
def showHist(same, cross):
Same = np.array(same)
Cross = np.array(cross)
fig, axs = plt.subplots(2, 1)
axs[0].hist(Same, bins=50, edgecolor='black')
axs[0].set_xlim([-0.1, 1])
axs[0].set_title('Same Barcode')
axs[1].hist(Cross, bins=50, edgecolor='black')
axs[1].set_xlim([-0.1, 1])
axs[1].set_title('Cross Barcode')
# plt.savefig('plot.png')
plt.show()
def showgrid(recall, recall_TN, PrecisePos, PreciseNeg, Correct):
x = np.linspace(start=0, stop=1.0, num=50, endpoint=True).tolist()
plt.figure(figsize=(10, 6))
plt.plot(x, recall, color='red', label='recall: TP/(TP+FN)')
plt.plot(x, recall_TN, color='black', label='recall_TN: TN/(TN+FP)')
plt.plot(x, PrecisePos, color='blue', label='PrecisePos: TP/(TP+FP)')
plt.plot(x, PreciseNeg, color='green', label='PreciseNeg: TN/(TN+FN)')
plt.plot(x, Correct, color='m', label='Correct: (TP+TN)/(TP+FP+TN+FN)')
plt.legend()
plt.xlabel('threshold')
# plt.ylabel('Similarity')
plt.grid(True, linestyle='--', alpha=0.5)
plt.savefig('grid.png')
plt.show()
plt.close()
def compute_accuracy_recall(score, labels):
sequence = np.linspace(-1, 1, num=50)
recall, PrecisePos, PreciseNeg, recall_TN, Correct = [], [], [], [], []
Same = score[:len(score) // 2]
Cross = score[len(score) // 2:]
for th in sequence:
t_score = (score > th)
t_labels = (labels == 1)
TP = np.sum(np.logical_and(t_score, t_labels))
FN = np.sum(np.logical_and(np.logical_not(t_score), t_labels))
f_score = (score < th)
f_labels = (labels == 0)
TN = np.sum(np.logical_and(f_score, f_labels))
FP = np.sum(np.logical_and(np.logical_not(f_score), f_labels))
print("Threshold:{} TP:{},FP:{},TN:{},FN:{}".format(th, TP, FP, TN, FN))
# guard empty denominators instead of comparing a float to the string 'nan'
PrecisePos.append(0 if (TP + FP) == 0 else TP / (TP + FP))
PreciseNeg.append(0 if (TN + FN) == 0 else TN / (TN + FN))
recall.append(0 if (TP + FN) == 0 else TP / (TP + FN))
recall_TN.append(0 if (TN + FP) == 0 else TN / (TN + FP))
Correct.append(0 if (TP + FP + TN + FN) == 0 else (TP + TN) / (TP + FP + TN + FN))
showHist(Same, Cross)
showgrid(recall, recall_TN, PrecisePos, PreciseNeg, Correct)
def get_similarity(features1, features2, n, m):
features1 = np.array(features1)
features2 = np.array(features2)
all_similarity = []
for feature1 in features1:
for feature2 in features2:
similarity = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
all_similarity.append(similarity)
np_all_array = np.array(all_similarity).reshape(len(features1), len(features2))
if n == 5 and m == 5:
print(all_similarity)
return np.mean(np_all_array), all_similarity
# return sum(all_similarity)/len(all_similarity), all_similarity
# return max(all_similarity), all_similarity
def deal_similarity(dicts):
all_similarity = []
similarity = []
same_barcode, diff_barcode = [], []
for n, (key1, value1) in enumerate(dicts.items()):
print('key1 >> {}'.format(key1))
for m, (key2, value2) in enumerate(dicts.items()):
print('key1 >> {} key2 >> {} pair index {},{}'.format(key1, key2, n, m))
max_similarity, some_similarity = get_similarity(value1, value2, n, m)
similarity.append(max_similarity)
if key1 == key2:
same_barcode += some_similarity
else:
diff_barcode += some_similarity
all_similarity.append(similarity)
similarity = []
all_similarity = np.array(all_similarity)
random.shuffle(diff_barcode)
same_list = [1] * len(same_barcode)
diff_list = [0] * len(same_barcode)
all_list = same_list + diff_list
all_score = same_barcode + diff_barcode[:len(same_barcode)]
compute_accuracy_recall(np.array(all_score), np.array(all_list))
print(all_similarity.shape)
if __name__ == '__main__':
with open('../search_library/data_zhanting.json', 'r') as file:
data = json.load(file)
dicts = {}
for entry in data['total']:  # avoid shadowing the builtin `dict`
dicts[entry['key']] = entry['value']
deal_similarity(dicts)

View File

@ -0,0 +1,63 @@
import torch
from model import resnet18
from config import config as conf
from collections import OrderedDict
def transform_onnx_model(model_name, pretrained_weights='checkpoints/v3_small.pth'):
# build the model
if model_name == 'resnet18':
model = resnet18(scale=0.75)
print('model_name >>> {}'.format(model_name))
if conf.multiple_cards:
model = model.to(torch.device('cpu'))
checkpoint = torch.load(pretrained_weights)
new_state_dict = OrderedDict()
for k, v in checkpoint.items():
name = k[7:] # remove "module."
new_state_dict[name] = v
model.load_state_dict(new_state_dict)
else:
model.load_state_dict(torch.load(pretrained_weights, map_location=torch.device('cpu')))
# try:
# model.load_state_dict(torch.load(pretrained_weights, map_location=torch.device('cpu')))
# except Exception as e:
# print(e)
# # model.load_state_dict({k.replace('module.', ''): v for k, v in torch.load(pretrained_weights, map_location='cpu').items()})
# model = nn.DataParallel(model).to(conf.device)
# model.load_state_dict(torch.load(conf.test_model, map_location=torch.device('cpu')))
# export to ONNX
if model_name == 'gift_type2':
input_shape = [1, 64, 13, 13]
elif model_name == 'gift_type3':
input_shape = [1, 3, 224, 224]
else:
# assume a channels*height*width input, e.g. 3*224*224
input_shape = [1, 3, 224, 224]
output_file = pretrained_weights.replace('pth', 'onnx')
# switch to inference mode before exporting/tracing
model.eval()
# export the model
torch.onnx.export(model,
torch.randn(input_shape),
output_file,
verbose=True,
input_names=['input'],
output_names=['output'])  # opset_version=12 if needed
trace_model = torch.jit.trace(model, torch.randn(1, 3, 224, 224))
trace_model.save(output_file.replace('.onnx', '.pt'))
print(f"Model exported to {output_file}")
if __name__ == '__main__':
# model_name is one of ['resnet18', 'gift_type2', 'gift_type3']: gift_type2 scores
# resnet18 intermediate features, gift_type3 runs on the raw image
transform_onnx_model(model_name='resnet18',
pretrained_weights='./checkpoints/resnet18_scale=1.0/best.pth')

View File

@ -0,0 +1,186 @@
import os
import urllib
import traceback
import time
import sys
import numpy as np
import cv2
from config import config as conf
from rknn.api import RKNN
# ONNX_MODEL = 'resnet50v2.onnx'
# RKNN_MODEL = 'resnet50v2.rknn'
ONNX_MODEL = 'checkpoints/resnet18_scale=1.0/best.onnx'
RKNN_MODEL = 'checkpoints/resnet18_scale=1.0/best.rknn'
# ONNX_MODEL = 'v3_small_0424.onnx'
# RKNN_MODEL = 'v3_small_0424.rknn'
def show_outputs(outputs):
# print('***************outputs', outputs)
output = outputs[0][0]
# print('len(outputs)',len(output), output)
output_sorted = sorted(output, reverse=True)
top5_str = 'resnet50v2\n-----TOP 5-----\n'
for i in range(5):
value = output_sorted[i]
index = np.where(output == value)
for j in range(len(index)):
if (i + j) >= 5:
break
if value > 0:
topi = '{}: {}\n'.format(index[j], value)
else:
topi = '-1: 0.0\n'
top5_str += topi
# pdb.set_trace()
print(top5_str)
def readable_speed(speed):
speed_bytes = float(speed)
speed_kbytes = speed_bytes / 1024
if speed_kbytes > 1024:
speed_mbytes = speed_kbytes / 1024
if speed_mbytes > 1024:
speed_gbytes = speed_mbytes / 1024
return "{:.2f} GB/s".format(speed_gbytes)
else:
return "{:.2f} MB/s".format(speed_mbytes)
else:
return "{:.2f} KB/s".format(speed_kbytes)
def show_progress(blocknum, blocksize, totalsize):
speed = (blocknum * blocksize) / (time.time() - start_time)
speed_str = " Speed: {}".format(readable_speed(speed))
recv_size = blocknum * blocksize
f = sys.stdout
progress = (recv_size / totalsize)
progress_str = "{:.2f}%".format(progress * 100)
n = round(progress * 50)
s = ('#' * n).ljust(50, '-')
f.write(progress_str.ljust(8, ' ') + '[' + s + ']' + speed_str)
f.flush()
f.write('\r\n')
if __name__ == '__main__':
# Create RKNN object
rknn = RKNN(verbose=True)
# If resnet50v2 does not exist, download it.
# Download address:
# https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.onnx
if not os.path.exists(ONNX_MODEL):
print('--> Download {}'.format(ONNX_MODEL))
url = 'https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.onnx'
download_file = ONNX_MODEL
try:
start_time = time.time()
urllib.request.urlretrieve(url, download_file, show_progress)
except Exception:
print('Download {} failed.'.format(download_file))
print(traceback.format_exc())
exit(-1)
print('done')
# pre-process config
print('--> config model')
# rknn.config(mean_values=[123.675, 116.28, 103.53], std_values=[58.82, 58.82, 58.82])
rknn.config(
mean_values=[[127.5, 127.5, 127.5]],
std_values=[[127.5, 127.5, 127.5]],
target_platform='rk3588',
model_pruning=False,
compress_weight=False,
single_core_mode=True)
# rknn.config(
# mean_values=[[127.5, 127.5, 127.5]],  # use [[127.5]] for single-channel input
# std_values=[[127.5, 127.5, 127.5]],  # use [[127.5]] for single-channel input
# target_platform='rk3588',
# # quantize_dtype='int8',
# # quantize_algo='normal',
# # output_optimize=False,
# # output_format='rknnb'
# )
print('done')
# Load model
print('--> Loading model')
ret = rknn.load_onnx(model=ONNX_MODEL)
if ret != 0:
print('Load model failed!')
exit(ret)
print('done')
# Build model
print('--> Building model')
ret = rknn.build(do_quantization=True, dataset='./dataset.txt')
# ret = rknn.build(do_quantization=False, dataset='./dataset.txt')
if ret != 0:
print('Build model failed!')
exit(ret)
print('done')
# Export rknn model
print('--> Export rknn model')
ret = rknn.export_rknn(RKNN_MODEL)
if ret != 0:
print('Export rknn model failed!')
exit(ret)
print('done')
# Set inputs
img = cv2.imread('./dog_224x224.jpg')
# img = cv2.imread('./data/gift_test/Havegift/20241213-161415-cb8e0762-f376-45d1-8f36-7dc070990fa5/subimg/cam1_9_tid2_fid(18, 33250169482).png')
# print('img', img)
# with open('pixel_values.txt', 'w') as file:
# for y in range(img.shape[0]):
# for x in range(img.shape[1]):
# b, g, r = img[y, x]
# file.write(f'{r},{g},{b}\n')
# img = cv2.imread('./810115161912_810115161912_20240131-145622_0da14e4d-a3da-499f-b512-2d4168ab1c87_front_addGood_70f75407b7ae_29_01.jpg')
img = cv2.resize(img, (224, 224))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# img = conf.test_transform(img)
# img = img.numpy()
# img = img.transpose(1, 2, 0)
# Init runtime environment
print('--> Init runtime environment')
ret = rknn.init_runtime()
# ret = rknn.init_runtime('rk3588')
if ret != 0:
print('Init runtime environment failed!')
exit(ret)
print('done')
# Inference
print('--> Running model')
T1 = time.time()
outputs = rknn.inference(inputs=[img])
# outputs = rknn.inference(inputs=img)
T2 = time.time()
print('inference time >>> {}'.format(T2 - T1))
with open('result_0415_128.txt', 'a') as f:
f.write(str(outputs))
# pdb.set_trace()
print('***outputs', outputs)
np.save('./onnx_resnet50v2_0.npy', outputs[0])
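# normalize the raw logits with a softmax before printing the top-5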
x = outputs[0]
output = np.exp(x) / np.sum(np.exp(x))
outputs = [output]
show_outputs(outputs)
print('done')
rknn.release()

233
tools/operate_usearch.py Normal file
View File

@ -0,0 +1,233 @@
import os
import numpy as np
from usearch.index import Index
import json
import struct
def create_index():
index = Index(
ndim=256,
metric='cos',
# dtype='f32',
dtype='f16',
connectivity=32,
expansion_add=40, # 128,
expansion_search=10, # 64,
multi=True
)
return index
def compare_feature(features1, features2, model='1'):
"""
:param model: comparison strategy
'0': treat features1 as images from one trajectory vs. the reference library:
take each image's max similarity to the library, then average those maxima
'1': mean of all pairwise similarities
'2': max over all 1:1 comparisons
:param features1:
:param features2:
:return:
"""
similarity_group, similarity_groups = [], []
if model == '0':
for feature1 in features1:
for feature2 in features2[0]:
similarity = np.dot(feature1, feature2) / (np.linalg.norm(feature1) * np.linalg.norm(feature2))
similarity_group.append(similarity)
similarity_groups.append(max(similarity_group))
similarity_group = []
return sum(similarity_groups) / len(similarity_groups)
elif model == '1':
feature2 = features2[0]
for feature1 in features1:
for num in range(len(feature2)):
similarity = np.dot(feature1, feature2[num]) / (
np.linalg.norm(feature1) * np.linalg.norm(feature2[num]))
similarity_group.append(similarity)
similarity_groups.append(sum(similarity_group) / len(similarity_group))
similarity_group = []
# return sum(similarity_groups)/len(similarity_groups), max(similarity_groups)
if len(similarity_groups) == 0:
return -1
return sum(similarity_groups) / len(similarity_groups)
elif model == '2':
feature2 = features2[0]
for feature1 in features1:
for num in range(len(feature2)):
similarity = np.dot(feature1, feature2[num]) / (
np.linalg.norm(feature1) * np.linalg.norm(feature2[num]))
similarity_group.append(similarity)
return max(similarity_group)
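# Minimal smoke test (hypothetical helper, random vectors): three query
# features against a gallery of two reference features, exercising the three
# strategies documented above.
def _demo_compare_feature():
    query = [np.random.rand(256) for _ in range(3)]
    gallery = [[np.random.rand(256) for _ in range(2)]]
    print(compare_feature(query, gallery, model='0'))  # mean of per-image maxima
    print(compare_feature(query, gallery, model='1'))  # mean of all pairwise scores
    print(compare_feature(query, gallery, model='2'))  # overall maximum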
def get_barcode_feature(data):
barcode = data['key']
features = data['value']
return [barcode] * len(features), features
def analysis_file(file_path):
    """
    :param file_path: path to a JSON feature file whose 'total' list holds {'key', 'value'} entries
    :return: (barcodes, features) lists, one entry per barcode
    """
barcodes, features = [], []
with open(file_path, 'r', encoding='utf-8') as f:
data = json.load(f)
for dic in data['total']:
barcode, feature = get_barcode_feature(dic)
barcodes.append(barcode)
features.append(feature)
return barcodes, features
def create_base_index(index_file_pth=None,
barcodes=None,
features=None,
save_index_name=None):
index = create_index()
if index_file_pth is not None:
# save_index_name = index_file_pth.split('json')[0] + 'usearch'
save_index_name = index_file_pth.split('json')[0] + 'data'
barcodes, features = analysis_file(index_file_pth)
else:
        assert barcodes is not None and features is not None, 'barcodes and features must not be None'
for barcode, feature in zip(barcodes, features):
try:
index.add(np.array(barcode), np.array(feature))
except Exception as e:
print(e)
continue
index.save(save_index_name)
def get_feature_index(index_file_pth=None,
barcodes=None):
    assert index_file_pth is not None, 'index_file_pth must not be None'
index = Index.restore(index_file_pth, view=True)
feature_lists = index.get(np.array(barcodes))
print("memory {} size {}".format(index.memory_usage, index.size))
print("feature_lists {}".format(feature_lists))
return feature_lists
def search_in_index(query=None,
                    barcode=None,  # barcode -> int or np.ndarray
                    index_name=None,
                    temp_index=False,  # whether this is a temporary (per-session) library
                    model='0',
                    ):
    assert index_name is not None, 'index_name must not be None'
    index = Index.restore(index_name, view=True)
    if temp_index:
        if barcode is not None:  # 1:1 comparison test
            feature_lists = index.get(np.array(barcode))
            results = compare_feature(query, feature_lists)
        else:
            results = index.search(query, count=5)
        return results
    else:  # standard library
        if barcode is not None:  # 1:1 comparison test
            feature_lists = index.get(np.array(barcode))
            results = compare_feature(query, feature_lists, model)
        else:
            results = index.search(query, count=10)
        return results
def delete_index(index_name=None, key=None, index=None):
    assert key is not None, 'key must not be None'
    if index is None:
        assert index_name is not None, 'index_name must not be None'
        index = Index.restore(index_name, view=True)
    index.remove(key)
from scipy.spatial.distance import cdist
def compute_similarity_matrix(featurelists1, featurelists2):
    """Compute the cosine-similarity matrix between two sets of image features."""
    # cosine similarity between every pair of vectors
    cosine_similarities = 1 - cdist(featurelists1, featurelists2, metric='cosine')
    cosine_similarities = np.around(cosine_similarities, decimals=3)
    return cosine_similarities
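# Shape note: featurelists1 of shape (m, d) and featurelists2 of shape (n, d)
# yield an (m, n) matrix of rounded cosine similarities.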
def check_usearch_json_diff(index_file_pth, json_file_pth):
json_features = None
feature_lists = get_feature_index(index_file_pth, ['6923644272159'])
with open(json_file_pth, 'r') as json_file:
json_data = json.load(json_file)
for data in json_data['total']:
if data['key'] == '6923644272159':
json_features = data['value']
json_features = np.array(json_features)
feature_lists = np.array(feature_lists[0])
compute_similarity_matrix(json_features, feature_lists)
def write_binary_file(filename, datas):
    with open(filename, 'wb') as f:
        # Write the number of keys first (convenient for the C++ reader)
        key_count = len(datas)
        f.write(struct.pack('I', key_count))  # 'I' = unsigned int, 4 bytes
        for data in datas:
            key = data['key']
            feats = data['value']
            key_bytes = key.encode('utf-8')
            key_len = len(key_bytes)
            f.write(struct.pack('<B', key_len))
            f.write(key_bytes)
            value_count = len(feats)
            f.write(struct.pack('I', (value_count * 256)))  # total number of float16 values
            # Write every feature value as a half-precision float (2 bytes each)
            for values in feats:
                for value in values:
                    value_half = np.float16(value)
                    f.write(value_half.tobytes())
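# Hypothetical inverse of write_binary_file, handy for verifying the layout:
# uint32 key count; per key: uint8 key length, UTF-8 key bytes, uint32 count
# of float16 values, then that many float16 values (ndim values per vector).
def read_binary_file(filename, ndim=256):
    datas = []
    with open(filename, 'rb') as f:
        key_count, = struct.unpack('I', f.read(4))
        for _ in range(key_count):
            key_len, = struct.unpack('<B', f.read(1))
            key = f.read(key_len).decode('utf-8')
            value_count, = struct.unpack('I', f.read(4))
            values = np.frombuffer(f.read(value_count * 2), dtype=np.float16)
            datas.append({'key': key, 'value': values.reshape(-1, ndim)})
    return datas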
def create_binary_file(json_path, flag=True):
    # Load and parse the JSON file
    with open(json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    if flag:
        # The aggregated file maps 'total' to a list of {'key', 'value'} entries
        for _, values in data.items():
            write_binary_file(json_path.replace('.json', '.bin'), values)
    else:
        write_binary_file(json_path.replace('.json', '.bin'), [data])
def create_binary_files(index_file_pth):
if os.path.isfile(index_file_pth):
create_binary_file(index_file_pth)
else:
for name in os.listdir(index_file_pth):
jsonpth = os.sep.join([index_file_pth, name])
create_binary_file(jsonpth, False)
if __name__ == '__main__':
    # index_file_pth = '../data/feature_json'  # generate binary files (one per JSON)
    index_file_pth = '../search_library/yunhedian_30-04.json'
    # create_base_index(index_file_pth)  # build the usearch index file
    create_binary_files(index_file_pth)  # generate the binary file(s)
# index_file_pth = '../search_library/test_index_10_normal_0717.usearch'
# # index_file_pth = '../search_library/data_10_normal_0718.index'
# search_in_index(query='693', index_name=index_file_pth, barcode='6934024590466')
# # check index data file
# index_file_pth = '../search_library/data_zhanting.data'
# # # get_feature_index(index_file_pth, ['6901070602818'])
# get_feature_index(index_file_pth, ['6923644272159'])
# index_file_pth = '../search_library/data_zhanting.data'
# json_file_pth = '../search_library/data_zhanting.json'
# check_usearch_json_diff(index_file_pth, json_file_pth)

View File

@ -0,0 +1,84 @@
'''
On-site 1:N testing to determine the matching threshold.
'''
import os
import numpy as np
import matplotlib.pyplot as plt
def showHist(filtered_data):
Same = filtered_data[:, 1].astype(np.float32)
Cross = filtered_data[:, 2].astype(np.float32)
fig, axs = plt.subplots(2, 1)
axs[0].hist(Same, bins=50, edgecolor='black')
axs[0].set_xlim([-0.1, 1])
axs[0].set_title('first')
axs[1].hist(Cross, bins=50, edgecolor='black')
axs[1].set_xlim([-0.1, 1])
axs[1].set_title('second')
# plt.savefig('plot.png')
plt.show()
def get_target_list(nested_list):
    filtered_list = np.array(list(filter(lambda x: len(x) >= 2, nested_list)))  # drop events without a track
    filtered_correct = filtered_list[filtered_list[:, 0] != 'wrong']  # correctly matched events
    filtered_wrong = filtered_list[filtered_list[:, 0] == 'wrong']  # incorrectly matched events
    showHist(filtered_correct)
    # showHist(filtered_wrong)
    print(filtered_list)
def deal_process(file_pth):
flag = False
event = file_pth.split('\\')[-2]
target_barcode = file_pth.split('\\')[-2].split('_')[-1]
temp_list = []
with open(file_pth, 'r') as f:
for line in f:
if 'oneToOne' in line:
flag = True
continue
if flag:
line = line.replace('\n', '')
comparison_data = line.split(',')
forecast_barcode = comparison_data[0]
value = comparison_data[-1].split(':')[-1]
if value == '':
break
if len(temp_list) == 0:
if forecast_barcode == target_barcode:
temp_list.append('correct')
else:
temp_list.append('wrong')
temp_list.append(float(value))
temp_list.append(event)
return temp_list
def analyze_scratch(scratch_pth):
    purchase, back = [], []
    for root, dirs, files in os.walk(scratch_pth):
        if len(root) > 0:
            if len(root.split('_')) == 4:  # add-to-cart events
                process = os.path.join(root, 'process.data')
                if not os.path.exists(process):
                    continue
                purchase.append(deal_process(process))
            elif len(root.split('_')) == 3:  # put-back events
                process = os.path.join(root, 'process.data')
                if not os.path.exists(process):
                    continue
                back.append(deal_process(process))
    # get_target_list(purchase)
    get_target_list(back)
    print(purchase)
if __name__ == '__main__':
# scratch_pth = r'\\192.168.1.28\\share\\测试视频数据以及日志\\各模块测试记录\\展厅测试\\1108_展厅模型v800测试\\'
scratch_pth = r'\\192.168.1.28\\share\\测试视频数据以及日志\\各模块测试记录\\展厅测试\\1120_展厅模型v801测试\\扫A放A\\'
    analyze_scratch(scratch_pth)

411
tools/write_feature_json.py Normal file
View File

@ -0,0 +1,411 @@
import json
import os
import logging
import numpy as np
from typing import Dict, List, Optional, Tuple
from tools.dataset import get_transform
from model import resnet18
import torch
from PIL import Image
import pandas as pd
from tqdm import tqdm
import yaml
import shutil
import struct
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class FeatureExtractor:
    def __init__(self, conf):
        self.conf = conf
        self.model = self.initModel()
        _, self.test_transform = get_transform(self.conf)
def initModel(self, inference_model: Optional[str] = None) -> torch.nn.Module:
"""
Initialize and load the ResNet18 model for inference.
Args:
inference_model: Optional path to model weights. Uses conf.test_model if None.
Returns:
Loaded and configured PyTorch model in evaluation mode.
Raises:
FileNotFoundError: If model weights file is not found
RuntimeError: If model loading fails
"""
model_path = inference_model if inference_model else self.conf['models']['checkpoints']
try:
# Verify model file exists
if not os.path.exists(model_path):
raise FileNotFoundError(f"Model weights file not found: {model_path}")
# Initialize model
model = resnet18().to(self.conf['base']['device'])
# Handle multi-GPU case
            if self.conf['base']['distributed']:
                model = torch.nn.DataParallel(model)
            # Load weights
            state_dict = torch.load(model_path, map_location=self.conf['base']['device'])
model.load_state_dict(state_dict)
model.eval()
logger.info(f"Successfully loaded model from {model_path}")
return model
except Exception as e:
logger.error(f"Failed to initialize model: {str(e)}")
raise
    def convert_rgba_to_rgb(self, image_path):
        # Open the image; .convert('RGB') drops the alpha channel
        img = Image.open(image_path)
        if img.mode == 'RGBA':
            img_rgb = img.convert('RGB')
            # Overwrite the original file with the converted image
            img_rgb.save(image_path)
            print(f"Image converted from RGBA to RGB and saved to {image_path}")
    def test_preprocess(self, images: list, actionModel=False) -> torch.Tensor:
        res = []
        for img in images:
            try:
                im = self.test_transform(img) if actionModel else self.test_transform(Image.open(img))
                res.append(im)
            except Exception as e:
                logger.warning(f"Skipping unreadable image {img}: {str(e)}")
                continue
        data = torch.stack(res)
        return data
    def inference(self, images, model, actionModel=False):
        data = self.test_preprocess(images, actionModel)
        if torch.cuda.is_available():
            data = data.to(self.conf['base']['device'])
        features = model(data)
        if self.conf['data']['half']:
            features = features.half()
        return features
def group_image(self, images, batch=64) -> list:
"""Group image paths by batch size"""
size = len(images)
res = []
for i in range(0, size, batch):
end = min(batch + i, size)
res.append(images[i:end])
return res
def getFeatureList(self, barList, imgList):
featList = [[] for _ in range(len(barList))]
for index, image_paths in enumerate(imgList):
try:
# Process images in batches
for batch in self.group_image(image_paths):
# Get features for batch
features = self.inference(batch, self.model)
# Process each feature in batch
for feat in features:
# Move to CPU and convert to numpy
feat_np = feat.squeeze().detach().cpu().numpy()
# Normalize first 256 dimensions
normalized = self.normalize_256(feat_np[:256])
# Combine with remaining dimensions
combined = np.concatenate([normalized, feat_np[256:]], axis=0)
featList[index].append(combined)
except Exception as e:
logger.error(f"Error processing images for index {index}: {str(e)}")
continue
return featList
def get_files(
self,
folder: str,
filter: Optional[List[str]] = None,
create_single_json: bool = False
) -> Dict[str, List[str]]:
"""
Recursively collect image files from directory structure.
Args:
folder: Root directory to scan
filter: Optional list of barcodes to include
create_single_json: Whether to create individual JSON files per barcode
Returns:
Dictionary mapping barcode names to lists of image paths
Example:
{
"barcode1": ["path/to/img1.jpg", "path/to/img2.jpg"],
"barcode2": ["path/to/img3.jpg"]
}
"""
file_dicts = {}
total_files = 0
feature_counts = []
barcode_count = 0
subclass = [str(i) for i in range(100)]
# Validate input directory
if not os.path.isdir(folder):
raise ValueError(f"Invalid directory: {folder}")
# Process each barcode directory
for root, dirs, files in tqdm(os.walk(folder), desc="Scanning directories"):
if not dirs: # Leaf directory (contains images)
basename = os.path.basename(root)
if basename in subclass:
ori_barcode = root.split('/')[-2]
barcode = root.split('/')[-2] + '_' + basename
else:
ori_barcode = basename
barcode = basename
# Apply filter if provided
if filter and ori_barcode not in filter:
continue
                elif len(ori_barcode) > 13 or len(ori_barcode) < 8:
                    logger.warning(f"Skipping invalid barcode {ori_barcode}")
                    with open(self.conf['save']['error_barcodes'], 'a') as f:
                        f.write(ori_barcode + '\n')
                    continue
# Process image files
if files:
image_paths = self._process_image_files(root, files)
if not image_paths:
continue
# Update counters
barcode_count += 1
file_count = len(image_paths)
total_files += file_count
feature_counts.append(file_count)
# Handle output mode
if create_single_json:
self._process_single_barcode(barcode, image_paths)
else:
if barcode.split('_')[-1] == '0':
barcode = barcode.split('_')[0]
file_dicts[barcode] = image_paths
# # Log summary
# logger.info(f"Processed {barcode_count} barcodes with {total_files} total images")
# logger.debug(f"Image counts per barcode: {feature_counts}")
# Batch process if not creating individual JSONs
if not create_single_json and file_dicts:
self.createFeatureDict(
file_dicts,
create_single_json=False,
)
return file_dicts
def _process_image_files(self, root: str, files: List[str]) -> List[str]:
"""Process and validate image files in a directory."""
valid_paths = []
for filename in files:
file_path = os.path.join(root, filename)
try:
# Convert RGBA to RGB if needed
self.convert_rgba_to_rgb(file_path)
valid_paths.append(file_path)
except Exception as e:
logger.warning(f"Skipping invalid image {file_path}: {str(e)}")
return valid_paths
def _process_single_barcode(self, barcode: str, image_paths: List[str]):
"""Process a single barcode and create individual JSON file."""
temp_dict = {barcode: image_paths}
self.createFeatureDict(
temp_dict,
create_single_json=True,
)
def normalize_256(self, queFeatList):
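        """L2-normalize a feature vector (applied here to the first 256 dimensions)."""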
queFeatList = queFeatList / np.linalg.norm(queFeatList)
return queFeatList
def img2feature(
self,
imgs_dict: Dict[str, List[str]]
) -> Tuple[List[str], List[List[np.ndarray]]]:
"""
Extract features for all images in the dictionary.
        Args:
            imgs_dict: Dictionary mapping barcodes to image paths
Returns:
Tuple containing:
- List of barcode IDs
- List of feature lists (one per barcode)
Raises:
ValueError: If input dictionary is empty
RuntimeError: If feature extraction fails
"""
if not imgs_dict:
raise ValueError("No images provided for feature extraction")
try:
barcode_list = list(imgs_dict.keys())
image_list = list(imgs_dict.values())
feature_list = self.getFeatureList(barcode_list, image_list)
logger.info(f"Successfully extracted features for {len(barcode_list)} barcodes")
return barcode_list, feature_list
except Exception as e:
logger.error(f"Feature extraction failed: {str(e)}")
raise RuntimeError(f"Feature extraction failed: {str(e)}")
def createFeatureDict(self, imgs_dict,
create_single_json=False): # imgs->{barcode1:[img1_1...img1_n], barcode2:[img2_1...img2_n]}
dicts_all = {}
value_list = []
barcode_list, imgs_list = self.img2feature(imgs_dict)
for i in range(len(barcode_list)):
dicts = {}
imgs_list_ = []
for j in range(len(imgs_list[i])):
imgs_list_.append(imgs_list[i][j].tolist())
dicts['key'] = barcode_list[i]
truncated_imgs_list = [subarray[:256] for subarray in imgs_list_]
dicts['value'] = truncated_imgs_list
if create_single_json:
# json_path = os.path.join("./search_library/v8021_overseas/", str(barcode_list[i]) + '.json')
json_path = os.path.join(self.conf['save']['json_path'], str(barcode_list[i]) + '.json')
with open(json_path, 'w') as json_file:
json.dump(dicts, json_file)
else:
value_list.append(dicts)
if not create_single_json:
dicts_all['total'] = value_list
with open(self.conf['save']['json_bin'], 'w') as json_file:
json.dump(dicts_all, json_file)
self.create_binary_files(self.conf['save']['json_bin'])
    def statisticsBarcodes(self, pth, filter=None):
        feature_num = 0
        feature_num_lists = []
        nn = 0
        with open(self.conf['save']['barcodes_statistics'], 'w', encoding='utf-8') as f:
            for barcode in os.listdir(pth):
                print("barcode length >> {}".format(len(barcode)))
                if len(barcode) > 13 or len(barcode) < 8:
                    continue
                if filter is not None:
                    f.writelines(barcode + '\n')
                    if barcode in filter:
                        print(barcode)
                        feature_num += len(os.listdir(os.path.join(pth, barcode)))
                        nn += 1
                else:
                    print('barcode name >> {}'.format(barcode))
                    f.writelines(barcode + '\n')
                    feature_num += len(os.listdir(os.path.join(pth, barcode)))
                feature_num_lists.append(feature_num)
        print("Total number of features: {}".format(feature_num))
        print("Total number of barcodes: {}".format(nn))
def get_shop_barcodes(self, file_path):
if file_path:
df = pd.read_excel(file_path)
column_values = list(df.iloc[:, 6].values)
column_values = list(map(str, column_values))
return column_values
else:
return None
def del_base_dir(self, pth):
for root, dirs, files in os.walk(pth):
if len(dirs) == 1:
if dirs[0] == 'base':
shutil.rmtree(os.path.join(root, dirs[0]))
    def write_binary_file(self, filename, datas):
        with open(filename, 'wb') as f:
            # Write the number of keys first (convenient for the C++ reader)
            key_count = len(datas)
            f.write(struct.pack('I', key_count))  # 'I' = unsigned int, 4 bytes
            for data in datas:
                key = data['key']
                feats = data['value']
                key_bytes = key.encode('utf-8')
                key_len = len(key_bytes)
                f.write(struct.pack('<B', key_len))
                f.write(key_bytes)
                value_count = len(feats)
                f.write(struct.pack('I', (value_count * 256)))  # total number of float16 values
                # Write every feature value as a half-precision float (2 bytes each)
                for values in feats:
                    for value in values:
                        value_half = np.float16(value)
                        f.write(value_half.tobytes())
    def create_binary_file(self, json_path, flag=True):
        # Load and parse the JSON file
        with open(json_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        if flag:
            for _, values in data.items():
                # Write each list of feature entries to the configured binary path
                self.write_binary_file(self.conf['save']['json_bin'].replace('json', 'bin'), values)
        else:
            self.write_binary_file(json_path.replace('.json', '.bin'), [data])
def create_binary_files(self, index_file_pth):
if os.path.isfile(index_file_pth):
self.create_binary_file(index_file_pth)
else:
for name in os.listdir(index_file_pth):
jsonpth = os.sep.join([index_file_pth, name])
self.create_binary_file(jsonpth, False)
if __name__ == "__main__":
with open('../configs/write_feature.yml', 'r') as f:
conf = yaml.load(f, Loader=yaml.FullLoader)
    ### Save the {barcode: feature vectors} dictionary produced by model inference as a JSON file
    # xlsx_pth = './shop_xlsx/曹家桥门店在售商品表.xlsx'
    # xlsx_pth = None
    # del_base_dir(mg_path)
extractor = FeatureExtractor(conf)
column_values = extractor.get_shop_barcodes(conf['data']['xlsx_pth'])
imgs_dict = extractor.get_files(conf['data']['img_dirs_path'],
filter=column_values,
create_single_json=False) # False
extractor.statisticsBarcodes(conf['data']['img_dirs_path'], column_values)

142
train_compare.py Normal file
View File

@ -0,0 +1,142 @@
import os
import os.path as osp
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from model.loss import FocalLoss
from tools.dataset import load_data
import matplotlib.pyplot as plt
from configs import trainer_tools
import yaml
with open('configs/scatter.yml', 'r') as f:
conf = yaml.load(f, Loader=yaml.FullLoader)
# Data Setup
train_dataloader, class_num = load_data(training=True, cfg=conf)
val_dataloader, _ = load_data(training=False, cfg=conf)
tr_tools = trainer_tools(conf)
backbone_mapping = tr_tools.get_backbone()
metric_mapping = tr_tools.get_metric(class_num)
if conf['models']['backbone'] in backbone_mapping:
    model = backbone_mapping[conf['models']['backbone']]().to(conf['base']['device'])
else:
    raise ValueError('Unsupported backbone: {}'.format(conf['models']['backbone']))
if conf['training']['metric'] in metric_mapping:
    metric = metric_mapping[conf['training']['metric']]()
else:
    raise ValueError('Unsupported metric type: {}'.format(conf['training']['metric']))
if torch.cuda.device_count() > 1 and conf['base']['distributed']:
print("Let's use", torch.cuda.device_count(), "GPUs!")
model = nn.DataParallel(model)
metric = nn.DataParallel(metric)
# Training Setup
if conf['training']['loss'] == 'focal_loss':
criterion = FocalLoss(gamma=2)
else:
criterion = nn.CrossEntropyLoss()
optimizer_mapping = tr_tools.get_optimizer(model, metric)
if conf['training']['optimizer'] in optimizer_mapping:
optimizer = optimizer_mapping[conf['training']['optimizer']]()
scheduler = optim.lr_scheduler.StepLR(
optimizer,
step_size=conf['training']['lr_step'],
gamma=conf['training']['lr_decay']
)
else:
    raise ValueError('Unsupported optimizer type: {}'.format(conf['training']['optimizer']))
# Checkpoints Setup
checkpoints = conf['training']['checkpoints']
os.makedirs(checkpoints, exist_ok=True)
if __name__ == '__main__':
print('backbone>{} '.format(conf['models']['backbone']),
'metric>{} '.format(conf['training']['metric']),
'checkpoints>{} '.format(conf['training']['checkpoints']),
)
train_losses = []
val_losses = []
epochs = []
temp_loss = 100
if conf['training']['restore']:
print('load pretrain model: {}'.format(conf['training']['restore_model']))
model.load_state_dict(torch.load(conf['training']['restore_model'],
map_location=conf['base']['device']))
for e in range(conf['training']['epochs']):
train_loss = 0
model.train()
for train_data, train_labels in tqdm(train_dataloader,
desc="Epoch {}/{}"
.format(e, conf['training']['epochs']),
ascii=True,
total=len(train_dataloader)):
train_data = train_data.to(conf['base']['device'])
train_labels = train_labels.to(conf['base']['device'])
train_embeddings = model(train_data).to(conf['base']['device']) # [256,512]
# pdb.set_trace()
if not conf['training']['metric'] == 'softmax':
thetas = metric(train_embeddings, train_labels) # [256,357]
else:
thetas = metric(train_embeddings)
tloss = criterion(thetas, train_labels)
optimizer.zero_grad()
tloss.backward()
optimizer.step()
train_loss += tloss.item()
train_lossAvg = train_loss / len(train_dataloader)
train_losses.append(train_lossAvg)
epochs.append(e)
val_loss = 0
model.eval()
with torch.no_grad():
for val_data, val_labels in tqdm(val_dataloader, desc="val",
ascii=True, total=len(val_dataloader)):
val_data = val_data.to(conf['base']['device'])
val_labels = val_labels.to(conf['base']['device'])
val_embeddings = model(val_data).to(conf['base']['device'])
if not conf['training']['metric'] == 'softmax':
thetas = metric(val_embeddings, val_labels)
else:
thetas = metric(val_embeddings)
vloss = criterion(thetas, val_labels)
val_loss += vloss.item()
val_lossAvg = val_loss / len(val_dataloader)
val_losses.append(val_lossAvg)
        if val_lossAvg < temp_loss:
            if torch.cuda.device_count() > 1 and conf['base']['distributed']:
                torch.save(model.module.state_dict(), osp.join(checkpoints, 'best.pth'))
            else:
                torch.save(model.state_dict(), osp.join(checkpoints, 'best.pth'))
            temp_loss = val_lossAvg
scheduler.step()
current_lr = optimizer.param_groups[0]['lr']
log_info = ("Epoch {}/{}, train_loss: {}, val_loss: {} lr:{}"
.format(e, conf['training']['epochs'], train_lossAvg, val_lossAvg, current_lr))
print(log_info)
        # Append to the log file
        with open(osp.join(conf['logging']['logging_dir']), 'a') as f:
            f.write(log_info + '\n')
        print("learning rate at epoch %d: %f" % (e, current_lr))
if torch.cuda.device_count() > 1 and conf['base']['distributed']:
torch.save(model.module.state_dict(), osp.join(checkpoints, 'last.pth'))
else:
torch.save(model.state_dict(), osp.join(checkpoints, 'last.pth'))
plt.plot(epochs, train_losses, color='blue')
plt.plot(epochs, val_losses, color='red')
# plt.savefig('lossMobilenetv3.png')
plt.savefig('loss/mobilenetv3Large_2250_0316.png')

205
train_distill.py Normal file
View File

@ -0,0 +1,205 @@
"""
ResNet50蒸馏训练ResNet18实现
学生网络使用ArcFace损失
支持单机双卡训练
"""
import os
import torch
import torch.nn as nn
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import GradScaler
from model import resnet18, resnet50, ArcFace
from tqdm import tqdm
import torch.nn.functional as F
from tools.dataset import load_data
# from config import config as conf
import yaml
import math
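# Rendezvous note: each worker process connects to MASTER_ADDR:MASTER_PORT to
# join the NCCL process group before training starts (single-node setup here).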
def setup(rank, world_size):
os.environ['MASTER_ADDR'] = '0.0.0.0'
os.environ['MASTER_PORT'] = '12355'
dist.init_process_group("nccl", rank=rank, world_size=world_size)
def cleanup():
dist.destroy_process_group()
class DistillTrainer:
def __init__(self, rank, world_size, conf):
self.rank = rank
self.world_size = world_size
self.device = torch.device(f'cuda:{rank}')
        # Initialize teacher and student models
        self.teacher = resnet50(pretrained=True, scale=conf['models']['channel_ratio']).to(self.device)
        self.student = resnet18(pretrained=True, scale=conf['models']['student_channel_ratio']).to(self.device)
        # Load the pretrained teacher weights
        # teacher_path = os.path.join('checkpoints', 'resnet50_0519', 'best.pth')
        teacher_path = conf['models']['teacher_model_path']
        if os.path.exists(teacher_path):
            teacher_state = torch.load(teacher_path, map_location=self.device)
            new_state_dict = {}
            for k, v in teacher_state.items():
                if k.startswith('module.'):
                    new_state_dict[k[7:]] = v  # strip the leading 'module.' prefix
                else:
                    new_state_dict[k] = v
            # Load the cleaned state dict
            self.teacher.load_state_dict(new_state_dict, strict=False)
            if self.rank == 0:
                print(f"Successfully loaded teacher model from {teacher_path}")
        else:
            raise FileNotFoundError(f"Teacher model weights not found at {teacher_path}")
        # Data loading
        self.train_loader, num_classes = load_data(training=True, cfg=conf)
        self.val_loader, _ = load_data(training=False, cfg=conf)
        # ArcFace head
        self.metric = ArcFace(conf['base']['embedding_size'], num_classes).to(self.device)
        # Distributed wrappers
        if world_size > 1:
            self.teacher = DDP(self.teacher, device_ids=[rank])
            self.student = DDP(self.student, device_ids=[rank])
            self.metric = DDP(self.metric, device_ids=[rank])
        # Optimizer and schedule
        self.optimizer = torch.optim.SGD([
            {'params': self.student.parameters()},
            {'params': self.metric.parameters()}
        ], lr=conf['training']['lr'], momentum=0.9, weight_decay=5e-4)
        self.scheduler = CosineAnnealingLR(self.optimizer, T_max=conf['training']['epochs'])
        self.scaler = GradScaler()
        # Loss functions
        self.arcface_loss = nn.CrossEntropyLoss()
        self.distill_loss = nn.KLDivLoss(reduction='batchmean')
        self.conf = conf
    def cosine_annealing(self, epoch, total_epochs, initial_weight, final_weight=0.1):
        """
        Cosine-annealed schedule for the distillation weight.
        Args:
            epoch: current epoch
            total_epochs: total number of epochs
            initial_weight: initial distillation weight (e.g. 0.8)
            final_weight: final distillation weight (e.g. 0.1)
        Returns:
            The distillation weight for the current epoch.
        """
        return final_weight + 0.5 * (initial_weight - final_weight) * (1 + math.cos(math.pi * epoch / total_epochs))
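    # Illustrative schedule values (assumed hyperparameters: initial_weight=0.8,
    # total_epochs=10): epoch 0 -> 0.80, epoch 5 -> 0.45, epoch 9 -> ~0.12.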
def train_epoch(self, epoch):
self.teacher.eval()
self.student.train()
if self.rank == 0:
print(f"\nTeacher network type: {type(self.teacher)}")
print(f"Student network type: {type(self.student)}")
total_loss = 0
for data, labels in tqdm(self.train_loader, desc=f"Epoch {epoch}"):
data = data.to(self.device)
labels = labels.to(self.device)
            # with autocast():
            # Teacher forward pass (frozen, no gradients)
            with torch.no_grad():
                teacher_logits = self.teacher(data)
            # Student forward pass
            student_features = self.student(data)
            student_logits = self.metric(student_features, labels)
            # Compute losses
            arc_loss = self.arcface_loss(student_logits, labels)
            distill_loss = self.distill_loss(
                F.log_softmax(student_features / self.conf['training']['temperature'], dim=1),
                F.softmax(teacher_logits / self.conf['training']['temperature'], dim=1)
            ) * (self.conf['training']['temperature'] ** 2)  # multiply by T^2 to keep the gradient scale after temperature scaling
            current_distill_weight = self.cosine_annealing(epoch, self.conf['training']['epochs'], self.conf['training']['distill_weight'])
            loss = (1 - current_distill_weight) * arc_loss + current_distill_weight * distill_loss
self.optimizer.zero_grad()
self.scaler.scale(loss).backward()
self.scaler.step(self.optimizer)
self.scaler.update()
total_loss += loss.item()
self.scheduler.step()
return total_loss / len(self.train_loader)
def validate(self):
self.student.eval()
total_loss = 0
correct = 0
total = 0
with torch.no_grad():
for data, labels in self.val_loader:
data = data.to(self.device)
labels = labels.to(self.device)
features = self.student(data)
logits = self.metric(features, labels)
loss = self.arcface_loss(logits, labels)
total_loss += loss.item()
_, predicted = torch.max(logits.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
return total_loss / len(self.val_loader), correct / total
def save_checkpoint(self, epoch, is_best=False):
if self.rank != 0:
return
state = {
'epoch': epoch,
'student_state_dict': self.student.state_dict(),
'metric_state_dict': self.metric.state_dict(),
'optimizer_state_dict': self.optimizer.state_dict(),
}
filename = 'best.pth' if is_best else f'checkpoint_{epoch}.pth'
if not os.path.exists(self.conf['training']['checkpoints']):
os.makedirs(self.conf['training']['checkpoints'])
if filename != 'best.pth':
torch.save(state, os.path.join(self.conf['training']['checkpoints'], filename))
else:
torch.save(state['student_state_dict'], os.path.join(self.conf['training']['checkpoints'], filename))
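    # Note: periodic checkpoints keep the full training state, while best.pth
    # stores only the student weights, ready for inference.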
def train(rank, world_size):
setup(rank, world_size)
with open('configs/distill.yml', 'r') as f:
conf = yaml.load(f, Loader=yaml.FullLoader)
trainer = DistillTrainer(rank, world_size, conf)
best_acc = 0
for epoch in range(conf['training']['epochs']):
train_loss = trainer.train_epoch(epoch)
val_loss, val_acc = trainer.validate()
if rank == 0:
print(f"Epoch {epoch}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
if val_acc > best_acc:
best_acc = val_acc
trainer.save_checkpoint(epoch, is_best=True)
cleanup()
if __name__ == '__main__':
world_size = torch.cuda.device_count()
if world_size > 1:
mp.spawn(train, args=(world_size,), nprocs=world_size, join=True)
else:
train(0, 1)