ieemoo-ai-searchv2/utils/classify.py

# coding=utf-8
# /usr/bin/env pythpn

'''
Author: yinhao
Email: yinhao_x@163.com
Wechat: xss_yinhao
Github: http://github.com/yinhaoxs
data: 2019-11-23 18:29
desc:
'''

import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
from torch import nn
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import cv2
import shutil
import numpy as np
import pandas as pd
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import os
import time
from collections import OrderedDict

# config.py
BATCH_SIZE = 16
PROPOSAL_NUM = 6
CAT_NUM = 4
INPUT_SIZE = (448, 448)  # (w, h)
DROP_OUT = 0.5
CLASS_NUM = 37


# resnet.py
def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        feature1 = x
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = nn.Dropout(p=0.5)(x)
        feature2 = x
        x = self.fc(x)

        return x, feature1, feature2


# model.py
class ProposalNet(nn.Module):
    def __init__(self):
        super(ProposalNet, self).__init__()
        self.down1 = nn.Conv2d(2048, 128, 3, 1, 1)
        self.down2 = nn.Conv2d(128, 128, 3, 2, 1)
        self.down3 = nn.Conv2d(128, 128, 3, 2, 1)
        self.ReLU = nn.ReLU()
        self.tidy1 = nn.Conv2d(128, 6, 1, 1, 0)
        self.tidy2 = nn.Conv2d(128, 6, 1, 1, 0)
        self.tidy3 = nn.Conv2d(128, 9, 1, 1, 0)

    def forward(self, x):
        batch_size = x.size(0)
        d1 = self.ReLU(self.down1(x))
        d2 = self.ReLU(self.down2(d1))
        d3 = self.ReLU(self.down3(d2))
        t1 = self.tidy1(d1).view(batch_size, -1)
        t2 = self.tidy2(d2).view(batch_size, -1)
        t3 = self.tidy3(d3).view(batch_size, -1)
        return torch.cat((t1, t2, t3), dim=1)


class AttentionNet(nn.Module):
    def __init__(self, topN=4):
        super(attention_net, self).__init__()
        self.pretrained_model = ResNet(Bottleneck, [3, 4, 6, 3])
        self.pretrained_model.avgpool = nn.AdaptiveAvgPool2d(1)
        self.pretrained_model.fc = nn.Linear(512 * 4, 200)
        self.proposal_net = ProposalNet()
        self.topN = topN
        self.concat_net = nn.Linear(2048 * (CAT_NUM + 1), 200)
        self.partcls_net = nn.Linear(512 * 4, 200)
        _, edge_anchors, _ = generate_default_anchor_maps()
        self.pad_side = 224
        self.edge_anchors = (edge_anchors + 224).astype(np.int)

    def forward(self, x):
        resnet_out, rpn_feature, feature = self.pretrained_model(x)
        x_pad = F.pad(x, (self.pad_side, self.pad_side, self.pad_side, self.pad_side), mode='constant', value=0)
        batch = x.size(0)
        # we will reshape rpn to shape: batch * nb_anchor
        rpn_score = self.proposal_net(rpn_feature.detach())
        all_cdds = [
            np.concatenate((x.reshape(-1, 1), self.edge_anchors.copy(), np.arange(0, len(x)).reshape(-1, 1)), axis=1)
            for x in rpn_score.data.cpu().numpy()]
        top_n_cdds = [hard_nms(x, topn=self.topN, iou_thresh=0.25) for x in all_cdds]
        top_n_cdds = np.array(top_n_cdds)
        top_n_index = top_n_cdds[:, :, -1].astype(np.int)
        top_n_index = torch.from_numpy(top_n_index).cuda()
        top_n_prob = torch.gather(rpn_score, dim=1, index=top_n_index)
        part_imgs = torch.zeros([batch, self.topN, 3, 224, 224]).cuda()
        for i in range(batch):
            for j in range(self.topN):
                [y0, x0, y1, x1] = top_n_cdds[i][j, 1:5].astype(np.int)
                part_imgs[i:i + 1, j] = F.interpolate(x_pad[i:i + 1, :, y0:y1, x0:x1], size=(224, 224), mode='bilinear',
                                                      align_corners=True)
        part_imgs = part_imgs.view(batch * self.topN, 3, 224, 224)
        _, _, part_features = self.pretrained_model(part_imgs.detach())
        part_feature = part_features.view(batch, self.topN, -1)
        part_feature = part_feature[:, :CAT_NUM, ...].contiguous()
        part_feature = part_feature.view(batch, -1)
        # concat_logits have the shape: B*200
        concat_out = torch.cat([part_feature, feature], dim=1)
        concat_logits = self.concat_net(concat_out)
        raw_logits = resnet_out
        # part_logits have the shape: B*N*200
        part_logits = self.partcls_net(part_features).view(batch, self.topN, -1)
        return [raw_logits, concat_logits, part_logits, top_n_index, top_n_prob]


def list_loss(logits, targets):
    temp = F.log_softmax(logits, -1)
    loss = [-temp[i][targets[i].item()] for i in range(logits.size(0))]
    return torch.stack(loss)


def ranking_loss(score, targets, proposal_num=PROPOSAL_NUM):
    loss = Variable(torch.zeros(1).cuda())
    batch_size = score.size(0)
    for i in range(proposal_num):
        targets_p = (targets > targets[:, i].unsqueeze(1)).type(torch.cuda.FloatTensor)
        pivot = score[:, i].unsqueeze(1)
        loss_p = (1 - pivot + score) * targets_p
        loss_p = torch.sum(F.relu(loss_p))
        loss += loss_p
    return loss / batch_size


# anchors.py
_default_anchors_setting = (
    dict(layer='p3', stride=32, size=48, scale=[2 ** (1. / 3.), 2 ** (2. / 3.)], aspect_ratio=[0.667, 1, 1.5]),
    dict(layer='p4', stride=64, size=96, scale=[2 ** (1. / 3.), 2 ** (2. / 3.)], aspect_ratio=[0.667, 1, 1.5]),
    dict(layer='p5', stride=128, size=192, scale=[1, 2 ** (1. / 3.), 2 ** (2. / 3.)], aspect_ratio=[0.667, 1, 1.5]),
)


def generate_default_anchor_maps(anchors_setting=None, input_shape=INPUT_SIZE):
    """
    generate default anchor
    :param anchors_setting: all informations of anchors
    :param input_shape: shape of input images, e.g. (h, w)
    :return: center_anchors: # anchors * 4 (oy, ox, h, w)
             edge_anchors: # anchors * 4 (y0, x0, y1, x1)
             anchor_area: # anchors * 1 (area)
    """
    if anchors_setting is None:
        anchors_setting = _default_anchors_setting

    center_anchors = np.zeros((0, 4), dtype=np.float32)
    edge_anchors = np.zeros((0, 4), dtype=np.float32)
    anchor_areas = np.zeros((0,), dtype=np.float32)
    input_shape = np.array(input_shape, dtype=int)

    for anchor_info in anchors_setting:

        stride = anchor_info['stride']
        size = anchor_info['size']
        scales = anchor_info['scale']
        aspect_ratios = anchor_info['aspect_ratio']

        output_map_shape = np.ceil(input_shape.astype(np.float32) / stride)
        output_map_shape = output_map_shape.astype(np.int)
        output_shape = tuple(output_map_shape) + (4,)
        ostart = stride / 2.
        oy = np.arange(ostart, ostart + stride * output_shape[0], stride)
        oy = oy.reshape(output_shape[0], 1)
        ox = np.arange(ostart, ostart + stride * output_shape[1], stride)
        ox = ox.reshape(1, output_shape[1])
        center_anchor_map_template = np.zeros(output_shape, dtype=np.float32)
        center_anchor_map_template[:, :, 0] = oy
        center_anchor_map_template[:, :, 1] = ox
        for scale in scales:
            for aspect_ratio in aspect_ratios:
                center_anchor_map = center_anchor_map_template.copy()
                center_anchor_map[:, :, 2] = size * scale / float(aspect_ratio) ** 0.5
                center_anchor_map[:, :, 3] = size * scale * float(aspect_ratio) ** 0.5

                edge_anchor_map = np.concatenate((center_anchor_map[..., :2] - center_anchor_map[..., 2:4] / 2.,
                                                  center_anchor_map[..., :2] + center_anchor_map[..., 2:4] / 2.),
                                                 axis=-1)
                anchor_area_map = center_anchor_map[..., 2] * center_anchor_map[..., 3]
                center_anchors = np.concatenate((center_anchors, center_anchor_map.reshape(-1, 4)))
                edge_anchors = np.concatenate((edge_anchors, edge_anchor_map.reshape(-1, 4)))
                anchor_areas = np.concatenate((anchor_areas, anchor_area_map.reshape(-1)))

    return center_anchors, edge_anchors, anchor_areas


def hard_nms(cdds, topn=10, iou_thresh=0.25):
    if not (type(cdds).__module__ == 'numpy' and len(cdds.shape) == 2 and cdds.shape[1] >= 5):
        raise TypeError('edge_box_map should be N * 5+ ndarray')

    cdds = cdds.copy()
    indices = np.argsort(cdds[:, 0])
    cdds = cdds[indices]
    cdd_results = []

    res = cdds

    while res.any():
        cdd = res[-1]
        cdd_results.append(cdd)
        if len(cdd_results) == topn:
            return np.array(cdd_results)
        res = res[:-1]

        start_max = np.maximum(res[:, 1:3], cdd[1:3])
        end_min = np.minimum(res[:, 3:5], cdd[3:5])
        lengths = end_min - start_max
        intersec_map = lengths[:, 0] * lengths[:, 1]
        intersec_map[np.logical_or(lengths[:, 0] < 0, lengths[:, 1] < 0)] = 0
        iou_map_cur = intersec_map / ((res[:, 3] - res[:, 1]) * (res[:, 4] - res[:, 2]) + (cdd[3] - cdd[1]) * (
                cdd[4] - cdd[2]) - intersec_map)
        res = res[iou_map_cur < iou_thresh]

    return np.array(cdd_results)


#### -------------------------------如何定义batch的读写方式-------------------------------
# 默认读写方式
def default_loader(path):
    try:
        img = Image.open(path).convert("RGB")
        if img is not None:
            return img
    except:
        print("error image:{}".format(path))


def opencv_isvalid(img_path):
    img_bgr = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    return img_bgr


# 判断图片是否为无效
def IsValidImage(img_path):
    vaild = True
    if img_path.endswith(".tif") or img_path.endswith(".tiff"):
        vaild = False
        return vaild
    try:
        img = opencv_isvalid(img_path)
        if img is None:
            vaild = False
        return vaild
    except:
        vaild = False
        return vaild


class MyDataset(Dataset):
    def __init__(self, dir_path, transform=None, loader=default_loader):
        fh, imgs = list(), list()
        num = 0
        for root, dirs, files in os.walk(dir_path):
            for file in files:
                try:
                    img_path = os.path.join(root + os.sep, file)
                    num += 1
                    if IsValidImage(img_path):
                        fh.append(img_path)
                    else:
                        os.remove(img_path)

                except:
                    print("image is broken")
        print("total images is:{}".format(num))

        for line in fh:
            line = line.strip()
            imgs.append(line)

        self.imgs = imgs
        self.transform = transform
        self.loader = loader

    def __getitem__(self, item):
        fh = self.imgs[item]
        img = self.loader(fh)
        if self.transform is not None:
            img = self.transform(img)
        return fh, img

    def __len__(self):
        return len(self.imgs)


#### -------------------------------如何定义batch的读写方式-------------------------------


#### -------------------------------图像模糊的定义-------------------------------
def variance_of_laplacian(image):
    return cv2.Laplacian(image, cv2.CV_64f).var()


## 如何定义接口函数
def imgQualJudge(img, QA_THRESHOLD):
    '''
    :param img:
    :param QA_THRESHOLD: 越高越清晰
    :return: 是否模糊，0为模糊，1为清晰
    '''

    norheight = 1707
    norwidth = 1280
    flag = 0
    # 筛选尺寸
    if max(img.shape[0], img.shape[1]) < 320:
        flag = '10002'
        return flag

    # 模糊筛选部分
    if img.shape[0] <= img.shape[1]:
        size1 = (norheight, norwidth)
        timage = cv2.resize(img, size1)
    else:
        size2 = (norwidth, norheight)
        timage = cv2.resize(img, size2)

    tgray = cv2.cvtColor(timage, cv2.COLOR_BGR2GRAY)
    halfgray = tgray[0:int(tgray.shape[0] / 2), 0:tgray.shape[1]]
    norgrayImg = np.zeros(halfgray.shape, np.int8)
    cv2.normalize(halfgray, norgrayImg, 0, 255, cv2.NORM_MINMAX)
    fm = variance_of_laplacian(norgrayImg)  # 模糊值
    if fm < QA_THRESHOLD:
        flag = '10001'
        return flag
    return flag


def process(img_path):
    img = Image.open(img_path).convert("RGB")
    valid = True
    low_quality = "10001"
    size_error = "10002"

    flag = imgQualJudge(np.array(img), 5)
    if flag == low_quality or flag == size_error or not img or 0 in np.asarray(img).shape[:2]:
        valid = False

    return valid


#### -------------------------------图像模糊的定义-------------------------------

def build_dict():
    dict_club = dict()
    dict_club[0] = ["身份证", 0.999999]
    dict_club[1] = ["校园卡", 0.890876]
    return dict_club


class Classifier():
    def __init__(self):
        self.device = torch.device('cuda')
        self.class_id_name_dict = build_dict()
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.input_size = 448
        self.use_cuda = torch.cuda.is_available()
        self.model = AttentionNet(topN=4)
        self.model.eval()

        checkpoint = torch.load("./.ckpt")
        newweights = checkpoint['net_state_dict']

        # 多卡测试转为单卡
        new_state_dic = OrderedDict()
        for k, v in newweights.items():
            name = k[7:] if k.startwith("module.") else k
            new_state_dic[name] = v

        self.model.load_state_dict(new_state_dic)
        self.model = self.model.to(self.device)

    def evalute(self, dir_path):
        data = MyDataset(dir_path, transform=self.preprocess)
        dataloader = DataLoader(dataset=data, batch_size=32, num_workers=8)

        self.model.eval()
        with torch.no_grad():
            num = 0
            for i, (data, path) in enumerate(dataloader, 1):
                data = data.to(self.device)
                output = self.model(data)
                for j in range(len(data)):
                    img_path = path[j]
                    img_output = output[1][j]
                    score, label, type = self.postprocess(img_output)
                    out_dict, score = self.process(score, label, type)
                    class_id = out_dict["results"]["class2"]["code"]
                    num += 1
                    if class_id != '00038':
                        os.remove(img_path)
                    else:
                        continue

    def preprocess(self, img):
        img = transforms.Resize((600, 600), Image.BILINEAR)(img)
        img = transforms.CenterCrop(self.input_size)(img)
        img = transforms.ToTensor()(img)
        img = transforms.Normalize(self.mean, self.std)

    def postprocess(self, output):
        pred_logits = F.softmax(output, dim=0)
        score, label = pred_logits.max(0)
        score = score.item()
        label = label.item()
        type = self.class_id_name_dict[label][0]
        return score, label, type

    def process(self, score, label, type):
        success_code = "200"
        lower_conf_code = "10008"

        threshold = float(self.class_id_name_dict[label][1])
        if threshold > 0.99:
            threshold = 0.99
        if threshold < 0.9:
            threshold = 0.9
        ## 设置查勘图片较低的阈值
        if label == 38:
            threshold = 0.5

        if score > threshold:
            status_code = success_code
            pred_label = str(label).zfill(5)
            print("pred_label:", pred_label)
            return {"code:": status_code, "message": '图像分类成功',
                    "results": {"class2": {'code': pred_label, 'name': type}}}, score
        else:
            status_code = lower_conf_code
            return {"code:": status_code, "message": '图像分类置信度低，不返回结果',
                    "results": {"class2": {'code': '', 'name': ''}}}, score


def class_results(img_dir):
    Classifier().evalute(img_dir)


if __name__ == "__main__":
    pass