ieemoo-ai-searchv2/utils/classify.py
# coding=utf-8
#!/usr/bin/env python
'''
Author: yinhao
Email: yinhao_x@163.com
Wechat: xss_yinhao
Github: http://github.com/yinhaoxs
date: 2019-11-23 18:29
desc:
'''
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import cv2
import shutil
import numpy as np
import pandas as pd
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import os
import time
from collections import OrderedDict
# config.py
BATCH_SIZE = 16
PROPOSAL_NUM = 6
CAT_NUM = 4
INPUT_SIZE = (448, 448) # (w, h)
DROP_OUT = 0.5
CLASS_NUM = 37
# resnet.py
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * 4)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, num_classes=1000):
self.inplanes = 64
super(ResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
self.avgpool = nn.AvgPool2d(7)
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
feature1 = x
x = self.avgpool(x)
x = x.view(x.size(0), -1)
        x = F.dropout(x, p=DROP_OUT, training=self.training)  # a fresh nn.Dropout built in forward() is always in training mode, so it would apply dropout even under eval()
feature2 = x
x = self.fc(x)
return x, feature1, feature2
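# Note: ResNet.forward returns three values: the classification logits,
# feature1 (the stride-32 conv feature map consumed by ProposalNet) and
# feature2 (the pooled global feature reused by AttentionNet's concat head).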
# model.py
class ProposalNet(nn.Module):
def __init__(self):
super(ProposalNet, self).__init__()
self.down1 = nn.Conv2d(2048, 128, 3, 1, 1)
self.down2 = nn.Conv2d(128, 128, 3, 2, 1)
self.down3 = nn.Conv2d(128, 128, 3, 2, 1)
self.ReLU = nn.ReLU()
self.tidy1 = nn.Conv2d(128, 6, 1, 1, 0)
self.tidy2 = nn.Conv2d(128, 6, 1, 1, 0)
self.tidy3 = nn.Conv2d(128, 9, 1, 1, 0)
def forward(self, x):
batch_size = x.size(0)
d1 = self.ReLU(self.down1(x))
d2 = self.ReLU(self.down2(d1))
d3 = self.ReLU(self.down3(d2))
t1 = self.tidy1(d1).view(batch_size, -1)
t2 = self.tidy2(d2).view(batch_size, -1)
t3 = self.tidy3(d3).view(batch_size, -1)
return torch.cat((t1, t2, t3), dim=1)
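# For the default 448x448 input the backbone feature map is 14x14, so the tidy
# branches emit 14*14*6 + 7*7*6 + 4*4*9 = 1176 + 294 + 144 = 1614 scores, one
# per default anchor produced by generate_default_anchor_maps().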
class AttentionNet(nn.Module):
def __init__(self, topN=4):
        super(AttentionNet, self).__init__()  # was attention_net, which is undefined here
self.pretrained_model = ResNet(Bottleneck, [3, 4, 6, 3])
self.pretrained_model.avgpool = nn.AdaptiveAvgPool2d(1)
self.pretrained_model.fc = nn.Linear(512 * 4, 200)
self.proposal_net = ProposalNet()
self.topN = topN
self.concat_net = nn.Linear(2048 * (CAT_NUM + 1), 200)
self.partcls_net = nn.Linear(512 * 4, 200)
_, edge_anchors, _ = generate_default_anchor_maps()
self.pad_side = 224
        self.edge_anchors = (edge_anchors + 224).astype(int)  # np.int was removed in NumPy 1.24
def forward(self, x):
resnet_out, rpn_feature, feature = self.pretrained_model(x)
x_pad = F.pad(x, (self.pad_side, self.pad_side, self.pad_side, self.pad_side), mode='constant', value=0)
batch = x.size(0)
# we will reshape rpn to shape: batch * nb_anchor
rpn_score = self.proposal_net(rpn_feature.detach())
all_cdds = [
np.concatenate((x.reshape(-1, 1), self.edge_anchors.copy(), np.arange(0, len(x)).reshape(-1, 1)), axis=1)
for x in rpn_score.data.cpu().numpy()]
top_n_cdds = [hard_nms(x, topn=self.topN, iou_thresh=0.25) for x in all_cdds]
top_n_cdds = np.array(top_n_cdds)
        top_n_index = top_n_cdds[:, :, -1].astype(int)
top_n_index = torch.from_numpy(top_n_index).cuda()
top_n_prob = torch.gather(rpn_score, dim=1, index=top_n_index)
part_imgs = torch.zeros([batch, self.topN, 3, 224, 224]).cuda()
for i in range(batch):
for j in range(self.topN):
                [y0, x0, y1, x1] = top_n_cdds[i][j, 1:5].astype(int)
part_imgs[i:i + 1, j] = F.interpolate(x_pad[i:i + 1, :, y0:y1, x0:x1], size=(224, 224), mode='bilinear',
align_corners=True)
part_imgs = part_imgs.view(batch * self.topN, 3, 224, 224)
_, _, part_features = self.pretrained_model(part_imgs.detach())
part_feature = part_features.view(batch, self.topN, -1)
part_feature = part_feature[:, :CAT_NUM, ...].contiguous()
part_feature = part_feature.view(batch, -1)
# concat_logits have the shape: B*200
concat_out = torch.cat([part_feature, feature], dim=1)
concat_logits = self.concat_net(concat_out)
raw_logits = resnet_out
# part_logits have the shape: B*N*200
part_logits = self.partcls_net(part_features).view(batch, self.topN, -1)
return [raw_logits, concat_logits, part_logits, top_n_index, top_n_prob]
def list_loss(logits, targets):
temp = F.log_softmax(logits, -1)
loss = [-temp[i][targets[i].item()] for i in range(logits.size(0))]
return torch.stack(loss)
def ranking_loss(score, targets, proposal_num=PROPOSAL_NUM):
loss = Variable(torch.zeros(1).cuda())
batch_size = score.size(0)
for i in range(proposal_num):
targets_p = (targets > targets[:, i].unsqueeze(1)).type(torch.cuda.FloatTensor)
pivot = score[:, i].unsqueeze(1)
loss_p = (1 - pivot + score) * targets_p
loss_p = torch.sum(F.relu(loss_p))
loss += loss_p
return loss / batch_size
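# A minimal sketch (not part of the original file) of how list_loss and
# ranking_loss are typically combined in NTS-Net-style training. It assumes
# `net` is an AttentionNet built with topN=PROPOSAL_NUM and `labels` is a
# LongTensor of shape (B,); the helper name is hypothetical.
def _nts_training_losses_sketch(net, img, labels):
    batch = img.size(0)
    raw_logits, concat_logits, part_logits, _, top_n_prob = net(img)
    # flatten the per-proposal logits so both losses see (B*N, C) and (B*N,)
    flat_part_logits = part_logits.view(batch * PROPOSAL_NUM, -1)
    flat_labels = labels.unsqueeze(1).repeat(1, PROPOSAL_NUM).view(-1)
    part_loss = list_loss(flat_part_logits, flat_labels).view(batch, PROPOSAL_NUM)
    rank_loss = ranking_loss(top_n_prob, part_loss)
    return (F.cross_entropy(raw_logits, labels)
            + F.cross_entropy(concat_logits, labels)
            + F.cross_entropy(flat_part_logits, flat_labels)
            + rank_loss)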
# anchors.py
_default_anchors_setting = (
dict(layer='p3', stride=32, size=48, scale=[2 ** (1. / 3.), 2 ** (2. / 3.)], aspect_ratio=[0.667, 1, 1.5]),
dict(layer='p4', stride=64, size=96, scale=[2 ** (1. / 3.), 2 ** (2. / 3.)], aspect_ratio=[0.667, 1, 1.5]),
dict(layer='p5', stride=128, size=192, scale=[1, 2 ** (1. / 3.), 2 ** (2. / 3.)], aspect_ratio=[0.667, 1, 1.5]),
)
def generate_default_anchor_maps(anchors_setting=None, input_shape=INPUT_SIZE):
"""
generate default anchor
:param anchors_setting: all informations of anchors
:param input_shape: shape of input images, e.g. (h, w)
:return: center_anchors: # anchors * 4 (oy, ox, h, w)
edge_anchors: # anchors * 4 (y0, x0, y1, x1)
anchor_area: # anchors * 1 (area)
"""
if anchors_setting is None:
anchors_setting = _default_anchors_setting
center_anchors = np.zeros((0, 4), dtype=np.float32)
edge_anchors = np.zeros((0, 4), dtype=np.float32)
anchor_areas = np.zeros((0,), dtype=np.float32)
input_shape = np.array(input_shape, dtype=int)
for anchor_info in anchors_setting:
stride = anchor_info['stride']
size = anchor_info['size']
scales = anchor_info['scale']
aspect_ratios = anchor_info['aspect_ratio']
output_map_shape = np.ceil(input_shape.astype(np.float32) / stride)
        output_map_shape = output_map_shape.astype(int)
output_shape = tuple(output_map_shape) + (4,)
ostart = stride / 2.
oy = np.arange(ostart, ostart + stride * output_shape[0], stride)
oy = oy.reshape(output_shape[0], 1)
ox = np.arange(ostart, ostart + stride * output_shape[1], stride)
ox = ox.reshape(1, output_shape[1])
center_anchor_map_template = np.zeros(output_shape, dtype=np.float32)
center_anchor_map_template[:, :, 0] = oy
center_anchor_map_template[:, :, 1] = ox
for scale in scales:
for aspect_ratio in aspect_ratios:
center_anchor_map = center_anchor_map_template.copy()
center_anchor_map[:, :, 2] = size * scale / float(aspect_ratio) ** 0.5
center_anchor_map[:, :, 3] = size * scale * float(aspect_ratio) ** 0.5
edge_anchor_map = np.concatenate((center_anchor_map[..., :2] - center_anchor_map[..., 2:4] / 2.,
center_anchor_map[..., :2] + center_anchor_map[..., 2:4] / 2.),
axis=-1)
anchor_area_map = center_anchor_map[..., 2] * center_anchor_map[..., 3]
center_anchors = np.concatenate((center_anchors, center_anchor_map.reshape(-1, 4)))
edge_anchors = np.concatenate((edge_anchors, edge_anchor_map.reshape(-1, 4)))
anchor_areas = np.concatenate((anchor_areas, anchor_area_map.reshape(-1)))
return center_anchors, edge_anchors, anchor_areas
def hard_nms(cdds, topn=10, iou_thresh=0.25):
if not (type(cdds).__module__ == 'numpy' and len(cdds.shape) == 2 and cdds.shape[1] >= 5):
raise TypeError('edge_box_map should be N * 5+ ndarray')
cdds = cdds.copy()
indices = np.argsort(cdds[:, 0])
cdds = cdds[indices]
cdd_results = []
res = cdds
while res.any():
cdd = res[-1]
cdd_results.append(cdd)
if len(cdd_results) == topn:
return np.array(cdd_results)
res = res[:-1]
start_max = np.maximum(res[:, 1:3], cdd[1:3])
end_min = np.minimum(res[:, 3:5], cdd[3:5])
lengths = end_min - start_max
intersec_map = lengths[:, 0] * lengths[:, 1]
intersec_map[np.logical_or(lengths[:, 0] < 0, lengths[:, 1] < 0)] = 0
iou_map_cur = intersec_map / ((res[:, 3] - res[:, 1]) * (res[:, 4] - res[:, 2]) + (cdd[3] - cdd[1]) * (
cdd[4] - cdd[2]) - intersec_map)
res = res[iou_map_cur < iou_thresh]
return np.array(cdd_results)
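# Toy example (hypothetical values) of hard_nms. Each row is
# [score, y0, x0, y1, x1, index]; the highest-scoring box is kept first, and
# any remaining box whose IoU with it reaches iou_thresh is suppressed:
#   cdds = np.array([[0.9, 0., 0., 10., 10., 0.],
#                    [0.8, 1., 1., 11., 11., 1.],    # IoU ~0.68 with row 0 -> dropped
#                    [0.7, 20., 20., 30., 30., 2.]])
#   hard_nms(cdds, topn=2, iou_thresh=0.25)          # keeps rows 0 and 2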
#### ------------------------------- How batch loading is defined -------------------------------
# default image loader
def default_loader(path):
try:
img = Image.open(path).convert("RGB")
if img is not None:
return img
except:
print("error image:{}".format(path))
def opencv_isvalid(img_path):
    img_bgr = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
    # cvtColor raises on images that failed to decode, which is what makes this a validity check
    cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    return img_bgr
# check whether an image file is valid
def IsValidImage(img_path):
    valid = True
    if img_path.endswith(".tif") or img_path.endswith(".tiff"):
        valid = False
        return valid
    try:
        img = opencv_isvalid(img_path)
        if img is None:
            valid = False
    except:
        valid = False
    return valid
class MyDataset(Dataset):
def __init__(self, dir_path, transform=None, loader=default_loader):
fh, imgs = list(), list()
num = 0
for root, dirs, files in os.walk(dir_path):
for file in files:
try:
                    img_path = os.path.join(root, file)
num += 1
if IsValidImage(img_path):
fh.append(img_path)
else:
os.remove(img_path)
except:
print("image is broken")
print("total images is:{}".format(num))
for line in fh:
line = line.strip()
imgs.append(line)
self.imgs = imgs
self.transform = transform
self.loader = loader
def __getitem__(self, item):
fh = self.imgs[item]
img = self.loader(fh)
if self.transform is not None:
img = self.transform(img)
return fh, img
def __len__(self):
return len(self.imgs)
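# Typical standalone usage (hypothetical path), mirroring what
# Classifier.evalute does below:
#   ds = MyDataset("/data/imgs", transform=transforms.ToTensor())
#   loader = DataLoader(ds, batch_size=32)
#   for paths, imgs in loader:
#       ...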
#### ------------------------------- How batch loading is defined -------------------------------
#### ------------------------------- Image-blur detection -------------------------------
def variance_of_laplacian(image):
    return cv2.Laplacian(image, cv2.CV_64F).var()  # the OpenCV constant is CV_64F; CV_64f does not exist
## interface function
def imgQualJudge(img, QA_THRESHOLD):
    '''
    :param img: input image (ndarray)
    :param QA_THRESHOLD: blur threshold; the higher, the sharper the image must be
    :return: 0 if the image passes, '10001' if blurry, '10002' if undersized
    '''
norheight = 1707
norwidth = 1280
flag = 0
    # size filter
if max(img.shape[0], img.shape[1]) < 320:
flag = '10002'
return flag
    # blur filter
if img.shape[0] <= img.shape[1]:
size1 = (norheight, norwidth)
timage = cv2.resize(img, size1)
else:
size2 = (norwidth, norheight)
timage = cv2.resize(img, size2)
tgray = cv2.cvtColor(timage, cv2.COLOR_BGR2GRAY)
halfgray = tgray[0:int(tgray.shape[0] / 2), 0:tgray.shape[1]]
    norgrayImg = np.zeros(halfgray.shape, np.uint8)  # uint8, not int8: normalizing to 0..255 would overflow int8
    cv2.normalize(halfgray, norgrayImg, 0, 255, cv2.NORM_MINMAX)
    fm = variance_of_laplacian(norgrayImg)  # blur score
if fm < QA_THRESHOLD:
flag = '10001'
return flag
return flag
def process(img_path):
img = Image.open(img_path).convert("RGB")
valid = True
low_quality = "10001"
size_error = "10002"
flag = imgQualJudge(np.array(img), 5)
if flag == low_quality or flag == size_error or not img or 0 in np.asarray(img).shape[:2]:
valid = False
return valid
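# Hypothetical usage of the quality gate above, pruning a folder in place:
#   for name in os.listdir("/data/imgs"):
#       p = os.path.join("/data/imgs", name)
#       if not process(p):
#           os.remove(p)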
#### ------------------------------- Image-blur detection -------------------------------
def build_dict():
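    # maps class id -> [class name, score threshold]; only two entries appear
    # in this file, presumably the full table covers all CLASS_NUM classes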
dict_club = dict()
    dict_club[0] = ["ID card", 0.999999]
    dict_club[1] = ["campus card", 0.890876]
return dict_club
class Classifier():
def __init__(self):
self.device = torch.device('cuda')
self.class_id_name_dict = build_dict()
self.mean = [0.485, 0.456, 0.406]
self.std = [0.229, 0.224, 0.225]
self.input_size = 448
self.use_cuda = torch.cuda.is_available()
self.model = AttentionNet(topN=4)
self.model.eval()
checkpoint = torch.load("./.ckpt")
newweights = checkpoint['net_state_dict']
        # strip the "module." prefix so DataParallel (multi-GPU) weights load on a single GPU
        new_state_dic = OrderedDict()
        for k, v in newweights.items():
            name = k[7:] if k.startswith("module.") else k
            new_state_dic[name] = v
self.model.load_state_dict(new_state_dic)
self.model = self.model.to(self.device)
def evalute(self, dir_path):
data = MyDataset(dir_path, transform=self.preprocess)
dataloader = DataLoader(dataset=data, batch_size=32, num_workers=8)
self.model.eval()
with torch.no_grad():
num = 0
            for i, (path, data) in enumerate(dataloader, 1):  # MyDataset yields (path, image)
data = data.to(self.device)
output = self.model(data)
for j in range(len(data)):
img_path = path[j]
img_output = output[1][j]
score, label, type = self.postprocess(img_output)
out_dict, score = self.process(score, label, type)
class_id = out_dict["results"]["class2"]["code"]
num += 1
if class_id != '00038':
os.remove(img_path)
else:
continue
    def preprocess(self, img):
        img = transforms.Resize((600, 600), Image.BILINEAR)(img)
        img = transforms.CenterCrop(self.input_size)(img)
        img = transforms.ToTensor()(img)
        img = transforms.Normalize(self.mean, self.std)(img)
        return img
def postprocess(self, output):
pred_logits = F.softmax(output, dim=0)
score, label = pred_logits.max(0)
score = score.item()
label = label.item()
type = self.class_id_name_dict[label][0]
return score, label, type
def process(self, score, label, type):
success_code = "200"
lower_conf_code = "10008"
threshold = float(self.class_id_name_dict[label][1])
if threshold > 0.99:
threshold = 0.99
if threshold < 0.9:
threshold = 0.9
        ## use a lower threshold for scene-survey images
if label == 38:
threshold = 0.5
        if score > threshold:
            status_code = success_code
            pred_label = str(label).zfill(5)
            print("pred_label:", pred_label)
            return {"code": status_code, "message": 'image classified successfully',
                    "results": {"class2": {'code': pred_label, 'name': type}}}, score
        else:
            status_code = lower_conf_code
            return {"code": status_code, "message": 'classification confidence too low, no result returned',
                    "results": {"class2": {'code': '', 'name': ''}}}, score
def class_results(img_dir):
Classifier().evalute(img_dir)
if __name__ == "__main__":
pass
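    # example invocation (hypothetical path); note that evalute() deletes every
    # image whose predicted class code is not '00038':
    #   class_results("/data/images_to_filter")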