Compare commits
80 Commits
| SHA1 |
| --- |
| 92b8b5098e |
| 6bbbb07fde |
| 5cb08121b7 |
| 90a4691e15 |
| 6373df08b9 |
| fd366d76b0 |
| 207bc1603c |
| 489d1a2086 |
| f08ffe99f7 |
| 1f63f8ed93 |
| c1320aa09a |
| 1afc0d6617 |
| 6478d08647 |
| 29ae39e46f |
| 2fb1b29f5c |
| 5d5788fe6f |
| 2a89533188 |
| 611db26c8a |
| d75f905648 |
| d667de427a |
| f1734594a9 |
| 3aee6d6c7a |
| 40154b9825 |
| 7a1c101307 |
| e6270608d1 |
| f5dc4b6af4 |
| a07331a414 |
| 7c91d16e35 |
| 0ac741c608 |
| a8b05b53e5 |
| f3dd348fc8 |
| d43a7189b6 |
| 9363d22815 |
| 4e640531da |
| b461f8a2d4 |
| 8d6aa18fca |
| 5e4279f4e1 |
| a4f1e43ebe |
| c12ff00e01 |
| 282f1e4fde |
| a0dda64ad5 |
| 5c21167991 |
| 835c923dbc |
| df8ba7cee6 |
| 99cbdaaa48 |
| a699574975 |
| e558921a6b |
| 5a8c6a5d2e |
| a94d0f19e3 |
| 018e37835b |
| d376fa8c5f |
| 14558af47b |
| 952d7e3a3c |
| 9912dca40c |
| fb45a96528 |
| 55962c44e4 |
| 4c278ed872 |
| 8de11a03d8 |
| 0c2e0dccac |
| accca98d1c |
| 52427ac8a9 |
| 1090f009d5 |
| e2057ccf17 |
| e9e4dd393b |
| 8bdd089fe6 |
| 062e2245de |
| d5504fce91 |
| 9808dc19b1 |
| bd8f639537 |
| 6703c1bcaa |
| 23b382e459 |
| 7a1d066fdc |
| 33143a10ed |
| 1d4997bd42 |
| 597882178e |
| 4e9485090a |
| 70e3e2fbb3 |
| 9c3915e361 |
| 99f6ee4298 |
| 98851b7797 |
callclient.py (Normal file, 33 lines)
@@ -0,0 +1,33 @@
import requests
import json
import base64
import socket  # imported in the original but unused

def getByte(path):
    # read an image file and return it as a base64-encoded string
    with open(path, 'rb') as f:
        img_byte = base64.b64encode(f.read())
    img_str = img_byte.decode('utf-8')
    return img_str


img_str = getByte('./emptyJudge5/images/fly/f3fca24bf71ee1824441e6a79aaaec34.jpg')

url = 'http://192.168.1.142:14465/isempty'  # leading space inside the quotes removed; requests rejects URLs that start with whitespace

data = {'pic': img_str}

json_mod = json.dumps(data)

s = requests.session()  # keep the session alive
s.keep_alive = False    # note: requests.Session has no keep_alive switch; this only sets a plain attribute

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.100 Safari/537.36',
}

# the original created a second requests.session() here, which silently replaced
# the session configured above; the single session is reused instead
res = s.post(url=url, data=json_mod, headers=headers)

print(res.text)
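For context, a hypothetical sketch of the `/isempty` endpoint this client posts to. The server code is not part of this diff, so the framework (Flask here), the raw-JSON body handling, and the response fields are assumptions inferred from what `callclient.py` sends and prints.

```python
# Hypothetical server side of callclient.py; everything here is inferred, not from the repo.
import base64, io, json
from flask import Flask, request
from PIL import Image

app = Flask(__name__)

@app.route('/isempty', methods=['POST'])
def isempty():
    payload = json.loads(request.get_data())      # the client posts a raw JSON string, not form data
    img_bytes = base64.b64decode(payload['pic'])  # undo the client's base64 encoding
    img = Image.open(io.BytesIO(img_bytes))
    # ... run the empty-basket classifier on `img` here ...
    return json.dumps({"success": "true", "rst_cls": "0"})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=14465)
```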
hello.py (Normal file, 417 lines)
@@ -0,0 +1,417 @@
import torch
import torch.nn as nn
from vit_pytorch import ViT, SimpleViT, MAE
from vit_pytorch.distill import DistillableViT
from vit_pytorch.deepvit import DeepViT
from vit_pytorch.cait import CaiT
from vit_pytorch.pit import PiT
from vit_pytorch.regionvit import RegionViT
from vit_pytorch.sep_vit import SepViT
from vit_pytorch.crossformer import CrossFormer
from vit_pytorch.nest import NesT
from vit_pytorch.mobile_vit import MobileViT
from vit_pytorch.simmim import SimMIM
from vit_pytorch.ats_vit import ViT  # note: shadows the ViT imported from vit_pytorch above

from utils.data_utils import get_loader_new
from utils.scheduler import WarmupCosineSchedule

from tqdm import tqdm

import os
import numpy as np

def net():

    # model = ViT(
    #     image_size = 600,
    #     patch_size = 30,
    #     num_classes = 5,
    #     dim = 1024,
    #     depth = 6,
    #     max_tokens_per_depth = (256, 128, 64, 32, 16, 8), # a tuple that denotes the maximum number of tokens that any given layer should have. if the layer has greater than this amount, it will undergo adaptive token sampling
    #     heads = 16,
    #     mlp_dim = 2048,
    #     dropout = 0.1,
    #     emb_dropout = 0.1
    # )

    # modelv = ViT(
    #     image_size = 600,
    #     patch_size = 30,
    #     num_classes = 5,
    #     dim = 1024,
    #     depth = 6,
    #     heads = 8,
    #     mlp_dim = 2048
    # )

    # model = MAE(
    #     encoder = modelv,
    #     masking_ratio = 0.5  # they found 50% to yield the best results
    # )

    model = NesT(
        image_size = 600,
        patch_size = 30,
        dim = 256,
        heads = 18,  # 16
        num_hierarchies = 3,         # number of hierarchies
        block_repeats = (2, 4, 18),  # (2,4,16)(2,4,12)(2,2,12) the number of transformer blocks at each hierarchy, starting from the bottom
        num_classes = 5
    )

    # model = NesT(
    #     image_size = 600,
    #     patch_size = 30,
    #     dim = 256,
    #     heads = 16,  # 16
    #     num_hierarchies = 3,         # number of hierarchies
    #     block_repeats = (2, 3, 16),  # (2,2,12) the number of transformer blocks at each hierarchy, starting from the bottom
    #     num_classes = 5
    # )

    # model = CrossFormer(  # image size must be a multiple of 7, e.g. 448
    #     num_classes = 5,                   # number of output classes
    #     dim = (64, 128, 256, 512),         # dimension at each stage
    #     depth = (2, 2, 8, 2),              # depth of transformer at each stage
    #     global_window_size = (8, 4, 2, 1), # global window sizes at each stage
    #     local_window_size = 7,             # local window size (can be customized for each stage, but in paper, held constant at 7 for all stages)
    # )

    # model = RegionViT(  # image size must be a multiple of 7, e.g. 448
    #     dim = (64, 128, 256, 512),     # tuple of size 4, indicating dimension at each stage
    #     depth = (2, 2, 8, 2),          # depth of the region to local transformer at each stage
    #     window_size = 7,               # window size, which should be either 7 or 14
    #     num_classes = 5,               # number of output classes
    #     tokenize_local_3_conv = False, # whether to use a 3 layer convolution to encode the local tokens from the image. the paper uses this for the smaller models, but uses only 1 conv (set to False) for the larger models
    #     use_peg = False,               # whether to use positional generating module. they used this for object detection for a boost in performance
    # )

    # model = SepViT(  # image size must be a multiple of 7, e.g. 448
    #     num_classes = 5,
    #     dim = 32,              # dimensions of first stage, which doubles every stage (32, 64, 128, 256) for SepViT-Lite
    #     dim_head = 32,         # attention head dimension
    #     heads = (1, 2, 4, 8),  # number of heads per stage
    #     depth = (1, 2, 6, 2),  # number of transformer blocks per stage
    #     window_size = 7,       # window size of DSS Attention block
    #     dropout = 0.1          # dropout
    # )

    # model = PiT(
    #     image_size = 600,
    #     patch_size = 30,
    #     dim = 1024,
    #     num_classes = 5,
    #     depth = (1, 1, 1),  # list of depths, indicating the number of rounds of each stage before a downsample
    #     heads = 8,
    #     mlp_dim = 3072,
    #     dropout = 0.1,
    #     emb_dropout = 0.1
    # )

    # model = CaiT(
    #     image_size = 600,
    #     patch_size = 30,
    #     num_classes = 1000,
    #     dim = 1024,
    #     depth = 12,       # depth of transformer for patch to patch attention only
    #     cls_depth = 2,    # depth of cross attention of CLS tokens to patch
    #     heads = 16,
    #     mlp_dim = 2048,
    #     dropout = 0.1,
    #     emb_dropout = 0.1,
    #     layer_dropout = 0.05  # randomly dropout 5% of the layers
    # )

    # model = DeepViT(
    #     image_size = 600,
    #     patch_size = 30,
    #     num_classes = 5,
    #     dim = 256,
    #     depth = 6,
    #     heads = 6,
    #     mlp_dim = 256,
    #     dropout = 0.1,
    #     emb_dropout = 0.1
    # )

    # model = DistillableViT(
    #     image_size = 600,
    #     patch_size = 30,
    #     num_classes = 5,
    #     dim = 1080,
    #     depth = 12,
    #     heads = 12,
    #     mlp_dim = 3072,
    #     dropout = 0.1,
    #     emb_dropout = 0.1
    # )

    # model = ViT(
    #     image_size = 600,
    #     patch_size = 30,
    #     num_classes = 5,
    #     dim = 768,
    #     depth = 12,
    #     heads = 12,
    #     mlp_dim = 3072,
    #     dropout = 0.1,
    #     emb_dropout = 0.1
    # )

    # model = SimpleViT(
    #     image_size = 600,
    #     patch_size = 30,
    #     num_classes = 2,
    #     dim = 256,
    #     depth = 6,
    #     heads = 16,
    #     mlp_dim = 256
    # )

    # model = ViT(
    #     # ViT-best
    #     image_size = 600,
    #     patch_size = 30,
    #     num_classes = 5,
    #     dim = 512,
    #     depth = 6,
    #     heads = 8,
    #     mlp_dim = 512,
    #     pool = 'cls',
    #     channels = 3,
    #     dim_head = 12,
    #     dropout = 0.1,
    #     emb_dropout = 0.1

    #     # ViT-small
    #     image_size = 600,
    #     patch_size = 30,
    #     num_classes = 5,
    #     dim = 256,
    #     depth = 8,
    #     heads = 16,
    #     mlp_dim = 256,
    #     pool = 'cls',
    #     channels = 3,
    #     dim_head = 16,
    #     dropout = 0.1,
    #     emb_dropout = 0.1

    #     # ViT-tiny
    #     image_size = 600,
    #     patch_size = 30,
    #     num_classes = 5,
    #     dim = 256,
    #     depth = 4,
    #     heads = 6,
    #     mlp_dim = 256,
    #     pool = 'cls',
    #     channels = 3,
    #     dim_head = 6,
    #     dropout = 0.1,
    #     emb_dropout = 0.1
    # )

    return model

# img = torch.randn(1, 3, 448, 448)
# model = net()
# preds = model(img)  # (1, 1000)


# count the number of model parameters
def count_parameters(model):
    params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return params / 1000000  # millions of trainable parameters


# running average of the loss
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

criterion = nn.CrossEntropyLoss()

# plain accuracy
def simple_accuracy(preds, labels):
    return (preds == labels).mean()

# model evaluation
def test(device, model, test_loader, global_step):
    eval_losses = AverageMeter()

    print("***** Running Validation *****")

    model.eval()
    all_preds, all_label = [], []
    epoch_iterator = tqdm(test_loader,
                          desc="Validating... (loss=X.X)",
                          bar_format="{l_bar}{r_bar}",
                          dynamic_ncols=True)

    for step, batch in enumerate(epoch_iterator):
        batch = tuple(t.to(device) for t in batch)
        x, y = batch
        with torch.no_grad():
            logits = model(x)

        eval_loss = criterion(logits, y)
        eval_loss = eval_loss.mean()
        eval_losses.update(eval_loss.item())

        preds = torch.argmax(logits, dim=-1)

        if len(all_preds) == 0:
            all_preds.append(preds.detach().cpu().numpy())
            all_label.append(y.detach().cpu().numpy())
        else:
            all_preds[0] = np.append(
                all_preds[0], preds.detach().cpu().numpy(), axis=0
            )
            all_label[0] = np.append(
                all_label[0], y.detach().cpu().numpy(), axis=0
            )
        epoch_iterator.set_description("Validating... (loss=%2.5f)" % eval_losses.val)

    all_preds, all_label = all_preds[0], all_label[0]
    accuracy = simple_accuracy(all_preds, all_label)
    accuracy = torch.tensor(accuracy).to(device)

    val_accuracy = accuracy.detach().cpu().numpy()

    print("test Loss: %2.5f" % eval_losses.avg)
    print("test Accuracy: %2.5f" % val_accuracy)

    return val_accuracy


# save the model
def save_model(model):
    model_checkpoint = os.path.join('./output', "%s_vit_checkpoint.pth" % 'ieemooempty')
    torch.save(model, model_checkpoint)
    print("Saved model checkpoint to [DIR: %s]" % './output')  # was print(fmt, arg), which never formatted the string


# training
def train(model, train_loader, device, train_NUM_STEPS, LEARNING_RATE, WEIGHT_DECAY, WARMUP_STEPS, test_loader):

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=LEARNING_RATE,
                                momentum=0.9,
                                weight_decay=WEIGHT_DECAY)
    t_total = train_NUM_STEPS
    scheduler = WarmupCosineSchedule(optimizer, warmup_steps=WARMUP_STEPS, t_total=t_total)
    model.zero_grad()
    losses = AverageMeter()
    global_step, best_acc = 0, 0
    gradient_accumulation_steps = 1
    while True:
        model.train()
        epoch_iterator = tqdm(train_loader,
                              desc="Training (X / X Steps) (loss=X.X)",
                              bar_format="{l_bar}{r_bar}",
                              dynamic_ncols=True)
        all_preds, all_label = [], []
        for step, batch in enumerate(epoch_iterator):
            batch = tuple(t.to(device) for t in batch)
            x, y = batch
            logits = model(x)

            loss = criterion(logits, y)

            loss.backward()

            preds = torch.argmax(logits, dim=-1)

            if len(all_preds) == 0:
                all_preds.append(preds.detach().cpu().numpy())
                all_label.append(y.detach().cpu().numpy())
            else:
                all_preds[0] = np.append(
                    all_preds[0], preds.detach().cpu().numpy(), axis=0
                )
                all_label[0] = np.append(
                    all_label[0], y.detach().cpu().numpy(), axis=0
                )

            if (step + 1) % gradient_accumulation_steps == 0:
                losses.update(loss.item() * gradient_accumulation_steps)

                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                scheduler.step()
                optimizer.step()
                optimizer.zero_grad()
                global_step += 1

                epoch_iterator.set_description(
                    "Training (%d / %d Steps) (loss=%2.5f)" % (global_step, t_total, losses.val)
                )

                model.train()  # needed a second time, otherwise the trained model is not saved

                if global_step % t_total == 0:
                    break

        all_preds, all_label = all_preds[0], all_label[0]
        accuracy = simple_accuracy(all_preds, all_label)
        accuracy = torch.tensor(accuracy).to(device)
        train_accuracy = accuracy.detach().cpu().numpy()
        print("train accuracy: %f" % train_accuracy)
        accuracy = test(device, model, test_loader, global_step)

        if best_acc < accuracy:
            save_model(model)
            best_acc = accuracy

        losses.reset()
        if global_step % t_total == 0:
            break


if __name__ == "__main__":

    model = net()
    train_loader, test_loader = get_loader_new()
    trainnumsteps = len(train_loader)
    testnumsteps = len(test_loader)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(str(count_parameters(model)) + 'MB')  # note: this is millions of parameters, not megabytes
    # img = torch.randn(1, 3, 320, 320)  # (batchsize, channels, width, height); 3072 comes from the default channels=3, 3*1024
    # preds = model(img)
    # print(preds)

    model = model.to(device)
    epoch = 300
    train_NUM_STEPS = trainnumsteps * epoch
    LEARNING_RATE = 3e-2
    WEIGHT_DECAY = 0
    WARMUP_STEPS = 500
    train(model, train_loader, device, train_NUM_STEPS, LEARNING_RATE, WEIGHT_DECAY, WARMUP_STEPS, test_loader)
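`WarmupCosineSchedule` is imported from `utils/scheduler.py`, which is not part of this diff. A minimal sketch of the implementation it presumably matches (the widely used linear-warmup-then-cosine-decay scheduler from ViT fine-tuning codebases), shown only so the `warmup_steps` and `t_total` arguments above are concrete; treat the details as an assumption:

```python
# Assumed implementation of utils.scheduler.WarmupCosineSchedule; not from this repo.
import math
from torch.optim.lr_scheduler import LambdaLR

class WarmupCosineSchedule(LambdaLR):
    """Linear warmup to the base LR over warmup_steps, then cosine decay to 0 over t_total."""
    def __init__(self, optimizer, warmup_steps, t_total, cycles=0.5, last_epoch=-1):
        self.warmup_steps = warmup_steps
        self.t_total = t_total
        self.cycles = cycles
        super().__init__(optimizer, self.lr_lambda, last_epoch=last_epoch)

    def lr_lambda(self, step):
        if step < self.warmup_steps:
            return float(step) / float(max(1.0, self.warmup_steps))
        progress = float(step - self.warmup_steps) / float(max(1, self.t_total - self.warmup_steps))
        return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(self.cycles) * 2.0 * progress)))
```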
lightrise.py (Normal file, 48 lines)
@@ -0,0 +1,48 @@
# coding=utf-8
import os
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
import argparse
from models.modeling import VisionTransformer, CONFIGS
import time


# single-image test with the small model for complex (lighting) scenes
def riseempty(imgdata):
    parser = argparse.ArgumentParser()
    parser.add_argument("--pretrained_model", type=str, default="../module/ieemoo-ai-isempty/model/new/ieemooempty_vitlight_checkpoint.pth", help="load pretrained model")  # uses the custom ViT
    args = parser.parse_args()

    # args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = torch.device("cpu")

    num_classes = 2
    cls_dict = {0: "noemp", 1: "yesemp"}

    test_transform = transforms.Compose([transforms.Resize((600, 600), Image.BILINEAR),
                                         transforms.ToTensor(),
                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    # prepare the model
    model = torch.load(args.pretrained_model, map_location=torch.device('cpu'))  # our own pretrained model
    model.to(args.device)
    model.eval()
    x = test_transform(imgdata)
    part_logits = model(x.unsqueeze(0).to(args.device))
    probs = torch.nn.Softmax(dim=-1)(part_logits)
    top2 = torch.argsort(probs, dim=-1, descending=True)
    riseclas_ids = top2[0][0]
    # print("cur_img result: class id: %d, score: %0.3f" % (riseclas_ids, probs[0, riseclas_ids].item()))
    riseresult = {}
    riseresult["success"] = "true"
    riseresult["rst_cls"] = int(riseclas_ids)

    return riseresult

# if __name__ == "__main__":
#     riseresult = riseempty("light.jpg")
#     print(riseresult)
onx.py (Normal file, 39 lines)
@@ -0,0 +1,39 @@
import numpy as np
import json
import time
import cv2, base64
import argparse
import sys, os
import torch
from gevent.pywsgi import WSGIServer
from PIL import Image
from torchvision import transforms
from models.modeling import VisionTransformer, CONFIGS
from vit_pytorch import ViT

model = torch.load("../module/ieemoo-ai-isempty/model/now/emptyjudge5_checkpoint.bin", map_location="cpu")
model.eval()
model.to("cpu")

test_transform = transforms.Compose([transforms.Resize((600, 600), Image.BILINEAR),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
img = Image.open("img.jpg")
x = test_transform(img)
part_logits = model(x.unsqueeze(0))
probs = torch.nn.Softmax(dim=-1)(part_logits)
topN = torch.argsort(probs, dim=-1, descending=True).tolist()
clas_ids = topN[0][0]
# collapse the 5-way prediction to binary: classes 0/2/3 -> 0, classes 1/4 -> 1
clas_ids = 0 if 0 == int(clas_ids) or 2 == int(clas_ids) or 3 == int(clas_ids) else 1
result = {}
result["success"] = "true"
result["rst_cls"] = str(clas_ids)

print(result)


input = torch.randn(1, 3, 600, 600)  # BCHW; batch must be 1 because inference uses a single image, and H/W must match the training resolution
torch.onnx.export(model, input, "../module/ieemoo-ai-isempty/model/now/emptyjudge5_checkpoint.onx", verbose=False)
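The script exports the checkpoint to ONNX but never loads it back. A short sanity check with onnxruntime would look roughly like the sketch below; onnxruntime itself is an assumption, since the diff does not show how the `.onx` file is consumed downstream.

```python
# Hedged sketch: verify the exported ONNX graph produces logits of the expected shape.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("../module/ieemoo-ai-isempty/model/now/emptyjudge5_checkpoint.onx")
input_name = sess.get_inputs()[0].name                        # name of the exported graph input
dummy = np.random.randn(1, 3, 600, 600).astype(np.float32)    # same BCHW shape used at export time
logits = sess.run(None, {input_name: dummy})[0]
print(logits.shape)                                           # expect (1, num_classes)
```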
Deleted file (64 lines removed):
@@ -1,64 +0,0 @@
# coding=utf-8
import os
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
import argparse
from models.modeling import VisionTransformer, CONFIGS


parser = argparse.ArgumentParser()
parser.add_argument("--dataset", choices=["CUB_200_2011", "emptyJudge5", "emptyJudge4"], default="emptyJudge5", help="Which dataset.")
parser.add_argument("--img_size", default=448, type=int, help="Resolution size")
parser.add_argument('--split', type=str, default='overlap', help="Split method")  # non-overlap
parser.add_argument('--slide_step', type=int, default=12, help="Slide step for overlap split")
parser.add_argument('--smoothing_value', type=float, default=0.0, help="Label smoothing value\n")
parser.add_argument("--pretrained_model", type=str, default="output/emptyjudge5_checkpoint.bin", help="load pretrained model")
args = parser.parse_args()

args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
args.nprocs = torch.cuda.device_count()

# Prepare Model
config = CONFIGS["ViT-B_16"]
config.split = args.split
config.slide_step = args.slide_step

cls_dict = {}
num_classes = 0
if args.dataset == "emptyJudge5":
    num_classes = 5
    cls_dict = {0: "noemp", 1: "yesemp", 2: "hard", 3: "fly", 4: "stack"}
elif args.dataset == "emptyJudge4":
    num_classes = 4
    cls_dict = {0: "noemp", 1: "yesemp", 2: "hard", 3: "stack"}
elif args.dataset == "emptyJudge3":
    num_classes = 3
    cls_dict = {0: "noemp", 1: "yesemp", 2: "hard"}
elif args.dataset == "emptyJudge2":
    num_classes = 2
    cls_dict = {0: "noemp", 1: "yesemp"}
model = VisionTransformer(config, args.img_size, zero_head=True, num_classes=num_classes, smoothing_value=args.smoothing_value)
if args.pretrained_model is not None:
    pretrained_model = torch.load(args.pretrained_model, map_location=torch.device('cpu'))['model']
    model.load_state_dict(pretrained_model)
model.to(args.device)
model.eval()
# test_transform = transforms.Compose([transforms.Resize((600, 600), Image.BILINEAR),
#                                      transforms.CenterCrop((448, 448)),
#                                      transforms.ToTensor(),
#                                      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
test_transform = transforms.Compose([transforms.Resize((448, 448), Image.BILINEAR),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
img = Image.open("img.jpg")
x = test_transform(img)
part_logits = model(x.unsqueeze(0))

probs = torch.nn.Softmax(dim=-1)(part_logits)
top5 = torch.argsort(probs, dim=-1, descending=True)
print("Prediction Label\n")
for idx in top5[0, :5]:
    print(f'{probs[0, idx.item()]:.5f} : {cls_dict[idx.item()]}', end='\n')
testsingle.py (Executable file, 98 lines)
@@ -0,0 +1,98 @@
# coding=utf-8
import os
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
import argparse
from models.modeling import VisionTransformer, CONFIGS
import time
import lightrise


# test the model on a single image
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", choices=["emptyJudge5"], default="emptyJudge5", help="Which dataset.")
parser.add_argument("--img_size", default=600, type=int, help="Resolution size")
parser.add_argument('--split', type=str, default='overlap', help="Split method")  # non-overlap
parser.add_argument('--slide_step', type=int, default=12, help="Slide step for overlap split")
parser.add_argument('--smoothing_value', type=float, default=0.0, help="Label smoothing value\n")
parser.add_argument("--pretrained_model", type=str, default="../module/ieemoo-ai-isempty/model/new/ieemooempty_vit_checkpoint.pth", help="load pretrained model")
# parser.add_argument("--pretrained_model", type=str, default="output/ieemooempty_vit_checkpoint.pth", help="load pretrained model")  # uses the custom ViT
args = parser.parse_args()

# args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
args.device = torch.device("cpu")
args.nprocs = torch.cuda.device_count()

# prepare the model
config = CONFIGS["ViT-B_16"]
config.split = args.split
config.slide_step = args.slide_step

num_classes = 5
cls_dict = {0: "noemp", 1: "yesemp", 2: "hard", 3: "fly", 4: "stack"}

model = None
# model = VisionTransformer(config, args.img_size, zero_head=True, num_classes=num_classes, smoothing_value=args.smoothing_value)

if args.pretrained_model is not None:
    model = torch.load(args.pretrained_model, map_location=torch.device('cpu'))  # our own pretrained model
model.to(args.device)
model.eval()

test_transform = transforms.Compose([transforms.Resize((600, 600), Image.BILINEAR),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])


# custom ViT model
# test_transform = transforms.Compose([transforms.Resize((320, 320), Image.BILINEAR),
#                                      transforms.ToTensor(),
#                                      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])


# img = Image.open("img.jpg")
img = Image.open("light.jpg")

x = test_transform(img)

startime = time.process_time()

part_logits = model(x.unsqueeze(0).to(args.device))

probs = torch.nn.Softmax(dim=-1)(part_logits)
top5 = torch.argsort(probs, dim=-1, descending=True)
print("Prediction Label\n")
for idx in top5[0, :5]:
    print(f'{probs[0, idx.item()]:.5f} : {cls_dict[idx.item()]}', end='\n')

clas_ids = top5[0][0]
# collapse the 5-way prediction to binary: classes 0/2/3 -> 0, classes 1/4 -> 1
clas_ids = 0 if 0 == int(clas_ids) or 2 == int(clas_ids) or 3 == int(clas_ids) else 1
print("cur_img result: class id: %d, score: %0.3f" % (clas_ids, probs[0, clas_ids].item()))

result = {}
result["success"] = "true"
result["rst_cls"] = str(clas_ids)

# double-check an "empty" verdict with the lightweight model
riseresult = lightrise.riseempty(Image.open("light.jpg"))
if (int(result["rst_cls"]) == 1):
    if (int(riseresult["rst_cls"]) == 1):
        result = {}
        result["success"] = "true"
        result["rst_cls"] = 1
    else:
        result = {}
        result["success"] = "true"
        result["rst_cls"] = 0

print(result)

endtime = time.process_time()

print("Time cost:" + str(endtime - startime))  # evaluating one image takes about 2.8 seconds
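The two-model decision above can be restated as a small function. This is purely a reading of the script's logic, not code from the repository: the large ViT's "empty" verdict is only kept when the lightweight `lightrise` model agrees, otherwise the image is treated as not empty.

```python
# Illustrative restatement of testsingle.py's cascade; names are hypothetical.
def cascade_isempty(vit_cls, rise_cls):
    """vit_cls: binary class from the large ViT; rise_cls: binary class from lightrise."""
    if vit_cls == 1 and rise_cls == 1:
        return 1        # both models say empty
    if vit_cls == 1:
        return 0        # ViT says empty but lightrise disagrees, so not empty
    return vit_cls      # ViT already says not empty; lightrise is not consulted
```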