# MiniCPM-o-2.6 two-image chat demo (Python)
# Load model directly
from transformers import AutoModel, AutoTokenizer
import torch
from PIL import Image

# Single source of truth for the checkpoint id (was duplicated in two literals).
MODEL_NAME = 'openbmb/MiniCPM-o-2_6'


def main() -> None:
    """Load MiniCPM-o-2.6, ask it to describe the first of two images, print the answer.

    Side effects: downloads/loads the checkpoint, reads two local JPEGs,
    moves the model to CUDA, and prints the model's reply to stdout.
    Requires a CUDA device and the flash-attn package
    (attn_implementation='flash_attention_2').
    """
    model = AutoModel.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,  # model class is defined in the repo, not in transformers
        attn_implementation='flash_attention_2',
        torch_dtype=torch.bfloat16,
    )
    model = model.eval().cuda()

    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME, use_fast=True, trust_remote_code=True
    )

    # Convert defensively: the model expects RGB, and JPEGs can be
    # greyscale/CMYK, in which case Image.open returns a non-RGB mode.
    img1 = Image.open('/home/ieemoo0337/projects/datasets/constrast_pair/8850813311020/8850813311020.jpg').convert('RGB')
    img2 = Image.open('/home/ieemoo0337/projects/datasets/constrast_pair/8850511321499/8850511321499.jpg').convert('RGB')

    # Question text is Chinese ("describe the first image"); kept byte-identical.
    question = '描述第一张图像的1。'
    # MiniCPM chat format: one user turn whose content mixes PIL images and text.
    msgs = [{'role': 'user', 'content': [img1, img2, question]}]

    answer = model.chat(
        msgs=msgs,
        tokenizer=tokenizer,
    )
    print(answer)


if __name__ == '__main__':
    main()