"""Run a single multimodal chat query against MiniCPM-o-2.6.

Loads the model and tokenizer from the Hugging Face Hub, opens two
local product images, and asks the model (in Chinese) to describe the
first image. Requires a CUDA device and the flash-attention-2 kernels.
"""

import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Single source of truth for the checkpoint id (was duplicated inline).
MODEL_ID = "openbmb/MiniCPM-o-2_6"

# Hard-coded machine-local test images; adjust for your environment.
IMG1_PATH = '/home/ieemoo0337/projects/datasets/constrast_pair/8850813311020/8850813311020.jpg'
IMG2_PATH = '/home/ieemoo0337/projects/datasets/constrast_pair/8850511321499/8850511321499.jpg'


def load_model_and_tokenizer():
    """Load the MiniCPM-o model (bf16, flash-attn-2, CUDA) and its tokenizer.

    Returns:
        (model, tokenizer) tuple, with the model in eval mode on the GPU.
    """
    # SECURITY NOTE: trust_remote_code=True executes Python shipped with the
    # checkpoint repo. Only use with model sources you trust.
    model = AutoModel.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
        attn_implementation='flash_attention_2',
        torch_dtype=torch.bfloat16,
        # device_map="auto"
    )
    model = model.eval().cuda()
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID, use_fast=True, trust_remote_code=True
    )
    return model, tokenizer


def main():
    """Open the two test images and print the model's answer."""
    model, tokenizer = load_model_and_tokenizer()

    img1 = Image.open(IMG1_PATH)
    img2 = Image.open(IMG2_PATH)

    # Question (Chinese): "Describe ... of the first image."
    question = '描述第一张图像的1。'

    # MiniCPM-o chat format: a user turn whose content interleaves
    # PIL images and text strings.
    msgs = [{'role': 'user', 'content': [img1, img2, question]}]

    answer = model.chat(
        msgs=msgs,
        tokenizer=tokenizer
    )
    print(answer)


if __name__ == "__main__":
    main()