"""Generate a caption for one image with an open_clip CoCa checkpoint.

The script loads the ``coca_biogpt_vitb16`` model and its preprocessing
transform, runs beam-search generation on ``example.png`` and prints the
raw token ids followed by the decoded text.
"""

import open_clip
import torch
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"

model, _, transform = open_clip.create_model_and_transforms(
    model_name="coca_biogpt_vitb16",
    pretrained="coca_biogpt_vitb16.pt",
)
model.to(device)
model.eval()

# Number of copies of the image placed in the batch (and captions printed).
nb = 1
path = "example.png"

# Close the underlying file as soon as the pixels have been converted.
with Image.open(path) as raw_image:
    im = raw_image.convert("RGB")
im = transform(im).unsqueeze(0)
im = im.to(device)
im = im.repeat(nb, 1, 1, 1)
print(im.shape)

tokenizer = open_clip.get_tokenizer("coca_biogpt_vitb16")
# Some open_clip tokenizers wrap another tokenizer in a ``tokenizer``
# attribute. Look it up once so that a tokenizer without the attribute
# reaches the ``open_clip.decode`` fallback below instead of raising here.
wrapped_tokenizer = getattr(tokenizer, "tokenizer", None)
print(wrapped_tokenizer if wrapped_tokenizer is not None else tokenizer)

with torch.no_grad():
    generated = model.generate(
        im,
        # NOTE(review): special-token ids are hard-coded; presumably they
        # match the BioGPT vocabulary used by this checkpoint -- confirm.
        pad_token_id=1,
        eos_token_id=2,
        sot_token_id=0,
        max_seq_len=256,
        seq_len=60,
        generation_type='beam_search',
    )
print(generated)

for i in range(nb):
    if wrapped_tokenizer is not None:
        print(wrapped_tokenizer.decode(generated[i]))
    else:
        print(open_clip.decode(generated[i]))