xxx1 committed
Commit 9a59d7a
Parent: e9fbb59

Update app.py

Files changed (1)
  1. app.py +18 -7
app.py CHANGED
@@ -2,6 +2,13 @@ import string
 import gradio as gr
 import requests
 import torch
+from models.VLE import VLEForVQA, VLEProcessor, VLEForVQAPipeline
+from PIL import Image
+
+model_name="hfl/vle-base-for-vqa"
+model = VLEForVQA.from_pretrained(model_name)
+vle_processor = VLEProcessor.from_pretrained(model_name)
+vqa_pipeline = VLEForVQAPipeline(model=model, device='cpu', vle_processor=vle_processor)
 
 
 from transformers import BlipForQuestionAnswering, BlipProcessor
@@ -41,15 +48,19 @@ def gpt3(question,vqa_answer,caption):
     # return "input_text:\n"+prompt+"\n\n output_answer:\n"+answer
     return answer
 
-
+def vle(input_image,input_text):
+    vqa_answers = vqa_pipeline(image=input_image, question=input_text, top_k=4)
+    return vqa_answers
 def inference_chat(input_image,input_text):
     cap=caption(input_image)
-    inputs = processor(images=input_image, text=input_text,return_tensors="pt")
-    inputs["max_length"] = 10
-    inputs["num_beams"] = 5
-    inputs['num_return_sequences'] =4
-    out = model_vqa.generate(**inputs)
-    out=processor.batch_decode(out, skip_special_tokens=True)
+    # inputs = processor(images=input_image, text=input_text,return_tensors="pt")
+    # inputs["max_length"] = 10
+    # inputs["num_beams"] = 5
+    # inputs['num_return_sequences'] =4
+    # out = model_vqa.generate(**inputs)
+    # out=processor.batch_decode(out, skip_special_tokens=True)
+
+    out=vle(input_image,input_text)
     vqa="\n".join(out)
     gpt3_out=gpt3(input_text,vqa,cap)
     gpt3_out1=gpt3(input_text,'',cap)
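
For context, this commit swaps the BLIP processor/generate path inside inference_chat for the new vle() helper built on VLEForVQAPipeline. Below is a minimal standalone sketch of the new flow; it assumes the models.VLE package from the VLE repository is importable, uses a hypothetical local image path (pics/example.jpg) and a hypothetical question, and hedges on the pipeline's return type, since inference_chat joins the top_k answers with newlines before passing them to gpt3().

from PIL import Image
from models.VLE import VLEForVQA, VLEProcessor, VLEForVQAPipeline

# Same setup as in the commit: load the VQA checkpoint and wrap it in a CPU pipeline.
model_name = "hfl/vle-base-for-vqa"
model = VLEForVQA.from_pretrained(model_name)
vle_processor = VLEProcessor.from_pretrained(model_name)
vqa_pipeline = VLEForVQAPipeline(model=model, device='cpu', vle_processor=vle_processor)

def vle(input_image, input_text):
    # Mirror of the new helper in app.py: return the top 4 candidate answers.
    return vqa_pipeline(image=input_image, question=input_text, top_k=4)

image = Image.open("pics/example.jpg")          # hypothetical example image
question = "How many dogs are in the picture?"  # hypothetical example question
out = vle(image, question)

# inference_chat feeds the joined candidates into the GPT-3 prompt; str() hedges
# against the pipeline returning dicts rather than plain answer strings.
vqa = "\n".join(str(a) for a in out)
print(vqa)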