Update app.py
app.py CHANGED
@@ -1,15 +1,13 @@
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForCausalLM
 import spaces
+
 import io
-import base64  # Adding the base64 library for decoding
 from PIL import Image
+import base64  # For decoding Base64 images
 import subprocess
-
-# Installing the flash-attn dependency if needed
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
-# Loading the model and the processor
+#
 model_id = 'J-LAB/Florence-vl3'
 model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to("cuda").eval()
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
@@ -35,35 +33,28 @@ def run_example(task_prompt, image):
     )
     return parsed_answer
 
-# Function to process images, now with Base64 support
 def process_image(image, task_prompt):
-
-
-
-
-
-
-
-        image = Image.open(io.BytesIO(image))
-    else:
-        image = Image.fromarray(image)  # Converting a NumPy array to a PIL image, if applicable
-
-    # Mapping the task prompts
+    if isinstance(image, str):
+        if image.startswith('data:image/png;base64,'):
+            # Decode the Base64 image
+            image_data = base64.b64decode(image.split(',')[1])
+            image = Image.open(io.BytesIO(image_data))
+    else:
+        image = Image.fromarray(image)  # Convert NumPy array to PIL Image
     if task_prompt == 'Product Caption':
         task_prompt = '<MORE_DETAILED_CAPTION>'
     elif task_prompt == 'OCR':
         task_prompt = '<OCR>'
 
-    # Calling the example with the processed image and the task prompt
     results = run_example(task_prompt, image)
 
-    #
+    # Remove the key and get the text value
     if results and task_prompt in results:
        output_text = results[task_prompt]
     else:
        output_text = ""
 
-    #
+    # Convert newline characters to HTML line breaks
     output_text = output_text.replace("\n\n", "<br><br>").replace("\n", "<br>")
 
     return output_text
@@ -92,47 +83,41 @@ document.querySelector('button').addEventListener('click', function() {
 });
 """
 
-single_task_list =
+single_task_list =[
+    'Product Caption', 'OCR'
+]
 
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Tab(label="Product Image Select"):
         with gr.Row():
             with gr.Column():
-                input_img = gr.Image(label="Input Picture"
+                input_img = gr.Image(label="Input Picture")
                 task_prompt = gr.Dropdown(choices=single_task_list, label="Task Prompt", value="Product Caption")
                 submit_btn = gr.Button(value="Submit")
             with gr.Column():
                 output_text = gr.HTML(label="Output Text", elem_id="output")
 
-    gr.Markdown("""
+    gr.Markdown("""
 ## How to use via API
 To use this model via API, you can follow the example code below:
 
-```python
-import base64
-from PIL import Image
-import io
-import requests
-
-# Converting image to base64
-image_path = 'path_to_image.png'
-with open(image_path, 'rb') as image_file:
-    image_base64 = base64.b64encode(image_file.read()).decode('utf-8')
 
-
-
-
-
-
-
-
-
-
+```python
+!pip install gradio_client
+from gradio_client import Client, handle_file
+
+client = Client("J-LAB/Fluxi-IA")
+result = client.predict(
+    image=handle_file('https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png'),
+    api_name="/process_image"
+)
+print(result)
+```
    """)
 
    submit_btn.click(process_image, [input_img, task_prompt], [output_text])
 
    demo.load(lambda: None, inputs=None, outputs=None, js=js)
 
-demo.launch(debug=True)
+demo.launch(debug=True)
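
For reference, the Base64 branch added to `process_image` expects a full data-URI string. A minimal client-side sketch of producing such a payload (the file path is a placeholder, not part of the commit):

```python
import base64

# Placeholder path for illustration; any PNG file works.
image_path = 'path_to_image.png'
with open(image_path, 'rb') as image_file:
    data_uri = 'data:image/png;base64,' + base64.b64encode(image_file.read()).decode('utf-8')

# process_image checks startswith('data:image/png;base64,'),
# splits on the comma, and base64-decodes the remainder.
```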
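
Note that the committed API example passes only `image`, while `process_image` also takes the `task_prompt` dropdown value ('Product Caption' or 'OCR'). Assuming `gradio_client` exposes that parameter by name, as it does for `image`, an explicit call might look like this sketch:

```python
from gradio_client import Client, handle_file

client = Client("J-LAB/Fluxi-IA")
result = client.predict(
    image=handle_file('https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png'),
    task_prompt='OCR',  # assumed keyword; mirrors the dropdown choices
    api_name="/process_image"
)
print(result)
```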