J-LAB committed
Commit 94180ae
1 Parent(s): 5df4b4d

Update app.py

Files changed (1): app.py +29 -44
app.py CHANGED
@@ -1,15 +1,13 @@
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForCausalLM
 import spaces
+
 import io
-import base64  # Adding the base64 library for decoding
 from PIL import Image
+import base64  # For decoding Base64 images
 import subprocess
-
-# Installing the flash-attn dependency if needed
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
-# Loading the model and processor
+#
 model_id = 'J-LAB/Florence-vl3'
 model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to("cuda").eval()
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
@@ -35,35 +33,28 @@ def run_example(task_prompt, image):
     )
     return parsed_answer
 
-# Function to process images, now supporting Base64
 def process_image(image, task_prompt):
-    # Check whether the image is a base64 string
-    if isinstance(image, str) and image.startswith("data:image"):
-        # Extract the base64 part of the string
-        base64_image = image.split(",")[1]
-        # Decode the base64 image
-        image = Image.open(io.BytesIO(base64.b64decode(base64_image)))
-    elif isinstance(image, bytes):
-        image = Image.open(io.BytesIO(image))
-    else:
-        image = Image.fromarray(image)  # Convert a NumPy array to a PIL image, if applicable
-
-    # Map the task prompts
+    if isinstance(image, str):
+        if image.startswith('data:image/png;base64,'):
+            # Decode the Base64 image
+            image_data = base64.b64decode(image.split(',')[1])
+            image = Image.open(io.BytesIO(image_data))
+
+    image = Image.fromarray(image)  # Convert NumPy array to PIL Image
     if task_prompt == 'Product Caption':
         task_prompt = '<MORE_DETAILED_CAPTION>'
     elif task_prompt == 'OCR':
         task_prompt = '<OCR>'
 
-    # Call the example with the processed image and the task prompt
     results = run_example(task_prompt, image)
 
-    # Extract the generated text from the results
+    # Remove the key and get the text value
     if results and task_prompt in results:
         output_text = results[task_prompt]
     else:
         output_text = ""
 
-    # Convert line breaks to HTML line breaks
+    # Convert newline characters to HTML line breaks
     output_text = output_text.replace("\n\n", "<br><br>").replace("\n", "<br>")
 
     return output_text
@@ -92,47 +83,41 @@ document.querySelector('button').addEventListener('click', function() {
 });
 """
 
-single_task_list = ['Product Caption', 'OCR']
+single_task_list = [
+    'Product Caption', 'OCR'
+]
 
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Tab(label="Product Image Select"):
         with gr.Row():
             with gr.Column():
-                input_img = gr.Image(label="Input Picture", source="upload", type="pil")  # Supports PIL images
+                input_img = gr.Image(label="Input Picture")
                 task_prompt = gr.Dropdown(choices=single_task_list, label="Task Prompt", value="Product Caption")
                 submit_btn = gr.Button(value="Submit")
             with gr.Column():
                 output_text = gr.HTML(label="Output Text", elem_id="output")
 
-    gr.Markdown("""
+    gr.Markdown("""
     ## How to use via API
     To use this model via API, you can follow the example code below:
 
-    ```python
-    import base64
-    from PIL import Image
-    import io
-    import requests
-
-    # Converting image to base64
-    image_path = 'path_to_image.png'
-    with open(image_path, 'rb') as image_file:
-        image_base64 = base64.b64encode(image_file.read()).decode('utf-8')
-
-    # Preparing the payload
-    payload = {
-        "image": f"data:image/png;base64,{image_base64}",
-        "task_prompt": "Product Caption"
-    }
-
-    response = requests.post("http://your-space-url-here", json=payload)
-    print(response.json())
-    ```
+
+    ```python
+    !pip install gradio_client
+    from gradio_client import Client, handle_file
+
+    client = Client("J-LAB/Fluxi-IA")
+    result = client.predict(
+        image=handle_file('https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png'),
+        api_name="/process_image"
+    )
+    print(result)
+    ```
     """)
 
     submit_btn.click(process_image, [input_img, task_prompt], [output_text])
 
     demo.load(lambda: None, inputs=None, outputs=None, js=js)
 
-demo.launch(debug=True)
+demo.launch(debug=True)
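One note on the new API snippet: `/process_image` is wired to two inputs (`input_img` and `task_prompt`), but the example passes only `image`, so the call should fall back on the dropdown's default of "Product Caption". A minimal sketch of a call that sets the task explicitly, assuming the endpoint exposes the second input under the keyword `task_prompt`:

```python
# Sketch (untested): call the Space with both inputs of /process_image.
# Assumes the second input is exposed as `task_prompt`; per the UI the
# valid values are 'Product Caption' and 'OCR'.
from gradio_client import Client, handle_file

client = Client("J-LAB/Fluxi-IA")
result = client.predict(
    image=handle_file('https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png'),
    task_prompt="OCR",  # or "Product Caption"
    api_name="/process_image",
)
print(result)  # HTML string; newlines arrive as <br> tags
```

The returned value is the HTML string produced by `process_image`, with newlines already converted to `<br>` tags.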