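"""Medical Report Insights: a Hugging Face ZeroGPU Space.

A Gradio app that sends an uploaded medical-report image to
meta-llama/Llama-3.2-11B-Vision-Instruct and renders the model's
interpretation as HTML.
"""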
import gradio as gr
import spaces
import markdown
import torch
from transformers import MllamaForConditionalGeneration, AutoProcessor
SYSTEM_INSTRUCTION = (
    "You are a medical report interpreter. Your task is to analyze the provided "
    "medical reports, identify key medical terms, diagnoses, or abnormalities, and "
    "provide a clear interpretation. Based on your analysis, generate a detailed "
    "summary that includes an explanation of the findings, recommended actions, and "
    "any additional insights for the patient or healthcare provider. Ensure your "
    "output is structured and easily understandable for both professionals and "
    "non-professionals."
)
model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# Load the vision-language model in bfloat16 and let Accelerate place it on
# the available GPU(s).
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)
def extract_assistant_reply(input_string):
    # The assistant's reply starts after this header tag in the decoded output.
    start_tag = "<|start_header_id|>assistant<|end_header_id|>"
    start_index = input_string.find(start_tag)
    if start_index == -1:
        return "Assistant's reply not found."
    start_index += len(start_tag)
    # Take everything after the tag and drop the trailing end-of-turn marker.
    assistant_reply = input_string[start_index:].strip()
    if assistant_reply.endswith("<|eot_id|>"):
        assistant_reply = assistant_reply[: -len("<|eot_id|>")].rstrip()
    return assistant_reply
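# For reference, the decoded generation roughly looks like the following
# (illustrative only, whitespace condensed); this is the string that
# extract_assistant_reply parses:
#   <|begin_of_text|><|start_header_id|>user<|end_header_id|>
#   <|image|>You are a medical report interpreter. ...<|eot_id|>
#   <|start_header_id|>assistant<|end_header_id|>
#   **Findings:** ...<|eot_id|>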
@spaces.GPU  # Required on ZeroGPU Spaces: allocates a GPU for this call.
def med_interpreter(image):
    # Send the instruction alongside the image as a single user turn.
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": SYSTEM_INSTRUCTION}
        ]}
    ]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(image, input_text, return_tensors="pt").to(model.device)

    # Generate the interpretation and decode it, keeping the special tokens so
    # the assistant header can be located.
    output = model.generate(**inputs, max_new_tokens=4000)
    markdown_text = processor.decode(output[0])
    markdown_text = extract_assistant_reply(markdown_text)

    # The model answers in Markdown; convert it to HTML for the Gradio output.
    html_output = markdown.markdown(markdown_text)
    return html_output
# Gradio UI
interface = gr.Interface(
    fn=med_interpreter,
    inputs=gr.Image(type="pil", label="Upload an image of the medical report"),
    outputs=gr.HTML(),
    title="Medical Report Insights",
    description="Upload an image of your medical report to get an interpretation of it.",
)
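# Note: gr.Markdown() could be used as the output component instead of
# gr.HTML(), letting Gradio render the model's Markdown directly and making
# the markdown-to-HTML conversion above unnecessary.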
# Launch the UI
interface.launch()