LocalScribe1

Running on Zero

App Files Files Community

KG0101 committed 9 days ago

Commit

deca1bc

•

1 Parent(s): 02be256

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -38

app.py CHANGED Viewed

@@ -1,20 +1,24 @@
 import spaces
 import torch
 import gradio as gr
-from transformers import pipeline
-from llama_cpp import Llama
 MODEL_NAME = "openai/whisper-large-v3-turbo"
 BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
 device = 0 if torch.cuda.is_available() else "cpu"
-# Load the Llama model directly from Hugging Face
-llm = Llama.from_pretrained(
-    repo_id="MaziyarPanahi/Qwen2-7B-Instruct-GGUF",
-    filename="Qwen2-7B-Instruct.Q4_K_M.gguf"
-)
 # Initialize the transcription pipeline
 pipe = pipeline(
@@ -24,19 +28,6 @@ pipe = pipeline(
     device=device,
 )
-# Prompt for SOAP note generation
-sys_prompt = "You are a world class clinical assistant."
-task_prompt = """
-Convert the following transcribed conversation into a clinical SOAP note.
-The text includes dialogue between a physician and a patient. Please clearly distinguish between the physician's and the patient's statements.
-Extract and organize the information into the relevant sections of a SOAP note:
-- Subjective (symptoms and patient statements),
-- Objective (clinical findings and observations, these might be missing if the physician has not conducted a physical exam or has not verbally stated findings),
-- Assessment (diagnosis or potential diagnoses, objectively provide a top 5 most likely diagnosis based on just the subjective findings, and use the objective findings if available),
-- Plan (treatment and follow-up).
-Ensure the note is concise, clear, and accurately reflects the conversation.
-"""
 # Function to transcribe audio inputs
 @spaces.GPU
 def transcribe(inputs, task):
@@ -45,25 +36,69 @@ def transcribe(inputs, task):
     text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
     return text
-# Function to generate SOAP notes using Llama model
-def generate_soap(transcribed_text):
-    # Format the conversation for the Llama model
-    prompt = [
-        {"role": "system", "content": sys_prompt},
-        {"role": "user", "content": f"{task_prompt}\n{transcribed_text}"}
-    ]
-    # Generate a response
-    response = llm.create_chat_completion(messages=prompt, temperature=0.7, max_tokens=2048)
-    return response["choices"][0]["message"]["content"]
-# Gradio Interfaces for different inputs
 demo = gr.Blocks(theme=gr.themes.Ocean())
 # Interface for microphone or file transcription
 mf_transcribe = gr.Interface(
     fn=transcribe,
-    inputs=[gr.Audio(sources="microphone", type="filepath"), gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")],
     outputs="text",
     title="Audio Transcribe",
     description="Transcribe long-form microphone or audio inputs."
@@ -71,22 +106,36 @@ mf_transcribe = gr.Interface(
 file_transcribe = gr.Interface(
     fn=transcribe,
-    inputs=[gr.Audio(sources="upload", type="filepath", label="Audio file"), gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")],
     outputs="text",
     title="Audio Transcribe"
 )
-# SOAP Note generation interface
 soap_note = gr.Interface(
     fn=generate_soap,
-    inputs="text",
     outputs="text",
     title="Generate Clinical SOAP Note",
-    description="Convert transcribed conversation to a clinical SOAP note with structured sections (Subjective, Objective, Assessment, Plan)."
 )
-# Tabbed interface with transcription and SOAP note generation
 with demo:
-    gr.TabbedInterface([mf_transcribe, file_transcribe, soap_note], ["Microphone", "Audio file", "SOAP Note"])
 demo.queue().launch(ssr_mode=False)

 import spaces
 import torch
 import gradio as gr
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from threading import Thread
+from typing import Iterator
+import os
 MODEL_NAME = "openai/whisper-large-v3-turbo"
 BATCH_SIZE = 8
 FILE_LIMIT_MB = 1000
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = 0 if torch.cuda.is_available() else "cpu"
+# Initialize the LLM
+if torch.cuda.is_available():
+    llm_model_id = "NousResearch/Meta-Llama-3.1-8B-Instruct"
+    llm = AutoModelForCausalLM.from_pretrained(llm_model_id, torch_dtype=torch.float16, device_map="auto")
+    tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
+    tokenizer.use_default_system_prompt = False
 # Initialize the transcription pipeline
 pipe = pipeline(
     device=device,
 )
 # Function to transcribe audio inputs
 @spaces.GPU
 def transcribe(inputs, task):
     text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
     return text
# Function to generate SOAP notes using LLM
@spaces.GPU
def generate_soap(
    transcribed_text: str,
    system_prompt: str = "You are a world class clinical assistant.",
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    """Stream a clinical SOAP note generated from a transcribed conversation.

    The transcript is wrapped in a fixed task prompt, rendered through the
    tokenizer's chat template, and passed to ``llm.generate`` on a background
    thread. Text is read back through a ``TextIteratorStreamer`` so the caller
    receives partial output while generation is still running.

    Args:
        transcribed_text: Transcript of the physician/patient conversation.
        system_prompt: Content of the system message placed before the task.
        max_new_tokens: Forwarded to ``llm.generate``.
        temperature: Forwarded to ``llm.generate``.
        top_p: Forwarded to ``llm.generate``.
        top_k: Forwarded to ``llm.generate``.
        repetition_penalty: Forwarded to ``llm.generate``.

    Yields:
        The note generated so far (all streamed chunks joined), once per
        chunk produced by the streamer.

    Raises:
        gr.Error: If the module-level ``llm``/``tokenizer`` were never
            created (they are only initialised when CUDA is available).
    """
    # The file-level transformers import does not include the streamer class,
    # so bring it into scope here; without this the call below is a NameError.
    from transformers import TextIteratorStreamer

    # llm and tokenizer are only defined at module load when CUDA is
    # available; surface a readable error instead of a bare NameError.
    try:
        model, chat_tokenizer = llm, tokenizer
    except NameError as exc:
        raise gr.Error(
            "The SOAP note model is not loaded: no CUDA device was available at startup."
        ) from exc

    task_prompt = """
    Convert the following transcribed conversation into a clinical SOAP note.
    The text includes dialogue between a physician and a patient. Please clearly distinguish between the physician's and the patient's statements.
    Extract and organize the information into the relevant sections of a SOAP note:
    - Subjective (symptoms and patient statements),
    - Objective (clinical findings and observations),
    - Assessment (diagnosis or potential diagnoses),
    - Plan (treatment and follow-up).
    Ensure the note is concise, clear, and accurately reflects the conversation.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"{task_prompt}\n\nTranscribed conversation:\n{transcribed_text}"},
    ]

    # add_generation_prompt=True appends the assistant-turn header so the
    # model answers the request rather than continuing the user message.
    input_ids = chat_tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # NOTE(review): this keeps the LAST MAX_INPUT_TOKEN_LENGTH tokens, so
        # an over-long transcript pushes the system and task prompts (which
        # come first) out of the window. Consider trimming the transcript
        # itself instead; left unchanged here to preserve existing behaviour.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(
        chat_tokenizer,
        timeout=10.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )
    generate_kwargs = {
        "input_ids": input_ids,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "do_sample": True,
        "top_p": top_p,
        "top_k": top_k,
        "temperature": temperature,
        "num_beams": 1,
        "repetition_penalty": repetition_penalty,
    }

    # generate() blocks until completion, so run it on a worker thread and
    # consume the streamer from this one.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
+# Gradio Interfaces
 demo = gr.Blocks(theme=gr.themes.Ocean())
 # Interface for microphone or file transcription
 mf_transcribe = gr.Interface(
     fn=transcribe,
+    inputs=[
+        gr.Audio(sources="microphone", type="filepath"),
+        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
+    ],
     outputs="text",
     title="Audio Transcribe",
     description="Transcribe long-form microphone or audio inputs."
 file_transcribe = gr.Interface(
     fn=transcribe,
+    inputs=[
+        gr.Audio(sources="upload", type="filepath", label="Audio file"),
+        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
+    ],
     outputs="text",
     title="Audio Transcribe"
 )
+# SOAP Note generation interface with additional parameters
 soap_note = gr.Interface(
     fn=generate_soap,
+    inputs=[
+        gr.Textbox(label="Transcribed Text", lines=10),
+        gr.Textbox(label="System Prompt", lines=2, value="You are a world class clinical assistant."),
+        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, value=1024, step=1),
+        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, value=0.6, step=0.1),
+        gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, value=0.9, step=0.05),
+        gr.Slider(label="Top-k", minimum=1, maximum=1000, value=50, step=1),
+        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, value=1.2, step=0.05)
+    ],
     outputs="text",
     title="Generate Clinical SOAP Note",
+    description="Convert transcribed conversation to a clinical SOAP note with structured sections."
 )
+# Tabbed interface
 with demo:
+    gr.TabbedInterface(
+        [mf_transcribe, file_transcribe, soap_note],
+        ["Microphone", "Audio file", "SOAP Note"]
+    )
 demo.queue().launch(ssr_mode=False)