omkar334 committed on
Commit
c425f6c
1 Parent(s): 2468331

text-to-speech integration

Files changed (3)
  1. agent.py +4 -6
  2. app.py +38 -33
  3. sarvam.py +7 -5
agent.py CHANGED
@@ -44,7 +44,7 @@ async def call_agent(user_prompt, grade, subject):
             "src_lang": "Identify the language that the user query is in, type: str",
             "dest_lang": """Identify the target language from the user query if the function is either "translator" or "speaker". If language is not found, return "none",
             type: Enum["hindi", "bengali", "kannada", "malayalam", "marathi", "odia", "punjabi", "tamil", "telugu", "english", "gujarati", "none"]""",
-            "source": "Identify the sentence that the user wants to translate or speak. Retu 'none', type: Optional[str]",
+            "source": "Identify the sentence that the user wants to translate or speak. Else return 'none', type: Optional[str]",
             "response": "Your response, type: Optional[str]",
         },
         llm=llm,
@@ -69,12 +69,10 @@ async def function_caller(user_prompt, collection, client):
         user_prompt = RAG_USER_PROMPT.format(data, user_prompt)
 
         response = await llm(system_prompt, user_prompt)
-
-        return response
+        return {"text": response}
 
     elif function == "translator":
-        return await translator(result["response"], result["src_lang"], result["dest_lang"])
+        return await translator(result["source"], result["src_lang"], result["dest_lang"])
 
     elif function == "speaker":
-        return await speaker(result["response"], result["dest_lang"])
-        # return base64.b64encode(b"audio data").decode()
+        return await speaker(result["source"])
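To make the intent of the routing change concrete, here is a hedged sketch of what a parsed agent reply might look like for a speech request and how function_caller now consumes it. The example values, the `from sarvam import ...` line, the standalone `route` wrapper, and the `"function"` key name are assumptions for illustration, not taken from the repo:

```python
from sarvam import speaker, translator  # async helpers defined in sarvam.py

# Hypothetical parsed agent output for: "Speak 'Photosynthesis needs sunlight' in Hindi"
result = {
    "function": "speaker",            # assumed name of the routing key
    "src_lang": "english",
    "dest_lang": "hindi",
    "source": "Photosynthesis needs sunlight",  # the sentence to translate/speak
    "response": None,
}

async def route(result: dict):
    # Mirrors the change above: translator/speaker now receive result["source"],
    # the sentence the user asked about, rather than the agent's own "response".
    if result["function"] == "translator":
        return await translator(result["source"], result["src_lang"], result["dest_lang"])
    if result["function"] == "speaker":
        return await speaker(result["source"])
```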
 
app.py CHANGED
@@ -1,5 +1,5 @@
 import base64
-import io
+import tempfile
 
 import gradio as gr
 from fastapi import FastAPI
@@ -28,44 +28,49 @@ class ChatQuery(BaseModel):
 
 @app.post("/chat")
 async def chat(query: ChatQuery):
-    result = await function_caller(query.query, query.collection, hclient)
-
-    if isinstance(result, str):
-        return {"text": result}
-    elif isinstance(result, bytes) or (isinstance(result, str) and result.startswith("data:audio")):
-        if isinstance(result, bytes):
-            audio_b64 = base64.b64encode(result).decode()
-        else:
-            audio_b64 = result.split(",")[1]  # Remove the "data:audio/wav;base64," prefix
-        return {"audio": audio_b64}
-    else:
-        return {"error": "Unexpected result type"}
+    return await function_caller(query.query, query.collection, hclient)
 
 
-async def gradio_interface(input_text, grade, subject, chapter):
+async def gradio_interface(input_text, grade, subject, chapter, history):
     collection = f"{grade}_{subject.lower()}_{chapter}"
     response = await chat(ChatQuery(query=input_text, collection=collection))
+
     if "text" in response:
-        return response["text"], None
-    elif "audio" in response:
-        audio_data = base64.b64decode(response["audio"])
-        return "Audio response generated", (44100, io.BytesIO(audio_data))
+        output = response["text"]
+        history.append((input_text, output))
+
+    elif "audios" in response:
+        audio_data = base64.b64decode(response["audios"][0])
+
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audiofile:
+            audiofile.write(audio_data)
+            audiofile.flush()
+
+        return "", history, audiofile.name
+
     else:
-        return "Unexpected response format", None
-
-
-iface = gr.Interface(
-    fn=gradio_interface,
-    inputs=[
-        gr.Textbox(lines=2, placeholder="Enter your question here..."),
-        gr.Dropdown(choices=["1", "2", "3", "4", "5", "6", "7", "9", "10", "11", "12"], label="Grade", value="9", interactive=True),
-        gr.Dropdown(choices=["Math", "Science", "History"], label="Subject", value="Science", interactive=True),
-        gr.Dropdown(choices=["1", "2", "3", "4", "5", "6", "7", "9", "10", "11", "12", "13", "14", "15", "16"], label="Chapter", value="11", interactive=True),
-    ],
-    outputs=[gr.Textbox(label="Response"), gr.Audio(label="Audio Response")],
-    title="Agentic RAG Chatbot",
-    description="Ask a question and get an answer from the chatbot. The response may be text or audio.",
-)
+        output = "Unexpected response format"
+        history.append((input_text, output))
+
+    return "", history, None
+
+
+with gr.Blocks() as iface:
+    gr.Markdown("# Agentic RAG Chatbot")
+    gr.Markdown("Ask a question and get an answer from the chatbot. The response may be text or audio.")
+
+    with gr.Row():
+        grade = gr.Dropdown(choices=["1", "2", "3", "4", "5", "6", "7", "9", "10", "11", "12"], label="Grade", value="9", interactive=True)
+        subject = gr.Dropdown(choices=["Math", "Science", "History"], label="Subject", value="Science", interactive=True)
+        chapter = gr.Dropdown(choices=["1", "2", "3", "4", "5", "6", "7", "9", "10", "11", "12", "13", "14", "15", "16"], label="Chapter", value="11", interactive=True)
+
+    chatbot = gr.Chatbot(label="Chat History")
+    msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
+
+    state = gr.State([])
+    audio_output = gr.Audio(label="Audio Response", type="filepath")  # Separate audio output component
+
+    msg.submit(gradio_interface, inputs=[msg, grade, subject, chapter, state], outputs=[msg, chatbot, audio_output])
 
 app = gr.mount_gradio_app(app, iface, path="/")
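The key piece of the new Gradio flow is turning the base64 WAV returned by /chat into something gr.Audio(type="filepath") can play. Below is a minimal sketch of that decode-to-tempfile step, pulled out as a standalone helper; the helper name is hypothetical, and app.py inlines this logic directly in gradio_interface:

```python
import base64
import tempfile
from typing import Optional


def audio_response_to_path(response: dict) -> Optional[str]:
    """Decode the first base64 clip in a {"audios": [...]} payload to a temp .wav path.

    Illustrative helper only; gradio_interface does this inline. The resulting path
    is what gr.Audio(type="filepath") plays back from disk.
    """
    if "audios" not in response:
        return None
    audio_bytes = base64.b64decode(response["audios"][0])
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        f.write(audio_bytes)
        return f.name
```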
 
sarvam.py CHANGED
@@ -36,11 +36,11 @@ async def translator(text, src, dest):
         headers = {"Content-Type": "application/json", "api-subscription-key": os.getenv("SARVAM_API_KEY")}
         async with session.post(url, headers=headers, json=payload) as response:
             if response.status == 200:
-                output = await response.text()
-                return output
+                output = await response.json()
+                return {"text": output["translated_text"]}
 
 
-async def speaker(text, dest):
+async def speaker(text, dest="hindi"):
     async with aiohttp.ClientSession() as session:
         url = "https://api.sarvam.ai/text-to-speech"
 
@@ -49,7 +49,7 @@ async def speaker(text, dest):
             "target_language_code": code_map[dest],
             "speaker": "meera",
             "pitch": 0,
-            "pace": 1.65,
+            "pace": 1.25,
             "loudness": 1.5,
             "speech_sample_rate": 8000,
             "enable_preprocessing": True,
@@ -58,5 +58,7 @@ async def speaker(text, dest):
         headers = {"Content-Type": "application/json", "api-subscription-key": os.getenv("SARVAM_API_KEY")}
         async with session.post(url, headers=headers, json=payload) as response:
             if response.status == 200:
-                output = await response.read()
+                output = await response.json()
                 return output
+            else:
+                print(response.status)
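For completeness, a small usage sketch of the updated speaker() helper. It assumes, as app.py does after this commit, that the parsed text-to-speech response JSON carries base64-encoded WAV clips under an "audios" key, and that SARVAM_API_KEY is set in the environment; the script and filenames are illustrative, not part of the repo:

```python
import asyncio
import base64

from sarvam import speaker  # helper defined in sarvam.py; needs SARVAM_API_KEY set


async def main():
    # speaker() now returns the parsed TTS response JSON (or None on a non-200 status)
    result = await speaker("नमस्ते, आप कैसे हैं?", dest="hindi")
    if result and "audios" in result:
        # decode the first base64 WAV clip and save it locally
        with open("sample.wav", "wb") as f:
            f.write(base64.b64decode(result["audios"][0]))


if __name__ == "__main__":
    asyncio.run(main())
```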