text-to-speech integration
agent.py
CHANGED
@@ -44,7 +44,7 @@ async def call_agent(user_prompt, grade, subject):
         "src_lang": "Identify the language that the user query is in, type: str",
         "dest_lang": """Identify the target language from the user query if the function is either "translator" or "speaker". If language is not found, return "none",
 type: Enum["hindi", "bengali", "kannada", "malayalam", "marathi", "odia", "punjabi", "tamil", "telugu", "english", "gujarati", "none"]""",
-        "source": "Identify the sentence that the user wants to translate or speak.
+        "source": "Identify the sentence that the user wants to translate or speak. Else return 'none', type: Optional[str]",
         "response": "Your response, type: Optional[str]",
     },
     llm=llm,

@@ -69,12 +69,10 @@ async def function_caller(user_prompt, collection, client):
         user_prompt = RAG_USER_PROMPT.format(data, user_prompt)

         response = await llm(system_prompt, user_prompt)
-
-        return response
+        return {"text": response}

     elif function == "translator":
-        return await translator(result["
+        return await translator(result["source"], result["src_lang"], result["dest_lang"])

     elif function == "speaker":
-        return await speaker(result["
-        # return base64.b64encode(b"audio data").decode()
+        return await speaker(result["source"])
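A minimal sketch (not part of the commit) of the shapes function_caller can now hand back to the /chat endpoint after this change; the example inputs are placeholders:

# Sketch only: the RAG branch returns {"text": ...}, the translator branch a
# {"text": ...} dict from sarvam.py, and the speaker branch the Sarvam TTS
# payload, which app.py reads as a list of base64 WAV clips under "audios".
def render(result: dict) -> str:
    if "text" in result:                 # RAG answer or translated text
        return result["text"]
    if "audios" in result:               # TTS response: base64-encoded WAV clips
        return f"<{len(result['audios'])} audio clip(s)>"
    return "Unexpected response format"

print(render({"text": "Photosynthesis converts light energy into chemical energy."}))
print(render({"audios": ["UklGRiQAAABXQVZF..."]}))  # truncated base64 placeholder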
app.py
CHANGED
@@ -1,5 +1,5 @@
 import base64
-import
+import tempfile

 import gradio as gr
 from fastapi import FastAPI

@@ -28,44 +28,49 @@ class ChatQuery(BaseModel):

 @app.post("/chat")
 async def chat(query: ChatQuery):
-    if isinstance(result, str):
-        return {"text": result}
-    elif isinstance(result, bytes) or (isinstance(result, str) and result.startswith("data:audio")):
-        if isinstance(result, bytes):
-            audio_b64 = base64.b64encode(result).decode()
-        else:
-            audio_b64 = result.split(",")[1]  # Remove the "data:audio/wav;base64," prefix
-        return {"audio": audio_b64}
-    else:
-        return {"error": "Unexpected result type"}
+    return await function_caller(query.query, query.collection, hclient)


-async def gradio_interface(input_text, grade, subject, chapter):
+async def gradio_interface(input_text, grade, subject, chapter, history):
     collection = f"{grade}_{subject.lower()}_{chapter}"
     response = await chat(ChatQuery(query=input_text, collection=collection))
+
     if "text" in response:
+        output = response["text"]
+        history.append((input_text, output))
+
+    elif "audios" in response:
+        audio_data = base64.b64decode(response["audios"][0])
+
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audiofile:
+            audiofile.write(audio_data)
+            audiofile.flush()
+
+        return "", history, audiofile.name
+
     else:
+        output = "Unexpected response format"
+        history.append((input_text, output))
+
+    return "", history, None
+
+
+with gr.Blocks() as iface:
+    gr.Markdown("# Agentic RAG Chatbot")
+    gr.Markdown("Ask a question and get an answer from the chatbot. The response may be text or audio.")
+
+    with gr.Row():
+        grade = gr.Dropdown(choices=["1", "2", "3", "4", "5", "6", "7", "9", "10", "11", "12"], label="Grade", value="9", interactive=True)
+        subject = gr.Dropdown(choices=["Math", "Science", "History"], label="Subject", value="Science", interactive=True)
+        chapter = gr.Dropdown(choices=["1", "2", "3", "4", "5", "6", "7", "9", "10", "11", "12", "13", "14", "15", "16"], label="Chapter", value="11", interactive=True)
+
+    chatbot = gr.Chatbot(label="Chat History")
+    msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
+
+    state = gr.State([])
+    audio_output = gr.Audio(label="Audio Response", type="filepath")  # Separate audio output component
+
+    msg.submit(gradio_interface, inputs=[msg, grade, subject, chapter, state], outputs=[msg, chatbot, audio_output])

 app = gr.mount_gradio_app(app, iface, path="/")
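A hedged usage sketch of the /chat route, which now just proxies function_caller; the host and port are placeholders, and the JSON body mirrors the ChatQuery model (query plus collection):

# Sketch only: POST to the FastAPI route defined above and branch on the two
# response shapes the Gradio handler also expects.
import base64
import requests

resp = requests.post(
    "http://localhost:7860/chat",  # hypothetical local URL for the Space
    json={"query": "Speak 'hello' in hindi", "collection": "9_science_11"},
    timeout=60,
)
data = resp.json()
if "text" in data:
    print(data["text"])
elif "audios" in data:
    # Same decoding step gradio_interface performs before handing Gradio a file path
    with open("reply.wav", "wb") as f:
        f.write(base64.b64decode(data["audios"][0]))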
sarvam.py
CHANGED
@@ -36,11 +36,11 @@ async def translator(text, src, dest):
         headers = {"Content-Type": "application/json", "api-subscription-key": os.getenv("SARVAM_API_KEY")}
         async with session.post(url, headers=headers, json=payload) as response:
             if response.status == 200:
-                output = await response.
-                return output
+                output = await response.json()
+                return {"text": output["translated_text"]}


-async def speaker(text, dest):
+async def speaker(text, dest="hindi"):
     async with aiohttp.ClientSession() as session:
         url = "https://api.sarvam.ai/text-to-speech"

@@ -49,7 +49,7 @@ async def speaker(text, dest):
             "target_language_code": code_map[dest],
             "speaker": "meera",
             "pitch": 0,
-            "pace": 1.
+            "pace": 1.25,
             "loudness": 1.5,
             "speech_sample_rate": 8000,
             "enable_preprocessing": True,

@@ -58,5 +58,7 @@ async def speaker(text, dest):
         headers = {"Content-Type": "application/json", "api-subscription-key": os.getenv("SARVAM_API_KEY")}
         async with session.post(url, headers=headers, json=payload) as response:
             if response.status == 200:
-                output = await response.
+                output = await response.json()
                 return output
+            else:
+                print(response.status)