import streamlit as st
import speech_recognition as sr
from transformers import pipeline, Conversation
import requests

# Load the chatbot model from Hugging Face.
# Note: the "conversational" pipeline is deprecated in recent transformers
# releases; pin an older version (e.g. transformers<4.42) if it is unavailable.
chatbot = pipeline("conversational", model="facebook/blenderbot-400M-distill")

# Function to convert speech to text using SpeechRecognition
def speech_to_text():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        st.info("Listening...")
        audio = recognizer.listen(source)
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        st.error("Sorry, I could not understand the audio.")
    except sr.RequestError:
        st.error("Speech recognition service is not available.")
    return None

# Function to generate an avatar video using the D-ID API.
# D-ID creates talks asynchronously: the POST returns a talk "id", and the
# finished video URL ("result_url") only becomes available once processing
# completes, so you may need to poll for it (see the helper sketch below).
def generate_avatar_video(text_response):
    api_url = "https://api.d-id.com/talks"
    headers = {
        # Replace with your D-ID API key; use the auth scheme your account
        # requires (Basic or Bearer, per the D-ID docs).
        "Authorization": "Bearer YOUR_API_KEY",
        "Content-Type": "application/json"
    }
    payload = {
        "script": {
            "type": "text",
            "input": text_response
        },
        "source": {
            # Replace with the desired avatar/presenter; the exact field names
            # must match the schema of your D-ID API version.
            "avatar_id": "your_avatar_id"
        }
    }
    response = requests.post(api_url, headers=headers, json=payload)
    response.raise_for_status()
    return response.json().get("result_url")

# Streamlit app interface
st.title("🗣️ Voice-Enabled Live Video Chatbot")

# Button to start recording
if st.button("Speak"):
    user_input = speech_to_text()
    if user_input:
        st.write(f"**You:** {user_input}")

        # Generate the chatbot response. The conversational pipeline expects a
        # Conversation object rather than a raw string and returns it updated
        # with the model's reply.
        conversation = chatbot(Conversation(user_input))
        response_text = conversation.generated_responses[-1]
        st.write(f"**Bot:** {response_text}")

        # Generate an avatar video with the bot's response
        video_url = generate_avatar_video(response_text)

        # Display the video response, or warn if it is not ready yet
        if video_url:
            st.video(video_url)
        else:
            st.warning("The avatar video is not ready yet; poll the D-ID talk for its result_url.")
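
# --- Hedged sketch (assumption, not part of the original script) ---
# Because D-ID renders talks asynchronously, the create-talk POST above may not
# include "result_url" right away. The helper below shows one way to poll
# GET https://api.d-id.com/talks/{id} (where `talk_id` is the "id" field from the
# create-talk response) until the video is ready. The field names ("result_url",
# "status") and the polling flow are assumptions based on D-ID's public docs;
# verify them against the API version your account uses.
import time

def wait_for_talk_result(talk_id, headers, timeout_s=60, interval_s=2):
    """Poll a D-ID talk until it reports a result_url or the timeout expires."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        resp = requests.get(f"https://api.d-id.com/talks/{talk_id}", headers=headers)
        resp.raise_for_status()
        data = resp.json()
        if data.get("result_url"):          # video is ready
            return data["result_url"]
        if data.get("status") == "error":   # processing failed on D-ID's side
            raise RuntimeError(f"D-ID talk failed: {data}")
        time.sleep(interval_s)              # still processing; wait and retry
    raise TimeoutError("Timed out waiting for the D-ID talk to finish.")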