Spaces:

datascientist22
/

real-robot-speaking

Sleeping

App Files Files Community

real-robot-speaking / app.py

datascientist22

Update app.py

1b365e4 verified 14 days ago

raw

history blame

No virus

2.02 kB

	import streamlit as st
	import speech_recognition as sr
	from transformers import pipeline
	import requests

	# Load the chatbot model from Hugging Face.
	@st.cache_resource
	def _load_chatbot():
	    """Build the BlenderBot text-generation pipeline once per server process.

	    Streamlit re-executes this script on every widget interaction; caching
	    the pipeline as a resource avoids reloading the model each time.

	    The "text2text-generation" task is used because the rest of this script
	    calls the pipeline with a plain string and reads
	    ``result[0]["generated_text"]`` from its output, which is the shape that
	    task produces. The "conversational" task works on ``Conversation``
	    objects instead and is no longer available in recent transformers
	    releases.
	    """
	    return pipeline(
	        "text2text-generation",
	        model="facebook/blenderbot-400M-distill",
	    )


	chatbot = _load_chatbot()

	# Function to convert speech to text using SpeechRecognition
	def speech_to_text():
	    """Record one utterance from the default microphone and transcribe it.

	    Returns:
	        The recognized text, or ``None`` when no transcript could be
	        produced. Failure reasons are shown to the user in the Streamlit
	        page rather than returned, so a caller can never mistake an error
	        message for something the user actually said.
	    """
	    recognizer = sr.Recognizer()

	    try:
	        with sr.Microphone() as source:
	            st.info("Listening...")
	            audio = recognizer.listen(source)
	    except OSError:
	        # Raised when the host has no usable audio input device
	        # (typical for a headless server).
	        st.error("No microphone input device is available.")
	        return None

	    # The microphone is released before the network call to the recognizer.
	    try:
	        return recognizer.recognize_google(audio)
	    except sr.UnknownValueError:
	        st.warning("Sorry, I could not understand the audio.")
	    except sr.RequestError:
	        st.error("Speech recognition service is not available.")
	    return None

	# Function to generate avatar video using D-ID API
	def generate_avatar_video(text_response):
	    """Ask the D-ID API to render an avatar speaking *text_response*.

	    Args:
	        text_response: The text the avatar should speak.

	    Returns:
	        The ``result_url`` field of the JSON response, or ``None`` when the
	        request fails, the body is not a JSON object, or the field is absent.
	    """
	    import os  # local import: only this function needs it

	    # NOTE(review): D-ID's public docs appear to describe a `/talks` endpoint
	    # whose result URL becomes available asynchronously -- confirm the URL,
	    # auth scheme and payload fields below against the current API reference.
	    api_url = "https://api.d-id.com/talk"

	    # Prefer a key supplied through the DID_API_KEY environment variable so
	    # the secret does not have to live in source; the original placeholder is
	    # kept as the fallback.
	    api_key = os.environ.get("DID_API_KEY", "YOUR_API_KEY")
	    headers = {
	        "Authorization": f"Bearer {api_key}",
	        "Content-Type": "application/json",
	    }
	    payload = {
	        "script": {
	            "type": "text",
	            "input": text_response,
	        },
	        "source": {
	            "avatar_id": "your_avatar_id",  # Replace with the desired avatar ID
	        },
	    }

	    try:
	        # A timeout keeps an unresponsive service from hanging the app.
	        response = requests.post(
	            api_url, headers=headers, json=payload, timeout=30
	        )
	        body = response.json()
	    except (requests.RequestException, ValueError) as exc:
	        # ValueError covers a non-JSON body on older requests versions.
	        st.error(f"Avatar video request failed: {exc}")
	        return None

	    if not isinstance(body, dict):
	        return None
	    return body.get("result_url")

	# Streamlit app interface
	st.title("🗣️ Voice-Enabled Live Video Chatbot")

	# Button to start recording; everything below runs once per click.
	if st.button("Speak"):
	    user_input = speech_to_text()
	    if user_input:
	        st.write(f"You: {user_input}")

	        # Generate chatbot response using NLP model
	        bot_response = chatbot(user_input)
	        response_text = bot_response[0]["generated_text"]
	        st.write(f"Bot: {response_text}")

	        # Generate avatar video with the bot's response
	        video_url = generate_avatar_video(response_text)

	        # Display the video response. generate_avatar_video can return
	        # None, so only hand st.video a real URL.
	        if video_url:
	            st.video(video_url)
	        else:
	            st.warning("No avatar video is available for this response.")