Spaces:

capradeepgujaran
/

DocChat_n_Talk

Sleeping

App Files Files Community

capradeepgujaran committed on Oct 3

Commit

49c3721

•

1 Parent(s): 8460ebb

Update openai_tts_tool.py

Browse files

Files changed (1) hide show

openai_tts_tool.py +21 -7

openai_tts_tool.py CHANGED Viewed

@@ -1,6 +1,17 @@
 from openai import OpenAI
 import os
 def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_speed, language, output_option):
     """
     Generate audio and text files from input text using OpenAI's TTS API.
@@ -8,7 +19,7 @@ def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_s
     Args:
         api_key (str): OpenAI API key
         input_text (str): Text to convert to speech
-        model_name (str): OpenAI model name
         voice_type (str): Voice type for TTS
         voice_speed (float): Speed of speech
         language (str): Language code for synthesis
@@ -26,6 +37,9 @@ def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_s
     try:
         client = OpenAI(api_key=api_key)
         # Create temp directory if it doesn't exist
         temp_dir = os.path.join(os.getcwd(), 'temp')
         if not os.path.exists(temp_dir):
@@ -35,26 +49,26 @@ def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_s
         audio_file = None
         if output_option in ["audio", "both"]:
             speech_response = client.audio.speech.create(
-                model="tts-1",
                 voice=voice_type,
-                input=input_text,
                 speed=float(voice_speed)
             )
             # Save the audio to a temporary file
-            audio_path = os.path.join(temp_dir, f"output_{hash(input_text)}_{language}.mp3")
             with open(audio_path, "wb") as f:
                 for chunk in speech_response.iter_bytes():
                     f.write(chunk)
             audio_file = audio_path
-        # Save the input text as a script file
         script_file = None
         if output_option in ["script_text", "both"]:
-            script_path = os.path.join(temp_dir, f"script_{hash(input_text)}_{language}.txt")
             with open(script_path, "w", encoding='utf-8') as f:
-                f.write(input_text)
             script_file = script_path
         status_message = f"Generation completed successfully in {language}!"

 from openai import OpenAI
 import os
def translate_text(client, text, target_language, model_name):
    """
    Translate the input text to the target language using the specified OpenAI chat model.

    Args:
        client: OpenAI client object exposing ``chat.completions.create``
        text (str): Text to translate
        target_language (str): Target language, interpolated verbatim into the prompt
        model_name (str): OpenAI chat model name used for the translation

    Returns:
        str: The model's reply with surrounding whitespace stripped, or an
            empty string when ``text`` is empty or whitespace-only.

    Raises:
        ValueError: If the response carries no choices or no text content.
    """
    # Nothing to translate: skip the API call instead of asking the model to
    # "translate" an empty prompt (which costs a request and yields made-up text).
    if not text or not text.strip():
        return ""

    prompt = f"Translate the following text to {target_language}:\n\n{text}\n\nTranslation:"
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}]
    )

    # The message content may be absent (e.g. a refusal); fail with a clear
    # message rather than an AttributeError/IndexError from blind indexing.
    choices = response.choices
    content = choices[0].message.content if choices else None
    if content is None:
        raise ValueError("Translation response contained no text content")
    return content.strip()
 def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_speed, language, output_option):
     """
     Generate audio and text files from input text using OpenAI's TTS API.
     Args:
         api_key (str): OpenAI API key
         input_text (str): Text to convert to speech
+        model_name (str): OpenAI model name for translation
         voice_type (str): Voice type for TTS
         voice_speed (float): Speed of speech
         language (str): Language code for synthesis
     try:
         client = OpenAI(api_key=api_key)
+        # Translate the text if the target language is not the same as the input text language
+        translated_text = translate_text(client, input_text, language, model_name)
         # Create temp directory if it doesn't exist
         temp_dir = os.path.join(os.getcwd(), 'temp')
         if not os.path.exists(temp_dir):
         audio_file = None
         if output_option in ["audio", "both"]:
             speech_response = client.audio.speech.create(
+                model="tts-1-hd",
                 voice=voice_type,
+                input=translated_text,
                 speed=float(voice_speed)
             )
             # Save the audio to a temporary file
+            audio_path = os.path.join(temp_dir, f"output_{hash(translated_text)}_{language}.mp3")
             with open(audio_path, "wb") as f:
                 for chunk in speech_response.iter_bytes():
                     f.write(chunk)
             audio_file = audio_path
+        # Save the translated text as a script file
         script_file = None
         if output_option in ["script_text", "both"]:
+            script_path = os.path.join(temp_dir, f"script_{hash(translated_text)}_{language}.txt")
             with open(script_path, "w", encoding='utf-8') as f:
+                f.write(translated_text)
             script_file = script_path
         status_message = f"Generation completed successfully in {language}!"