Spaces:

capradeepgujaran
/

DocChat_n_Talk

Sleeping

File size: 3,026 Bytes

886e3e9
25a5bf9
f3d0867
49c3721
 
 
 
 
 
 
 
 
 
 
5fad48b
9c78509
25a5bf9
9c78509
 
 
25a5bf9
49c3721
9c78509
 
25a5bf9
9c78509
 
 
25a5bf9
9c78509
f9ae432
25a5bf9
9c78509
 
25a5bf9
f9ae432
 
25a5bf9
251214c
49c3721
 
 
25a5bf9
 
 
 
f9ae432
25a5bf9
f9ae432
 
25a5bf9
49c3721
25a5bf9
49c3721
25a5bf9
 
 
 
49c3721
25a5bf9
 
 
 
f3d0867
f9ae432
49c3721
25a5bf9
9c78509
49c3721
25a5bf9
49c3721
25a5bf9
9c78509
25a5bf9
9c78509
25a5bf9
f9ae432
25a5bf9

import hashlib
import os

from openai import OpenAI

def translate_text(client, text, target_language, model_name):
    """
    Translate the input text to the target language using the specified OpenAI chat model.

    Args:
        client: Object exposing ``chat.completions.create`` (an OpenAI client).
        text (str): Text to translate.
        target_language (str): Target language, inserted verbatim into the prompt.
        model_name (str): Name of the chat model used for the translation.

    Returns:
        str: The model's reply with surrounding whitespace removed.

    Raises:
        ValueError: If the response has no choices or the first choice carries
            no text content.
    """
    prompt = f"Translate the following text to {target_language}:\n\n{text}\n\nTranslation:"
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}]
    )

    # The message content is optional in the API response; fail with a clear
    # message instead of an AttributeError/IndexError deep in the call chain.
    choices = response.choices
    if not choices or choices[0].message.content is None:
        raise ValueError("Translation response contained no text content")

    return choices[0].message.content.strip()

def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_speed, language, output_option):
    """
    Translate text and generate audio and/or script files from it using OpenAI's APIs.

    The input is always passed through ``translate_text`` first. The translated
    text is then synthesized with the ``tts-1-hd`` model and/or written to a
    UTF-8 text file inside a ``temp`` directory under the current working
    directory.

    Args:
        api_key (str): OpenAI API key
        input_text (str): Text to translate and convert to speech
        model_name (str): OpenAI model name for translation
        voice_type (str): Voice type for TTS
        voice_speed (float): Speed of speech (converted with ``float()``)
        language (str): Target language; used in the translation prompt and in
            the output file names
        output_option (str): Output type ('audio', 'script_text', or 'both')

    Returns:
        tuple: (audio_file_path, script_file_path, status_message). A path is
        None when that output was not requested. On any failure both paths are
        None and status_message describes the problem.
    """
    if not input_text:
        return None, None, "No input text provided"

    if not api_key:
        return None, None, "No API key provided"

    wants_audio = output_option in ("audio", "both")
    wants_script = output_option in ("script_text", "both")
    if not (wants_audio or wants_script):
        # Reject unknown options before any API call is made; otherwise the
        # translation is paid for, nothing is written, and success is reported.
        return None, None, f"Error: Invalid output option: {output_option!r}"

    try:
        client = OpenAI(api_key=api_key)

        # The text is always sent for translation; the source language is not
        # detected, so same-language input is simply passed through the model.
        translated_text = translate_text(client, input_text, language, model_name)

        # Create temp directory if it doesn't exist (exist_ok avoids a
        # check-then-create race between concurrent requests).
        temp_dir = os.path.join(os.getcwd(), 'temp')
        os.makedirs(temp_dir, exist_ok=True)

        # Builtin hash() of a str is salted per process and may be negative,
        # so use a content digest to get stable, filename-safe identifiers.
        digest = hashlib.sha256(translated_text.encode("utf-8")).hexdigest()[:16]

        # Generate audio file
        audio_file = None
        if wants_audio:
            speech_response = client.audio.speech.create(
                model="tts-1-hd",
                voice=voice_type,
                input=translated_text,
                speed=float(voice_speed)
            )

            # Save the audio to a temporary file
            audio_path = os.path.join(temp_dir, f"output_{digest}_{language}.mp3")
            with open(audio_path, "wb") as f:
                for chunk in speech_response.iter_bytes():
                    f.write(chunk)

            audio_file = audio_path

        # Save the translated text as a script file
        script_file = None
        if wants_script:
            script_path = os.path.join(temp_dir, f"script_{digest}_{language}.txt")
            with open(script_path, "w", encoding='utf-8') as f:
                f.write(translated_text)
            script_file = script_path

        status_message = f"Generation completed successfully in {language}!"
        return audio_file, script_file, status_message

    except Exception as e:
        # Boundary for the caller: report any API or I/O failure as a status
        # string instead of propagating the exception.
        return None, None, f"Error: {str(e)}"