Spaces:

bisoye
/

voice-based-pdf-summarizer

Sleeping

File size: 1,502 Bytes

import os
import tempfile
import wave

import gradio as gr
import numpy as np
from gtts import gTTS
from pydub import AudioSegment
from transformers import pipeline


# Text-to-speech function
def text_to_speech(text):
    """Convert text to a sped-up MP3 of US-accented English speech.

    The text is synthesized with gTTS, loaded with pydub, sped up by a
    factor of 1.20 and exported as MP3.

    Every call works on its own uniquely named files in the system temp
    directory, so simultaneous requests cannot overwrite one another's
    audio. The intermediate gTTS file is removed before returning.

    Args:
        text: The text to be spoken.

    Returns:
        Path of the generated MP3 file. The file is not deleted by this
        function; the caller owns it.
    """
    # Scratch file for the raw gTTS output. mkstemp hands back an open
    # descriptor, which is closed right away because gTTS and pydub work
    # with the path.
    raw_fd, raw_path = tempfile.mkstemp(suffix='.mp3')
    os.close(raw_fd)
    try:
        # Convert text to speech with a US accent using gTTS
        tts = gTTS(text=text, lang='en', tld='us', slow=False)
        tts.save(raw_path)

        # Load the audio fully into memory so the scratch file can go
        audio = AudioSegment.from_file(raw_path)
    finally:
        os.remove(raw_path)

    # Speed-up factor intended to bring the speech to roughly 170 wpm
    playback_speed = 1.20
    audio = audio.speedup(playback_speed=playback_speed)

    # Save the adjusted audio under a unique name and return its path
    final_fd, final_filename = tempfile.mkstemp(
        prefix='text_to_speech_', suffix='.mp3'
    )
    os.close(final_fd)
    try:
        # export() returns the open output file object; close it so the
        # handle is not leaked.
        exported = audio.export(final_filename, format='mp3')
        exported.close()
    except Exception:
        # Do not leave an empty or partial file behind on failure
        os.remove(final_filename)
        raise

    return final_filename


def process_files():
    """Build the two Gradio updates that make the summary controls interactive.

    Returns:
        A 2-tuple of ``gr.update`` results: the first carries
        ``elem_id='summary_button'``, the second
        ``elem_id='summarization_method'``; both set ``interactive=True``.
    """
    button_update = gr.update(interactive=True, elem_id='summary_button')
    method_update = gr.update(interactive=True,
                              elem_id='summarization_method')
    return button_update, method_update



def get_summarization_method(option):
    """Return the given summarization option unchanged."""
    chosen_method = option
    return chosen_method




def text_to_audio(text, model_name="facebook/fastspeech2-en-ljspeech"):
    """Synthesize speech with a transformers pipeline and save it as a WAV file.

    Args:
        text: The text to be spoken.
        model_name: Model identifier passed to the ``"text-to-speech"``
            pipeline.

    Returns:
        The path of the written file, always ``"output.wav"``.
    """
    # NOTE(review): the default checkpoint appears to be published as a
    # fairseq model -- confirm that it loads through transformers.pipeline.
    # Initialize the TTS pipeline
    tts_pipeline = pipeline("text-to-speech", model=model_name)

    # Generate the audio from text. The pipeline result is a dict holding
    # the waveform under "audio" and its rate under "sampling_rate"; there
    # is no "wav" entry.
    result = tts_pipeline(text)

    # Drop singleton batch/channel axes so mono audio becomes 1-D
    samples = np.squeeze(np.asarray(result["audio"], dtype=np.float32))
    if samples.ndim == 2 and samples.shape[0] < samples.shape[1]:
        # (channels, frames) -> (frames, channels) so frames interleave
        samples = samples.T
    channels = 1 if samples.ndim == 1 else samples.shape[1]

    # Float waveform -> little-endian 16-bit PCM, clipping to avoid
    # integer wrap-around on out-of-range samples.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

    # Save the audio to a file with a proper WAV header
    audio_path = "output.wav"
    with wave.open(audio_path, "wb") as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(2)
        wav_file.setframerate(int(result["sampling_rate"]))
        wav_file.writeframes(pcm.tobytes())

    return audio_path


def generate_output(method,  file):
    """Summarize a file and produce a spoken version of the summary.

    Args:
        method: Passed through to ``summarize_file`` as its first argument.
        file: Passed through to ``summarize_file`` as its second argument.

    Returns:
        A 2-tuple of the value returned by ``summarize_file`` and the
        value returned by ``text_to_speech`` for that summary.
    """
    summary = summarize_file(method, file)
    return summary, text_to_speech(summary)