# BETTER QUALITY
# -> In VS Code, hover over Communicate and Ctrl + Click to open communicate.py
# -> In communicate.py search for audio-24khz-48kbitrate-mono-mp3 and replace
#    it with audio-24khz-96kbitrate-mono-mp3

# pip install pyttsx3
# pip install pysrt
# pip install edge_tts
# pip install termcolor
# pip install pydub
# pip install nltk
# (wave and asyncio are part of the standard library; nothing to install)

import asyncio
import contextlib
import os
import subprocess
import time
import wave
import winsound

import edge_tts
import nltk
import pysrt
import pyttsx3
from pydub import AudioSegment
from termcolor import cprint

# Codec used for every .srt file handed to the speech engines.
SRT_ENCODING = 'ANSI'
# Scratch file the local engine renders each subtitle into.
TEMP_WAV = "temp.wav"


def _script_dir():
    """Return the directory that contains this script."""
    return os.path.dirname(os.path.realpath(__file__))


def _start_seconds(subtitle):
    """Return the subtitle's start timestamp as seconds from 00:00:00."""
    start = subtitle.start.to_time()
    return (start.hour * 3600 + start.minute * 60
            + start.second + start.microsecond / 1000000)


def _print_subtitle(index, subtitle):
    """Print one subtitle (index, time range, text) in an SRT-like layout."""
    begin = subtitle.start.to_time().strftime('%H:%M:%S.%f')[:-3]
    end = subtitle.end.to_time().strftime('%H:%M:%S.%f')[:-3]
    print(f"{index}\n{begin} --> {end}\n{subtitle.text}\n")


def _pad_with_silence(wav_file, start_seconds):
    """Append zero-valued frames until the WAV is start_seconds long.

    Nothing is written when the audio already written reaches or exceeds
    start_seconds.
    """
    framerate = wav_file.getframerate()
    written_seconds = wav_file.getnframes() / float(framerate)
    if start_seconds > written_seconds:
        missing_frames = int((start_seconds - written_seconds) * framerate)
        bytes_per_frame = wav_file.getsampwidth() * wav_file.getnchannels()
        wav_file.writeframes(b'\x00' * (missing_frames * bytes_per_frame))


def tts_local(choice):
    """Convert every .srt next to this script to speech with a local engine.

    choice == 1 uses pyttsx3; choice == 2 uses the Balabolka command line
    tool (balcon). Any other value does nothing.
    """

    def convert_srt_to_wav(dir_path):
        """Render each .srt in dir_path to a WAV file through pyttsx3."""
        # Speech engine setup
        engine = pyttsx3.init()
        for voice in engine.getProperty('voices'):
            if voice.name == 'Vocalizer Expressive Zosia Harpo 22kHz':
                engine.setProperty('voice', voice.id)
                break
        engine.setProperty('rate', 200)  # Speaking rate
        engine.setProperty('volume', 0.7)  # Volume

        try:
            # Convert every .srt file in the directory
            for file in os.listdir(dir_path):
                if not file.endswith(".srt"):
                    continue
                subtitles = pysrt.open(
                    os.path.join(dir_path, file), encoding=SRT_ENCODING)

                output_file = os.path.splitext(file)[0] + ".wav"
                with wave.open(output_file, 'wb') as wav_file:
                    # Defaults; replaced by the real format of the first
                    # rendered fragment before anything is written.
                    wav_file.setnchannels(1)  # Mono
                    wav_file.setsampwidth(2)  # 16-bit
                    wav_file.setframerate(22050)  # 22 kHz
                    params_adopted = False
                    cprint('\n' + subtitles.path + '\n', 'green')
                    for i, subtitle in enumerate(subtitles, start=1):
                        _print_subtitle(i, subtitle)

                        # Render this subtitle into the scratch file
                        engine.save_to_file(subtitle.text, TEMP_WAV)
                        engine.runAndWait()

                        with wave.open(TEMP_WAV, 'rb') as fragment:
                            if not params_adopted:
                                # The raw frames are copied verbatim, so the
                                # output header must describe their format.
                                wav_file.setnchannels(fragment.getnchannels())
                                wav_file.setsampwidth(fragment.getsampwidth())
                                wav_file.setframerate(fragment.getframerate())
                                params_adopted = True
                            speech = fragment.readframes(fragment.getnframes())

                        # Insert silence up to the subtitle's start time,
                        # then append the speech itself.
                        _pad_with_silence(wav_file, _start_seconds(subtitle))
                        wav_file.writeframes(speech)
        finally:
            # Remove the scratch file even when a conversion fails
            with contextlib.suppress(FileNotFoundError):
                os.remove(TEMP_WAV)

    def convert_srt_to_wav_balabolka(dir_path):
        """Render each .srt in dir_path to a WAV file through balcon."""
        # BALABOLKA - BALCON.EXE
        for file in os.listdir(dir_path):
            if not file.endswith(".srt"):
                continue
            file_path = os.path.join(dir_path, file)
            try:
                subtitles = pysrt.open(file_path, encoding=SRT_ENCODING)
            except UnicodeDecodeError:
                # Files that cannot be decoded are skipped
                continue
            cprint('\n' + subtitles.path + '\n', 'green')
            for i, subtitle in enumerate(subtitles, start=1):
                _print_subtitle(i, subtitle)
            # Argument list instead of a shell string: file names with
            # spaces or shell metacharacters are passed through intact.
            command = [
                'balcon',
                '-f', file_path,
                '-w', os.path.splitext(file)[0] + '.wav',
                '-n', 'IVONA 2 Agnieszka',
                '-s', '5',
                '-v', '70',
            ]
            subprocess.call(command)

    dir_path = _script_dir()
    if choice == 1:
        convert_srt_to_wav(dir_path)
    if choice == 2:
        convert_srt_to_wav_balabolka(dir_path)


def tts_edge_online(choice):
    """Convert every .srt next to this script to speech with Edge TTS.

    choice == 3 selects the pl-PL-ZofiaNeural voice, choice == 4 selects
    pl-PL-MarekNeural.

    Raises:
        ValueError: if choice is neither 3 nor 4.
    """
    voices = {3: "pl-PL-ZofiaNeural", 4: "pl-PL-MarekNeural"}
    if choice not in voices:
        raise ValueError(f"unsupported Edge TTS choice: {choice!r}")
    # Edge TTS engine settings ("+0%" leaves rate/volume unchanged)
    voice_name = voices[choice]
    speech_rate = "+40%"
    speech_volume = "+0%"

    async def generate_speech(subtitle, voice, output_file, rate, volume):
        """Synthesize one subtitle and save it to output_file."""
        communicate = edge_tts.Communicate(
            subtitle.text, voice, rate=rate, volume=volume)
        await communicate.save(output_file)

    async def generate_wav_files(subtitles, voice, rate, volume):
        """Synthesize every subtitle to '<srt name>_<index>.mp3'.

        Requests run concurrently in batches of 50. Returns the mp3 paths
        in subtitle order.
        """
        tasks = []
        mp3_files = []
        file_name = os.path.splitext(subtitles.path)[0]
        for i, subtitle in enumerate(subtitles, start=1):
            output_file = f"{file_name}_{i}.mp3"
            mp3_files.append(output_file)
            tasks.append(asyncio.create_task(generate_speech(
                subtitle, voice, output_file, rate, volume)))
            if i % 50 == 0:
                await asyncio.gather(*tasks)
                tasks = []
                # Pause 2 seconds between batches without blocking the
                # event loop (presumably to go easy on the online service).
                await asyncio.sleep(2)
        await asyncio.gather(*tasks)
        return mp3_files

    def merge_audio_files(mp3_files, subtitles):
        """Join the per-subtitle mp3 files into '<srt name>.wav'.

        Each mp3 is deleted once it has been read. Entries whose mp3 file
        does not exist are printed but contribute no audio.
        """
        file_name = os.path.splitext(subtitles.path)[0]
        with wave.open(f"{file_name}.wav", 'wb') as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(24000)
            cprint('\n' + subtitles.path + '\n', 'green')
            pairs = zip(mp3_files, subtitles)
            for i, (mp3_file, subtitle) in enumerate(pairs, start=1):
                _print_subtitle(i, subtitle)
                if not os.path.isfile(mp3_file):
                    continue
                sound = AudioSegment.from_file(mp3_file, format="mp3")
                os.remove(mp3_file)
                # Insert silence up to the subtitle's start time, then
                # append the decoded samples.
                _pad_with_silence(wav_file, _start_seconds(subtitle))
                wav_file.writeframes(sound.raw_data)

    dir_path = _script_dir()
    for file in os.listdir(dir_path):
        if file.endswith(".srt"):
            subtitles = pysrt.open(
                os.path.join(dir_path, file), encoding=SRT_ENCODING)
            mp3_files = asyncio.run(generate_wav_files(
                subtitles, voice_name, speech_rate, speech_volume))
            merge_audio_files(mp3_files, subtitles)


def text_to_subtitles():
    """Turn every .txt next to this script into a '<name>_.srt' file.

    Each .txt is cleaned in place (lines stripped, bracket/quote characters
    removed), split into sentences, and saved as subtitles whose timestamps
    are all 00:00:00,000. Afterwards every .srt in the directory that reads
    as UTF-8 is rewritten in the ANSI codec.
    """
    dir_path = _script_dir()

    def clean_text(file_path):
        """Strip leading/trailing whitespace from every line, in place."""
        with open(file_path, 'r', encoding='utf8') as f:
            lines = f.readlines()
        with open(file_path, 'w', encoding='utf8') as f:
            f.writelines(line.strip() + "\n" for line in lines)

    def erasing_words(file_path):
        """Remove bracket, quote and ellipsis characters, in place."""
        unwanted = ["(", ")", "[", "]", "<", ">", "{", "}", "\"", "『", "』",
                    "…", "「", "」", "„", "”", "«", "»", "...", "*", "'",
                    "〈", "〉"]
        with open(file_path, 'r', encoding='utf8') as f:
            lines = f.readlines()
        with open(file_path, 'w', encoding='utf8') as f:
            for line in lines:
                for token in unwanted:
                    line = line.replace(token, "")
                f.write(line)

    def txt_to_srt(file_path):
        """Write one subtitle per sentence of file_path to '<name>_.srt'."""
        print("txt to srt")
        with open(file_path, "r", encoding="utf-8") as f:
            text = [row for row in f.read().split("\n") if row != ""]
        print(text)

        subs = pysrt.SubRipFile()
        index = 1
        for line in text:
            for sentence in nltk.sent_tokenize(line):
                # Skip "sentences" made of punctuation only
                if all(c in '.,?!:;-–—' for c in sentence):
                    continue
                subs.append(pysrt.SubRipItem(
                    index, start='00:00:00,000', end='00:00:00,000',
                    text=sentence))
                index += 1

        srt_filename = os.path.splitext(file_path)[0] + "_.srt"
        subs.save(srt_filename, encoding='utf-8')

    for file in os.listdir(dir_path):
        if file.endswith(".txt"):
            file_path = os.path.join(dir_path, file)
            clean_text(file_path)
            erasing_words(file_path)
            txt_to_srt(file_path)

    for file_name in os.listdir(dir_path):
        if not file_name.endswith(".srt"):
            continue
        file_path = os.path.join(dir_path, file_name)
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                content = file.read()
        except UnicodeDecodeError:
            # Not UTF-8 (e.g. already converted): leave the file untouched
            continue
        # errors="replace": opening with "w" truncates the file, so an
        # encoding failure here would otherwise destroy its content.
        with open(file_path, "w", encoding=SRT_ENCODING,
                  errors="replace") as file:
            file.write(content)


def main():
    """Show the menu, run the chosen conversion and report the run time."""
    # red green yellow white attrs=['bold']
    cprint("╚═══ Multimedia Magic – Audio Visual Heaven ═══╝",
           'white', attrs=['bold'])
    cprint("")
    cprint("Wybierz jedną z poniższych opcji (tylko .txt, .srt):")
    cprint("1. TTS - Zosia - Harpo")
    cprint("2. TTS - Agnieszka - Ivona")
    cprint("3. TTS - Zofia - Edge")
    cprint("4. TTS - Marek - Edge")
    choice = input("Wybierz numer opcji: ").strip()

    # Start timing after the prompt so the wait for user input is excluded
    start_time = time.time()

    text_to_subtitles()

    actions = {
        '1': lambda: tts_local(choice=1),
        '2': lambda: tts_local(choice=2),
        '3': lambda: tts_edge_online(choice=3),
        '4': lambda: tts_edge_online(choice=4),
    }
    action = actions.get(choice)
    if action is not None:
        action()

    # Report elapsed time
    elapsed = time.time() - start_time
    print(f"--- {elapsed} seconds ---")
    print(f"--- {elapsed / 60} minutes ---")
    print(f"--- {elapsed / 3600} hours ---")
    winsound.PlaySound('complete.wav', winsound.SND_FILENAME)


if __name__ == "__main__":
    main()