mrfakename
committed on
Commit
•
73dbaa9
1
Parent(s):
0675d4f
Add experimental long text w/ Tortoise
Browse files
- app.py +21 -1
- requirements.txt +2 -1
app.py
CHANGED
@@ -3,6 +3,8 @@ import styletts2importable
|
|
3 |
import ljspeechimportable
|
4 |
import torch
|
5 |
import os
|
|
|
|
|
6 |
import pickle
|
7 |
theme = gr.themes.Base(
|
8 |
font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
|
@@ -25,6 +27,15 @@ def synthesize(text, voice):
|
|
25 |
raise gr.Error("Text must be under 300 characters")
|
26 |
v = voice.lower()
|
27 |
return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
def clsynthesize(text, voice):
|
29 |
if text.strip() == "":
|
30 |
raise gr.Error("You must enter some text")
|
@@ -59,6 +70,15 @@ with gr.Blocks() as clone:
|
|
59 |
clbtn = gr.Button("Synthesize", variant="primary")
|
60 |
claudio = gr.Audio(interactive=False, label="Synthesized Audio")
|
61 |
clbtn.click(clsynthesize, inputs=[clinp, clvoice], outputs=[claudio], concurrency_limit=4)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
with gr.Blocks() as lj:
|
63 |
with gr.Row():
|
64 |
with gr.Column(scale=1):
|
@@ -80,7 +100,7 @@ Is there a long queue on this space? Duplicate it and add a more powerful GPU to
|
|
80 |
|
81 |
**NOTE: StyleTTS 2 does better on longer texts.** For example, making it say "hi" will produce a lower-quality result than making it say a longer phrase.""")
|
82 |
gr.DuplicateButton("Duplicate Space")
|
83 |
-
gr.TabbedInterface([vctk, clone, lj], ['Multi-Voice', 'Voice Cloning', 'LJSpeech'])
|
84 |
gr.Markdown("""
|
85 |
Demo by by [mrfakename](https://twitter.com/realmrfakename). I am not affiliated with the StyleTTS 2 authors.
|
86 |
|
|
|
3 |
import ljspeechimportable
|
4 |
import torch
|
5 |
import os
|
6 |
+
from tortoise.utils.text import split_and_recombine_text
|
7 |
+
import numpy as np
|
8 |
import pickle
|
9 |
theme = gr.themes.Base(
|
10 |
font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
|
|
|
27 |
raise gr.Error("Text must be under 300 characters")
|
28 |
v = voice.lower()
|
29 |
return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
30 |
+
def longsynthesize(text, voice, progress=gr.Progress()):
    """Synthesize long text by splitting it into chunks and joining the audio.

    The input is split with ``split_and_recombine_text`` (imported from
    ``tortoise.utils.text``), each chunk is synthesized separately with
    ``styletts2importable.inference``, and the per-chunk results are
    concatenated into a single waveform.

    Args:
        text: The text to read aloud. Must contain non-whitespace characters.
        voice: Name of the voice; it is lower-cased and used as a key into
            the module-level ``voices`` mapping.
        progress: Gradio progress tracker; its ``tqdm`` wrapper reports
            per-chunk progress.

    Returns:
        A ``(24000, samples)`` tuple, where ``samples`` is the concatenation
        of every chunk's output (24000 is presumably the sample rate in Hz
        expected by the ``gr.Audio`` output -- confirm against the model).

    Raises:
        gr.Error: If ``text`` is empty or whitespace-only.
    """
    if text.strip() == "":
        raise gr.Error("You must enter some text")
    texts = split_and_recombine_text(text)
    v = voice.lower()
    audios = []
    for t in progress.tqdm(texts):
        # Synthesize the current chunk `t`. Passing the full `text` here would
        # re-synthesize the entire input once per chunk and repeat it in the
        # concatenated output.
        audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
    return (24000, np.concatenate(audios))
|
39 |
def clsynthesize(text, voice):
|
40 |
if text.strip() == "":
|
41 |
raise gr.Error("You must enter some text")
|
|
|
70 |
clbtn = gr.Button("Synthesize", variant="primary")
|
71 |
claudio = gr.Audio(interactive=False, label="Synthesized Audio")
|
72 |
clbtn.click(clsynthesize, inputs=[clinp, clvoice], outputs=[claudio], concurrency_limit=4)
|
73 |
+
# "Long Text" tab: text box plus a voice selector, wired to longsynthesize().
with gr.Blocks() as longText:
    with gr.Row():
        with gr.Column(scale=1):
            lnginp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
            # longsynthesize() looks the voice up by (lower-cased) name in
            # `voices`, so offer those names here. An audio upload would hand
            # the handler a file path, which is not a key of `voices`.
            # NOTE(review): assumes `voices` is a dict keyed by lower-case
            # voice names -- confirm against where it is built.
            lngvoices = sorted(voices)
            lngvoice = gr.Dropdown(lngvoices, label="Voice", value=lngvoices[0] if lngvoices else None, interactive=True)
        with gr.Column(scale=1):
            lngbtn = gr.Button("Synthesize", variant="primary")
            lngaudio = gr.Audio(interactive=False, label="Synthesized Audio")
            # Distinct lng* names keep this tab from rebinding the cl*
            # variables that belong to the voice-cloning tab.
            lngbtn.click(longsynthesize, inputs=[lnginp, lngvoice], outputs=[lngaudio], concurrency_limit=4)
|
82 |
with gr.Blocks() as lj:
|
83 |
with gr.Row():
|
84 |
with gr.Column(scale=1):
|
|
|
100 |
|
101 |
**NOTE: StyleTTS 2 does better on longer texts.** For example, making it say "hi" will produce a lower-quality result than making it say a longer phrase.""")
|
102 |
gr.DuplicateButton("Duplicate Space")
|
103 |
+
gr.TabbedInterface([vctk, clone, lj, longText], ['Multi-Voice', 'Voice Cloning', 'LJSpeech', 'Long Text [Beta]'])
|
104 |
gr.Markdown("""
|
105 |
Demo by by [mrfakename](https://twitter.com/realmrfakename). I am not affiliated with the StyleTTS 2 authors.
|
106 |
|
requirements.txt
CHANGED
@@ -19,4 +19,5 @@ scipy
|
|
19 |
phonemizer
|
20 |
cached-path
|
21 |
gradio
|
22 |
-
gruut
|
|
|
|
19 |
phonemizer
|
20 |
cached-path
|
21 |
gradio
|
22 |
+
gruut
|
23 |
+
tortoise-tts
|