FrankZxShen
committed on
Commit
•
9defba9
1
Parent(s):
32f6964
Update app.py
Browse files
app.py
CHANGED
@@ -117,6 +117,10 @@ if __name__ == "__main__":
|
|
117 |
parser = argparse.ArgumentParser()
|
118 |
parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
|
119 |
args = parser.parse_args()
|
|
|
|
|
|
|
|
|
120 |
for info in models_info:
|
121 |
lang = info['languages']
|
122 |
examples = info['examples']
|
@@ -154,85 +158,113 @@ if __name__ == "__main__":
|
|
154 |
gr.Markdown("# TTS&Voice Conversion for Princess Connect! Re:Dive\n\n"
|
155 |
)
|
156 |
with gr.Tabs():
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
)
|
219 |
-
with gr.Tab("Voice Conversion"):
|
220 |
-
for i, (description, speakers, vc_fn) in enumerate(
|
221 |
-
models_vc):
|
222 |
-
gr.Markdown("""
|
223 |
-
录制或上传声音,并选择要转换的音色。
|
224 |
-
""")
|
225 |
-
with gr.Column():
|
226 |
-
record_audio = gr.Audio(label="record your voice", source="microphone")
|
227 |
-
upload_audio = gr.Audio(label="or upload audio here", source="upload")
|
228 |
-
source_speaker = gr.Dropdown(choices=speakers, value=speakers[0], label="source speaker")
|
229 |
-
target_speaker = gr.Dropdown(choices=speakers, value=speakers[0], label="target speaker")
|
230 |
-
with gr.Column():
|
231 |
-
message_box = gr.Textbox(label="Message")
|
232 |
-
converted_audio = gr.Audio(label='converted audio')
|
233 |
-
btn = gr.Button("Convert!")
|
234 |
-
btn.click(vc_fn, inputs=[source_speaker, target_speaker, record_audio, upload_audio],
|
235 |
-
outputs=[message_box, converted_audio])
|
236 |
|
237 |
app.queue(concurrency_count=3).launch(show_api=False, share=args.share)
|
238 |
|
|
|
117 |
parser = argparse.ArgumentParser()
|
118 |
parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
|
119 |
args = parser.parse_args()
|
120 |
+
categories = ["Princess Connect! Re:Dive"]
|
121 |
+
others = {
|
122 |
+
"Umamusume": "https://huggingface.co/spaces/FrankZxShen/vits-fast-finetuning-umamusume",
|
123 |
+
}
|
124 |
for info in models_info:
|
125 |
lang = info['languages']
|
126 |
examples = info['examples']
|
|
|
158 |
gr.Markdown("# TTS&Voice Conversion for Princess Connect! Re:Dive\n\n"
|
159 |
)
|
160 |
with gr.Tabs():
|
161 |
+
for category in categories:
|
162 |
+
with gr.TabItem(category):
|
163 |
+
with gr.Tab("TTS"):
|
164 |
+
for i, (description, speakers, lang, example, symbols, tts_fn, to_symbol_fn) in enumerate(
|
165 |
+
models_tts):
|
166 |
+
gr.Markdown(description)
|
167 |
+
with gr.Row():
|
168 |
+
with gr.Column():
|
169 |
+
textbox = gr.TextArea(label="Text",
|
170 |
+
placeholder="Type your sentence here ",
|
171 |
+
value="よーし、私もがんばらないと!", elem_id=f"tts-input")
|
172 |
+
with gr.Accordion(label="Phoneme Input", open=False):
|
173 |
+
temp_text_var = gr.Variable()
|
174 |
+
symbol_input = gr.Checkbox(
|
175 |
+
value=False, label="Symbol input")
|
176 |
+
symbol_list = gr.Dataset(label="Symbol list", components=[textbox],
|
177 |
+
samples=[[x]
|
178 |
+
for x in symbols],
|
179 |
+
elem_id=f"symbol-list")
|
180 |
+
symbol_list_json = gr.Json(
|
181 |
+
value=symbols, visible=False)
|
182 |
+
symbol_input.change(to_symbol_fn,
|
183 |
+
[symbol_input, textbox,
|
184 |
+
temp_text_var],
|
185 |
+
[textbox, temp_text_var])
|
186 |
+
symbol_list.click(None, [symbol_list, symbol_list_json], textbox,
|
187 |
+
_js=f"""
|
188 |
+
(i, symbols, text) => {{
|
189 |
+
let root = document.querySelector("body > gradio-app");
|
190 |
+
if (root.shadowRoot != null)
|
191 |
+
root = root.shadowRoot;
|
192 |
+
let text_input = root.querySelector("#tts-input").querySelector("textarea");
|
193 |
+
let startPos = text_input.selectionStart;
|
194 |
+
let endPos = text_input.selectionEnd;
|
195 |
+
let oldTxt = text_input.value;
|
196 |
+
let result = oldTxt.substring(0, startPos) + symbols[i] + oldTxt.substring(endPos);
|
197 |
+
text_input.value = result;
|
198 |
+
let x = window.scrollX, y = window.scrollY;
|
199 |
+
text_input.focus();
|
200 |
+
text_input.selectionStart = startPos + symbols[i].length;
|
201 |
+
text_input.selectionEnd = startPos + symbols[i].length;
|
202 |
+
text_input.blur();
|
203 |
+
window.scrollTo(x, y);
|
204 |
+
text = text_input.value;
|
205 |
+
return text;
|
206 |
+
}}""")
|
207 |
+
# select character
|
208 |
+
char_dropdown = gr.Dropdown(
|
209 |
+
choices=speakers, value=speakers[0], label='character')
|
210 |
+
language_dropdown = gr.Dropdown(
|
211 |
+
choices=lang, value=lang[0], label='language')
|
212 |
+
ns = gr.Slider(
|
213 |
+
label="noise_scale", minimum=0.1, maximum=1.0, step=0.1, value=0.6, interactive=True)
|
214 |
+
nsw = gr.Slider(label="noise_scale_w", minimum=0.1,
|
215 |
+
maximum=1.0, step=0.1, value=0.668, interactive=True)
|
216 |
+
duration_slider = gr.Slider(minimum=0.1, maximum=5, value=1, step=0.1,
|
217 |
+
label='速度 Speed')
|
218 |
+
with gr.Column():
|
219 |
+
text_output = gr.Textbox(label="Message")
|
220 |
+
audio_output = gr.Audio(
|
221 |
+
label="Output Audio", elem_id="tts-audio")
|
222 |
+
btn = gr.Button("Generate!")
|
223 |
+
btn.click(tts_fn,
|
224 |
+
inputs=[textbox, char_dropdown, language_dropdown, ns, nsw, duration_slider,
|
225 |
+
symbol_input],
|
226 |
+
outputs=[text_output, audio_output])
|
227 |
+
gr.Examples(
|
228 |
+
examples=example,
|
229 |
+
inputs=[textbox, char_dropdown, language_dropdown,
|
230 |
+
duration_slider, symbol_input],
|
231 |
+
outputs=[text_output, audio_output],
|
232 |
+
fn=tts_fn
|
233 |
+
)
|
234 |
+
with gr.Tab("Voice Conversion"):
|
235 |
+
for i, (description, speakers, vc_fn) in enumerate(
|
236 |
+
models_vc):
|
237 |
+
gr.Markdown("""
|
238 |
+
录制或上传声音,并选择要转换的音色。
|
239 |
+
""")
|
240 |
+
with gr.Column():
|
241 |
+
record_audio = gr.Audio(
|
242 |
+
label="record your voice", source="microphone")
|
243 |
+
upload_audio = gr.Audio(
|
244 |
+
label="or upload audio here", source="upload")
|
245 |
+
source_speaker = gr.Dropdown(
|
246 |
+
choices=speakers, value=speakers[0], label="source speaker")
|
247 |
+
target_speaker = gr.Dropdown(
|
248 |
+
choices=speakers, value=speakers[0], label="target speaker")
|
249 |
+
with gr.Column():
|
250 |
+
message_box = gr.Textbox(label="Message")
|
251 |
+
converted_audio = gr.Audio(
|
252 |
+
label='converted audio')
|
253 |
+
btn = gr.Button("Convert!")
|
254 |
+
btn.click(vc_fn, inputs=[source_speaker, target_speaker, record_audio, upload_audio],
|
255 |
+
outputs=[message_box, converted_audio])
|
256 |
+
for category, link in others.items():
|
257 |
+
with gr.TabItem(category):
|
258 |
+
gr.Markdown(
|
259 |
+
f'''
|
260 |
+
<center>
|
261 |
+
<h2>Click to Go</h2>
|
262 |
+
<a href="{link}">
|
263 |
+
<img src="https://huggingface.co/datasets/huggingface/badges/raw/main/open-in-hf-spaces-xl-dark.svg"
|
264 |
+
</a>
|
265 |
+
</center>
|
266 |
+
'''
|
267 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
|
269 |
app.queue(concurrency_count=3).launch(show_api=False, share=args.share)
|
270 |
|