voice_clone_v2

Paused

App Files Files Community

ahassoun committed on Sep 18, 2023

Commit

bb9d46e

•

1 Parent(s): 58ffcfd

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -60

app.py CHANGED Viewed

@@ -1,32 +1,32 @@
-from TTS.api import TTS
 import json
 import gradio as gr
 from gradio import Dropdown
-from share_btn import community_icon_html, loading_icon_html, share_js
 import os
 import shutil
 import re
-with open("characters.json", "r") as file:
-    data = json.load(file)
-    characters = [
-        {
-            "image": item["image"],
-            "title": item["title"],
-            "speaker": item["speaker"]
-        }
-        for item in data
-    ]
-tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
-def update_selection(selected_state: gr.SelectData):
-    c_image = characters[selected_state.index]["image"]
-    c_title = characters[selected_state.index]["title"]
-    c_speaker = characters[selected_state.index]["speaker"]
-    return c_title, selected_state
 def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
@@ -116,12 +116,6 @@ prompt = Dropdown(
 )
-def update_helper_text(prompt_choice):
-    return positive_prompts.get(prompt_choice, '')
-prompt.change(update_helper_text, outputs=["texts_samples"], queue=False)
 css = """
 #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
 a {text-decoration-line: underline; font-weight: 600;}
@@ -164,61 +158,81 @@ span.record-icon > span.dot.svelte-1thnwz {
   height: 36px;
 }
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         with gr.Row():
             with gr.Column():
-                prompt = Dropdown(
-                    label="Text to speech prompt",
                     choices=prompt_choices,
                     elem_id="tts-prompt"
                 )
-                audio_in = gr.Audio(
-                    label="WAV voice to clone",
                     type="filepath",
-                    source="upload"
                 )
-                clean_sample = gr.Checkbox(
-                    label="Clean sample ?", value=False)
-                hidden_audio_numpy = gr.Audio(
-                    type="numpy", visible=False)
                 submit_btn = gr.Button("Submit")
             with gr.Column():
                 cloned_out = gr.Audio(
-                    label="Text to speech output",
-                    visible=False
-                )
-                video_out = gr.Video(
-                    label="Waveform video",
-                    elem_id="voice-video-out"
-                )
-                npz_file = gr.File(
-                    label=".npz file",
-                    visible=False
-                )
                 folder_path = gr.Textbox(visible=False)
-audio_in.change(fn=wipe_npz_file, inputs=[folder_path], queue=False)
-submit_btn.click(
-    fn=infer,
-    inputs=[
-        prompt,
-        audio_in,
-        clean_sample,
-        hidden_audio_numpy
-    ],
-    outputs=[
-        cloned_out,
-        video_out,
-        npz_file,
-        folder_path
-    ]
-)
 demo.queue(api_open=False, max_size=10).launch()

 import json
 import gradio as gr
 from gradio import Dropdown
+# from share_btn import community_icon_html, loading_icon_html, share_js
 import os
 import shutil
 import re
+user_choice = ""
+# with open("characters.json", "r") as file:
+#     data = json.load(file)
+#     characters = [
+#         {
+#             "image": item["image"],
+#             "title": item["title"],
+#             "speaker": item["speaker"]
+#         }
+#         for item in data
+#     ]
+# tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
+# def update_selection(selected_state: gr.SelectData):
+#     c_image = characters[selected_state.index]["image"]
+#     c_title = characters[selected_state.index]["title"]
+#     c_speaker = characters[selected_state.index]["speaker"]
+#     return c_title, selected_state
 def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
 )
 css = """
 #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
 a {text-decoration-line: underline; font-weight: 600;}
   height: 36px;
 }
 """
+def load_hidden_mic(audio_in):  # Callback wired to micro_in.stop_recording: logs the event and passes the value through.
+    print("USER RECORDED A NEW SAMPLE")
+    return audio_in  # returned unchanged; the event wiring routes it to hidden_audio_numpy
+def update_positive_prompt(prompt_value):  # Callback for prompt.change: records the selection and looks up its text.
+    global user_choice
+    user_choice = prompt_value  # module-level variable, overwritten on every change event
+    if prompt_value in positive_prompts:  # positive_prompts is defined outside the lines shown in this diff
+        return positive_prompts[prompt_value]  # unknown keys fall through, so the function returns None
 with gr.Blocks(css=css) as demo:  # Builds the two-column UI and wires its event handlers.
     with gr.Column(elem_id="col-container"):
         with gr.Row():
             with gr.Column():
+                prompt = gr.Dropdown(
+                    label="Negative Speech Prompt",
                     choices=prompt_choices,
                     elem_id="tts-prompt"
                 )
+                texts_samples = gr.Textbox(
+                    label="Positive prompts",
+                    info="Please read out this prompt 5 times to generate a good sample",
+                    value="",
+                    lines=5,
+                    elem_id="texts_samples"
+                )
+                # Connect the prompt change to the update_positive_prompt function
+                prompt.change(fn=update_positive_prompt,
+                              inputs=prompt, outputs=texts_samples)
+                # Replace file input with microphone input
+                micro_in = gr.Audio(
+                    label="Record voice to clone",
                     type="filepath",
+                    source="microphone",
+                    interactive=True
                 )
+                hidden_audio_numpy = gr.Audio(type="numpy", visible=False)  # invisible component, filled by the stop_recording handler below
                 submit_btn = gr.Button("Submit")
             with gr.Column():
                 cloned_out = gr.Audio(
+                    label="Text to speech output", visible=False)
+                video_out = gr.Video(label="Waveform video",
+                                     elem_id="voice-video-out")
+                npz_file = gr.File(label=".npz file", visible=False)
                 folder_path = gr.Textbox(visible=False)
+        micro_in.stop_recording(fn=load_hidden_mic, inputs=[micro_in], outputs=[  # when recording stops, pass the recording's value to the hidden component
+                                hidden_audio_numpy], queue=False)
+        submit_btn.click(
+            fn=infer,
+            inputs=[  # NOTE(review): three inputs are passed, but the infer signature shown in this diff takes four parameters (nothing feeds clean_audio) - confirm infer was updated
+                prompt,
+                micro_in,
+                hidden_audio_numpy
+            ],
+            outputs=[
+                cloned_out,
+                video_out,
+                npz_file,
+                folder_path
+            ]
+        )
 demo.queue(api_open=False, max_size=10).launch()