File size: 3,312 Bytes
06a0460
12bfd03
 
 
 
 
 
 
 
 
251bc7a
4905c07
251bc7a
12bfd03
 
 
 
06a0460
12bfd03
 
251bc7a
12bfd03
 
4905c07
 
12bfd03
 
 
 
 
 
 
 
 
 
4905c07
12bfd03
4905c07
12bfd03
4905c07
12bfd03
 
 
 
 
 
4905c07
12bfd03
4905c07
12bfd03
 
 
4905c07
12bfd03
 
4905c07
12bfd03
 
 
 
 
 
 
4905c07
12bfd03
 
4905c07
12bfd03
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import spaces
import random

import gradio as gr
from css.utils import *


# Advanced speech generation
# UI display label -> speaker id handed to the model.
# Kept as explicit pairs: the labels and ids used to live in two parallel
# lists joined by position, and the lists were ordered differently, so
# 'Japanese Male', 'English Female' and 'English Male' selected the wrong
# speaker. Insertion order here is the order shown in the radio group.
SOUND_CHOICE_TO_SPEAKER = {
    'Chinese Female': '中文女',
    'Chinese Male': '中文男',
    'Japanese Male': '日语男',
    'English Female': '英文女',
    'English Male': '英文男',
    'Cantonese Female': '粤语女',
    'Korean Female': '韩语女',
}


def advanced():
    """Build the "advanced" speech-generation UI and wire its callbacks.

    Creates the preset-voice radio group, the voice-status and synthesis-text
    textboxes (each with examples), the random-seed controls, the generate
    button and the audio output, then connects the two button click handlers.

    NOTE(review): presumably called inside an active ``gr.Blocks`` context;
    ``target_sr``, ``default_data``, ``set_all_random_seed``,
    ``cosyvoice_instruct``, ``postprocess`` and ``example_tts_text`` are not
    defined here and are assumed to come from ``css.utils`` -- confirm.
    """
    sound_choices = list(SOUND_CHOICE_TO_SPEAKER)

    def random_seed():
        """Return a random integer seed in the range [1, 100000000]."""
        return random.randint(1, 100000000)

    @spaces.GPU
    def generate_audio(_sound_radio, _speech_status_textbox,
                       _synthetic_input_textbox, _seed):
        """Synthesize speech for the given text with the chosen preset voice.

        Args:
            _sound_radio: Display label selected in the preset-voice radio.
            _speech_status_textbox: Free-text description of the voice status.
            _synthetic_input_textbox: Text to synthesize.
            _seed: Value of the seed field, forwarded to
                ``set_all_random_seed``.

        Returns:
            A ``(target_sr, audio)`` tuple for the audio output component;
            ``audio`` is ``default_data`` when the synthesis text is empty.
        """
        # Translate the UI label into the speaker id the model expects.
        speaker_id = SOUND_CHOICE_TO_SPEAKER[_sound_radio]
        print(speaker_id, _speech_status_textbox, _synthetic_input_textbox, _seed)
        if _synthetic_input_textbox == '':
            gr.Warning('The synthesis text is empty, did you forget to input the synthesis text?')
            return (target_sr, default_data)
        set_all_random_seed(_seed)
        model = cosyvoice_instruct
        output = model.inference_instruct(_synthetic_input_textbox, speaker_id, _speech_status_textbox)
        audio_data = postprocess(output['tts_speech']).numpy().flatten()
        return (target_sr, audio_data)

    with gr.Column():
        sound_radio = gr.Radio(choices=sound_choices,
                               value=sound_choices[0],
                               label="Select Preset Voice")
    with gr.Column():
        speech_status_textbox = gr.Textbox(label="Describe Voice Status")
        gr.Examples(
            label="Example of control text",
            examples=[
                ["Selene 'Moonshade', is a mysterious, elegant dancer with a connection to the night. Her movements are both mesmerizing and deadly. "],
                ["A female speaker with normal pitch, slow speaking rate, and sad emotion."],
            ],
            inputs=[speech_status_textbox])
    with gr.Column():
        synthetic_input_textbox = gr.Textbox(label="Input Synthesis Text")
        gr.Examples(
            label="example",
            examples=example_tts_text,
            inputs=[synthetic_input_textbox])

    with gr.Accordion(label="Random Seed"):
        with gr.Row():
            with gr.Column(scale=1, min_width=180):
                seed_button = gr.Button(value="\U0001F3B2 Shuffle Randomly",
                                        elem_classes="full-height")
            with gr.Column(scale=10):
                # precision=0 makes Gradio round the field and deliver it as
                # an int, so a hand-typed fractional value is not forwarded
                # to the seeding helper as a float.
                seed = gr.Number(show_label=False,
                                 value=0,
                                 precision=0,
                                 container=False,
                                 elem_classes="full-height")
    with gr.Column():
        generate_button = gr.Button("Generate Audio", variant="primary", size="lg")

    with gr.Column():
        output_audio = gr.Audio(label="Synthesize Audio")

    seed_button.click(fn=random_seed, outputs=[seed])
    generate_button.click(fn=generate_audio,
                          inputs=[
                              sound_radio, speech_status_textbox,
                              synthetic_input_textbox, seed
                          ],
                          outputs=[output_audio])