innnky committed on
Commit
68e3b88
1 Parent(s): ee37b01
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -55,7 +55,7 @@ hubert = torch.hub.load("bshall/hubert:main", "hubert_soft")
55
 
56
  _ = utils.load_checkpoint("nyarumodel.pth", net_g_ms, None)
57
 
58
- def vc_fn(input_audio,vc_transform):
59
  if input_audio is None:
60
  return "You need to upload an audio", None
61
  sampling_rate, audio = input_audio
@@ -78,7 +78,7 @@ def vc_fn(input_audio,vc_transform):
78
  sf.write("temp.wav", audio22050, 22050)
79
  pitch = transcribe("temp.wav", soft.shape[0], vc_transform)
80
  pitch = torch.LongTensor(pitch).unsqueeze(0)
81
- sid = torch.LongTensor([0])
82
  stn_tst = torch.FloatTensor(soft)
83
  with torch.no_grad():
84
  x_tst = stn_tst.unsqueeze(0)
@@ -89,16 +89,20 @@ def vc_fn(input_audio,vc_transform):
89
  return "Success", (hps_ms.data.sampling_rate, audio)
90
 
91
 
92
-
93
  app = gr.Blocks()
94
  with app:
95
  with gr.Tabs():
96
  with gr.TabItem("Basic"):
97
- vc_input3 = gr.Audio(label="Input Audio (30s limitation)")
98
- vc_transform = gr.Number(label="变调数量",value=1)
99
- vc_submit = gr.Button("Convert", variant="primary")
 
 
 
 
 
100
  vc_output1 = gr.Textbox(label="Output Message")
101
  vc_output2 = gr.Audio(label="Output Audio")
102
- vc_submit.click(vc_fn, [ vc_input3,vc_transform], [vc_output1, vc_output2])
103
 
104
  app.launch()
 
55
 
56
  _ = utils.load_checkpoint("nyarumodel.pth", net_g_ms, None)
57
 
58
+ def vc_fn(sid, input_audio,vc_transform):
59
  if input_audio is None:
60
  return "You need to upload an audio", None
61
  sampling_rate, audio = input_audio
 
78
  sf.write("temp.wav", audio22050, 22050)
79
  pitch = transcribe("temp.wav", soft.shape[0], vc_transform)
80
  pitch = torch.LongTensor(pitch).unsqueeze(0)
81
+ sid = torch.LongTensor([0]) if sid == "猫雷" else torch.LongTensor([1])
82
  stn_tst = torch.FloatTensor(soft)
83
  with torch.no_grad():
84
  x_tst = stn_tst.unsqueeze(0)
 
89
  return "Success", (hps_ms.data.sampling_rate, audio)
90
 
91
 
 
92
  app = gr.Blocks()
93
  with app:
94
  with gr.Tabs():
95
  with gr.TabItem("Basic"):
96
+ gr.Markdown(value="""本模型相比与前一个模型,音质和音准方面有一定的提升,但是低音音域目前存在较大问题。
97
+ 目前能够唱的最低音为#G3(207hz) 低于该音会当场爆炸(之前的模型只是会跑调),
98
+ 因此请不要让这个模型唱男声的音高,请使用变调功能将音域移动至207hz以上。
99
+ """)
100
+ sid = gr.Dropdown(label="音色",choices=['猫雷',"???"], value="猫雷")
101
+ vc_input3 = gr.Audio(label="上传音频(长度小于45秒)")
102
+ vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12)",value=0)
103
+ vc_submit = gr.Button("转换", variant="primary")
104
  vc_output1 = gr.Textbox(label="Output Message")
105
  vc_output2 = gr.Audio(label="Output Audio")
106
+ vc_submit.click(vc_fn, [sid, vc_input3, vc_transform], [vc_output1, vc_output2])
107
 
108
  app.launch()