Omnibus committed on
Commit
dce80ca
1 Parent(s): 071b368

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -5,7 +5,7 @@ from transformers import AutoProcessor, BarkModel
5
  import scipy
6
  from pytube import YouTube
7
  from pydub import AudioSegment
8
-
9
  #import ffmpeg
10
 
11
 
@@ -39,6 +39,12 @@ def run_bark(text, n, lang):
39
  scipy.io.wavfile.write("bark_out.wav", rate=sampling_rate, data=speech_values.cpu().numpy().squeeze())
40
  return ("bark_out.wav")
41
 
 
 
 
 
 
 
42
  def load_video_yt(vid):
43
  yt = YouTube(vid)
44
  vid = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download(filename="tmp.mp4")
@@ -54,10 +60,10 @@ def trim_clip(clip, start_t, end_t):
54
  format="mp4")
55
 
56
  # start and end time
57
- start_min = 0
58
- start_sec = 10
59
- end_min = 0
60
- end_sec = 55
61
  start_min = int(start_t.split(":",1)[0])
62
  start_sec = int(start_t.split(":",1)[1])
63
  end_min = int(end_t.split(":",1)[0])
@@ -100,9 +106,8 @@ with gr.Blocks() as app:
100
 
101
  trim_clip_btn = gr.Button("Trim Clip")
102
  trim_aud = gr.Audio(source='upload', interactive = False)
103
- yt_vid = gr.Video(type = 'filepath')
104
- trim_vid=gr.Video()
105
  alt_go_btn = gr.Button()
 
106
  #speaker_num = gr.Number(value=0)
107
 
108
  with gr.Column():
@@ -111,6 +116,6 @@ with gr.Blocks() as app:
111
  go_btn.click(run_bark,[in_text, speaker_num, speaker_lang],out_audio)
112
  load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid,in_aud_file,aud_file])
113
  trim_clip_btn.click(trim_clip,[aud_file, start_time, end_time],trim_aud)
114
- #alt_go_btn.click()
115
 
116
  app.launch()
 
5
  import scipy
6
  from pytube import YouTube
7
  from pydub import AudioSegment
8
+ from TTS.api import TTS
9
  #import ffmpeg
10
 
11
 
 
39
  scipy.io.wavfile.write("bark_out.wav", rate=sampling_rate, data=speech_values.cpu().numpy().squeeze())
40
  return ("bark_out.wav")
41
 
42
+ def custom_bark(inp):
43
+ speaker_wav=Path("Mid.mp3")
44
+ tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device)
45
+ tts.tts_to_file("This is voice cloning.", speaker_wav=speaker_wav, language="en", file_path="output.wav")
46
+ return ("output.wav")
47
+
48
  def load_video_yt(vid):
49
  yt = YouTube(vid)
50
  vid = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download(filename="tmp.mp4")
 
60
  format="mp4")
61
 
62
  # start and end time
63
+ #start_min = 0
64
+ #start_sec = 10
65
+ #end_min = 0
66
+ #end_sec = 55
67
  start_min = int(start_t.split(":",1)[0])
68
  start_sec = int(start_t.split(":",1)[1])
69
  end_min = int(end_t.split(":",1)[0])
 
106
 
107
  trim_clip_btn = gr.Button("Trim Clip")
108
  trim_aud = gr.Audio(source='upload', interactive = False)
 
 
109
  alt_go_btn = gr.Button()
110
+ yt_vid = gr.Video(type = 'filepath')
111
  #speaker_num = gr.Number(value=0)
112
 
113
  with gr.Column():
 
116
  go_btn.click(run_bark,[in_text, speaker_num, speaker_lang],out_audio)
117
  load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid,in_aud_file,aud_file])
118
  trim_clip_btn.click(trim_clip,[aud_file, start_time, end_time],trim_aud)
119
+ alt_go_btn.click(custom_bark, trim_aud, out_audio)
120
 
121
  app.launch()