vsrinivas committed on
Commit c3a36b3
1 Parent(s): 2a6fdc6

Update app.py

Files changed (1):
  1. app.py +24 -11
app.py CHANGED
@@ -12,7 +12,7 @@ from IPython.display import Audio, display
 
 model = whisper.load_model("base")
 
-def extract_yt_audio(video_url):
+def extract_yt_audio(it, video_url, video_file):
 
     """
     Takes youtube url (youtobe_url) and path where audio clip will be stored (audio_path)
@@ -20,7 +20,7 @@ def extract_yt_audio(video_url):
     Returns the extracted video clip (video) and the path to audio clip (audio_path).
     """
 
-    if "youtube.com" in video_url or "youtu.be" in video_url:
+    if it == 'URL' and ("youtube.com" in video_url or "youtu.be" in video_url):
         yt = YouTube(video_url, use_oauth=True)
         a = yt.streams.filter(only_audio=True).first()
         audio_file = a.download()
@@ -30,20 +30,18 @@ def extract_yt_audio(video_url):
     audio_path = 'audio.wav'
     # display(Audio(audio_path))
     sample.export(audio_path, format="wav")
-    result = model.transcribe(audio_path)
     print("Transcription started \nTranscript:\n")
+    result = model.transcribe(audio_path)
     print(result['text'], '\n')
     return gr.update(visible=True, value=result['text']), gr.update(visible=True), result['segments'], gr.update(visible=True, value=audio_path)
 
 
 def semantic_chunks(segs, max_chunk_length=15.0):
-    print(type(segs))
-    print(segs)
+    print("Trying to get symantically chunked segments:")
     """
     Takes segments of transcribed audio and 15secs as maximum check duration and returns chunks of the audio as a list.
     """
     segs = ast.literal_eval(segs)
-    print(type(segs))
 
     chunks = []
     current_chunk = []
@@ -95,12 +93,18 @@ def semantic_chunks(segs, max_chunk_length=15.0):
             'start_time (secs)': chunk_start_time,
             'end_time (secs)': chunk_end_time
         })
-
+    print(pd.DataFrame(chunks))
     return gr.update(visible=True, value=pd.DataFrame(chunks))
 
 
+def toggle_input_fields(input_type):
+    if input_type == "URL":
+        return gr.update(visible=True), gr.update(visible=False)
+    else:
+        return gr.update(visible=False), gr.update(visible=True)
+
 def clear_all():
-    return (gr.update(visible=True, value=""), gr.update(visible=True, value=""), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
+    return (gr.update(visible=True, value=""), gr.update(visible=True, value=""), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
 
 
 with gr.Blocks() as demo:
@@ -108,8 +112,15 @@ with gr.Blocks() as demo:
     """
     # Extract audio from video, get the transcript and then get the semantic chunk information.
     """)
+    # Radio button to choose between URL or upload
+    input_type = gr.Radio(choices=["URL", "Upload"], label="Select Video Input Type", value="URL")
+
     # input_url = gr.Textbox(label="Type-in the URL or File Location of the Video", value='https://www.youtube.com/watch?v=ug5e4JfC3oo')
-    input_url = gr.Textbox(label="Type-in the URL or File Location of the Video", value='sample.wav')
+
+    input_url = gr.Textbox(label="Enter Video URL", visible=False)
+    video_file = gr.File(label="Upload Video", visible=False)
+
+    # input_url = gr.Textbox(label="Type-in the URL or File Location of the Video", value='sample.mp4')
     segments = gr.Textbox(visible=False)
     submit_btn_1 = gr.Button("Get the Transcript", visible=True)
     audio = gr.Audio(visible=True, type="filepath", label='Play Audio')
@@ -118,7 +129,9 @@ with gr.Blocks() as demo:
     chunks = gr.Dataframe(visible=False, label = 'semantic Chunks')
     clear_btn = gr.Button("Clear")
 
-    submit_btn_1.click(fn=extract_yt_audio, inputs=[input_url], outputs=[transcript, submit_btn_2, segments, audio])
+    input_type.change(fn=toggle_input_fields, inputs=input_type, outputs=[input_url, video_file])
+    submit_btn_1.click(fn=extract_yt_audio, inputs=[input_type, input_url, video_file], outputs=[transcript, submit_btn_2, segments, audio])
+    # submit_btn_1.click(fn=extract_yt_audio, inputs=[input_url], outputs=[transcript, submit_btn_2, segments, audio])
     submit_btn_2.click(fn=semantic_chunks, inputs=[segments], outputs=[chunks])
-    clear_btn.click(fn=clear_all, outputs=[input_url, transcript, submit_btn_2, chunks, audio])
+    clear_btn.click(fn=clear_all, outputs=[input_url, video_file, transcript, submit_btn_2, chunks, audio])
     demo.launch(debug=True)
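
Note: the hunks change `extract_yt_audio` to accept the new `it` (input type) and `video_file` arguments, but the branch that consumes an uploaded file falls outside the diff context. A minimal sketch of what that branch could look like, assuming pydub is the library behind the `sample.export(audio_path, format="wav")` call visible in the context lines; the `else` body, helper name, and path handling below are illustrative, not part of the commit:

```python
from pydub import AudioSegment  # assumed: the library behind sample.export(..., format="wav")
from pytube import YouTube      # as used in the URL branch shown in the diff

def extract_audio_sketch(it, video_url, video_file):
    """Hypothetical sketch of the URL/Upload branching; not the committed code."""
    audio_path = 'audio.wav'
    if it == 'URL' and ("youtube.com" in video_url or "youtu.be" in video_url):
        # URL branch, as in the diff: grab the audio-only stream and download it.
        yt = YouTube(video_url, use_oauth=True)
        audio_file = yt.streams.filter(only_audio=True).first().download()
        sample = AudioSegment.from_file(audio_file)
    else:
        # Upload branch (assumed): gr.File may hand over a tempfile wrapper or a plain
        # path depending on the Gradio version; pydub reads the audio track via ffmpeg.
        path = video_file if isinstance(video_file, str) else video_file.name
        sample = AudioSegment.from_file(path)
    sample.export(audio_path, format="wav")
    return audio_path
```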
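`semantic_chunks` receives the Whisper segments through the hidden `segments` Textbox, which is why they arrive as a string and go through `ast.literal_eval`. The grouping loop itself sits outside the visible hunks; a rough sketch of how segments could be folded into chunks of at most `max_chunk_length` seconds, keeping the 'start_time (secs)' / 'end_time (secs)' keys seen in the diff (the 'chunk' text column and the accumulation logic are assumptions):

```python
import ast
import pandas as pd

def chunk_segments_sketch(segs, max_chunk_length=15.0):
    """Illustrative grouping of Whisper segments into ~15 s chunks; not the committed loop."""
    if isinstance(segs, str):               # the hidden gr.Textbox passes the list as text
        segs = ast.literal_eval(segs)

    chunks = []
    current_text, chunk_start, chunk_end = [], None, None
    for seg in segs:                         # Whisper segments carry 'start', 'end', 'text'
        if chunk_start is None:
            chunk_start = seg['start']
        # close the running chunk once adding this segment would exceed the limit
        if current_text and seg['end'] - chunk_start > max_chunk_length:
            chunks.append({'chunk': ' '.join(current_text),
                           'start_time (secs)': chunk_start,
                           'end_time (secs)': chunk_end})
            current_text, chunk_start = [], seg['start']
        current_text.append(seg['text'].strip())
        chunk_end = seg['end']
    if current_text:                         # flush the final partial chunk
        chunks.append({'chunk': ' '.join(current_text),
                       'start_time (secs)': chunk_start,
                       'end_time (secs)': chunk_end})
    return pd.DataFrame(chunks)
```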