Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ from IPython.display import Audio, display
|
|
12 |
|
13 |
model = whisper.load_model("base")
|
14 |
|
15 |
-
def extract_yt_audio(video_url):
|
16 |
|
17 |
"""
|
18 |
Takes a YouTube URL (video_url) and path where audio clip will be stored (audio_path)
|
@@ -20,7 +20,7 @@ def extract_yt_audio(video_url):
|
|
20 |
Returns the extracted video clip (video) and the path to audio clip (audio_path).
|
21 |
"""
|
22 |
|
23 |
-
if "youtube.com" in video_url or "youtu.be" in video_url:
|
24 |
yt = YouTube(video_url, use_oauth=True)
|
25 |
a = yt.streams.filter(only_audio=True).first()
|
26 |
audio_file = a.download()
|
@@ -30,20 +30,18 @@ def extract_yt_audio(video_url):
|
|
30 |
audio_path = 'audio.wav'
|
31 |
# display(Audio(audio_path))
|
32 |
sample.export(audio_path, format="wav")
|
33 |
-
result = model.transcribe(audio_path)
|
34 |
print("Transcription started \nTranscript:\n")
|
|
|
35 |
print(result['text'], '\n')
|
36 |
return gr.update(visible=True, value=result['text']), gr.update(visible=True), result['segments'], gr.update(visible=True, value=audio_path)
|
37 |
|
38 |
|
39 |
def semantic_chunks(segs, max_chunk_length=15.0):
|
40 |
-
print(
|
41 |
-
print(segs)
|
42 |
"""
|
43 |
Takes segments of transcribed audio and 15secs as maximum chunk duration and returns chunks of the audio as a list.
|
44 |
"""
|
45 |
segs = ast.literal_eval(segs)
|
46 |
-
print(type(segs))
|
47 |
|
48 |
chunks = []
|
49 |
current_chunk = []
|
@@ -95,12 +93,18 @@ def semantic_chunks(segs, max_chunk_length=15.0):
|
|
95 |
'start_time (secs)': chunk_start_time,
|
96 |
'end_time (secs)': chunk_end_time
|
97 |
})
|
98 |
-
|
99 |
return gr.update(visible=True, value=pd.DataFrame(chunks))
|
100 |
|
101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
def clear_all():
    """
    Build the updates that reset the interface when "Clear" is pressed.

    The returned tuple has five entries, matching the outputs wired to the
    Clear button (input_url, transcript, submit_btn_2, chunks, audio):
    the first two are shown and emptied, the remaining three are hidden.
    """
    shown_and_emptied = [gr.update(visible=True, value="") for _ in range(2)]
    hidden = [gr.update(visible=False) for _ in range(3)]
    return tuple(shown_and_emptied + hidden)
|
104 |
|
105 |
|
106 |
with gr.Blocks() as demo:
|
@@ -108,8 +112,15 @@ with gr.Blocks() as demo:
|
|
108 |
"""
|
109 |
# Extract audio from video, get the transcript and then get the semantic chunk information.
|
110 |
""")
|
|
|
|
|
|
|
111 |
# input_url = gr.Textbox(label="Type-in the URL or File Location of the Video", value='https://www.youtube.com/watch?v=ug5e4JfC3oo')
|
112 |
-
|
|
|
|
|
|
|
|
|
113 |
segments = gr.Textbox(visible=False)
|
114 |
submit_btn_1 = gr.Button("Get the Transcript", visible=True)
|
115 |
audio = gr.Audio(visible=True, type="filepath", label='Play Audio')
|
@@ -118,7 +129,9 @@ with gr.Blocks() as demo:
|
|
118 |
chunks = gr.Dataframe(visible=False, label = 'semantic Chunks')
|
119 |
clear_btn = gr.Button("Clear")
|
120 |
|
121 |
-
|
|
|
|
|
122 |
submit_btn_2.click(fn=semantic_chunks, inputs=[segments], outputs=[chunks])
|
123 |
-
clear_btn.click(fn=clear_all, outputs=[input_url, transcript, submit_btn_2, chunks, audio])
|
124 |
demo.launch(debug=True)
|
|
|
12 |
|
13 |
model = whisper.load_model("base")
|
14 |
|
15 |
+
def extract_yt_audio(it, video_url, video_file):
|
16 |
|
17 |
"""
|
18 |
Takes a YouTube URL (video_url) and path where audio clip will be stored (audio_path)
|
|
|
20 |
Returns the extracted video clip (video) and the path to audio clip (audio_path).
|
21 |
"""
|
22 |
|
23 |
+
if it == 'URL' and ("youtube.com" in video_url or "youtu.be" in video_url):
|
24 |
yt = YouTube(video_url, use_oauth=True)
|
25 |
a = yt.streams.filter(only_audio=True).first()
|
26 |
audio_file = a.download()
|
|
|
30 |
audio_path = 'audio.wav'
|
31 |
# display(Audio(audio_path))
|
32 |
sample.export(audio_path, format="wav")
|
|
|
33 |
print("Transcription started \nTranscript:\n")
|
34 |
+
result = model.transcribe(audio_path)
|
35 |
print(result['text'], '\n')
|
36 |
return gr.update(visible=True, value=result['text']), gr.update(visible=True), result['segments'], gr.update(visible=True, value=audio_path)
|
37 |
|
38 |
|
39 |
def semantic_chunks(segs, max_chunk_length=15.0):
|
40 |
+
print("Trying to get symantically chunked segments:")
|
|
|
41 |
"""
|
42 |
Takes segments of transcribed audio and 15secs as maximum chunk duration and returns chunks of the audio as a list.
|
43 |
"""
|
44 |
segs = ast.literal_eval(segs)
|
|
|
45 |
|
46 |
chunks = []
|
47 |
current_chunk = []
|
|
|
93 |
'start_time (secs)': chunk_start_time,
|
94 |
'end_time (secs)': chunk_end_time
|
95 |
})
|
96 |
+
print(pd.DataFrame(chunks))
|
97 |
return gr.update(visible=True, value=pd.DataFrame(chunks))
|
98 |
|
99 |
|
100 |
+
def toggle_input_fields(input_type):
    """
    Choose which input widget is shown for the selected input type.

    Returns two updates: the first is visible only when ``input_type`` is
    "URL", the second is visible for any other selection. They are wired to
    the URL textbox and the file-upload component, in that order.
    """
    show_url = input_type == "URL"
    return gr.update(visible=show_url), gr.update(visible=not show_url)
|
105 |
+
|
106 |
def clear_all():
    """
    Build the updates that reset the interface when "Clear" is pressed.

    One update is returned per component wired to the Clear button, in the
    same order as its outputs list:
    input_url, video_file, transcript, submit_btn_2, chunks, audio.

    The two input widgets are only emptied; their visibility is left alone
    so it keeps following the input-type radio selection.
    """
    return (
        gr.update(value=""),                # input_url: empty the URL text
        gr.update(value=None),              # video_file: drop any uploaded file
        gr.update(visible=True, value=""),  # transcript: keep shown, but empty
        gr.update(visible=False),           # submit_btn_2
        gr.update(visible=False),           # chunks
        gr.update(visible=False),           # audio
    )
|
108 |
|
109 |
|
110 |
with gr.Blocks() as demo:
|
|
|
112 |
"""
|
113 |
# Extract audio from video, get the transcript and then get the semantic chunk information.
|
114 |
""")
|
115 |
+
# Radio button to choose between URL or upload
|
116 |
+
input_type = gr.Radio(choices=["URL", "Upload"], label="Select Video Input Type", value="URL")
|
117 |
+
|
118 |
# input_url = gr.Textbox(label="Type-in the URL or File Location of the Video", value='https://www.youtube.com/watch?v=ug5e4JfC3oo')
|
119 |
+
|
120 |
+
# Shown on load: the input-type radio defaults to "URL", and the toggle handler only runs when the selection changes.
input_url = gr.Textbox(label="Enter Video URL", visible=True)
|
121 |
+
video_file = gr.File(label="Upload Video", visible=False)
|
122 |
+
|
123 |
+
# input_url = gr.Textbox(label="Type-in the URL or File Location of the Video", value='sample.mp4')
|
124 |
segments = gr.Textbox(visible=False)
|
125 |
submit_btn_1 = gr.Button("Get the Transcript", visible=True)
|
126 |
audio = gr.Audio(visible=True, type="filepath", label='Play Audio')
|
|
|
129 |
chunks = gr.Dataframe(visible=False, label = 'semantic Chunks')
|
130 |
clear_btn = gr.Button("Clear")
|
131 |
|
132 |
+
input_type.change(fn=toggle_input_fields, inputs=input_type, outputs=[input_url, video_file])
|
133 |
+
submit_btn_1.click(fn=extract_yt_audio, inputs=[input_type, input_url, video_file], outputs=[transcript, submit_btn_2, segments, audio])
|
134 |
+
# submit_btn_1.click(fn=extract_yt_audio, inputs=[input_url], outputs=[transcript, submit_btn_2, segments, audio])
|
135 |
submit_btn_2.click(fn=semantic_chunks, inputs=[segments], outputs=[chunks])
|
136 |
+
clear_btn.click(fn=clear_all, outputs=[input_url, video_file, transcript, submit_btn_2, chunks, audio])
|
137 |
demo.launch(debug=True)
|