Spaces:

vsrinivas
/

Transcribe_the_audio_and_get_semantic_chunks

Running

App Files Community

vsrinivas committed 1 day ago

Commit

c3a36b3

•

1 Parent(s): 2a6fdc6

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -11

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ from IPython.display import Audio, display
 model = whisper.load_model("base")
-def extract_yt_audio(video_url):
     """
     Takes youtube url (youtobe_url) and path where audio clip will be stored (audio_path)
@@ -20,7 +20,7 @@ def extract_yt_audio(video_url):
     Returns the extracted video clip (video) and the path to audio clip (audio_path).
     """
-    if "youtube.com" in video_url or "youtu.be" in video_url:
       yt = YouTube(video_url, use_oauth=True)
       a = yt.streams.filter(only_audio=True).first()
       audio_file = a.download()
@@ -30,20 +30,18 @@ def extract_yt_audio(video_url):
     audio_path = 'audio.wav'
     # display(Audio(audio_path))
     sample.export(audio_path, format="wav")
-    result = model.transcribe(audio_path)
     print("Transcription started \nTranscript:\n")
     print(result['text'], '\n')
     return gr.update(visible=True, value=result['text']), gr.update(visible=True), result['segments'], gr.update(visible=True, value=audio_path)
 def semantic_chunks(segs, max_chunk_length=15.0):
-    print(type(segs))
-    print(segs)
     """
     Takes segments of transcribed audio and 15secs as maximum check duration and returns chunks of the audio as a list.
     """
     segs = ast.literal_eval(segs)
-    print(type(segs))
     chunks = []
     current_chunk = []
@@ -95,12 +93,18 @@ def semantic_chunks(segs, max_chunk_length=15.0):
             'start_time (secs)': chunk_start_time,
             'end_time (secs)': chunk_end_time
         })
     return gr.update(visible=True, value=pd.DataFrame(chunks))
 def clear_all():
-    return (gr.update(visible=True, value=""), gr.update(visible=True, value=""), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
 with gr.Blocks() as demo:
@@ -108,8 +112,15 @@ with gr.Blocks() as demo:
     """
     # Extract audio from video, get the transcript and then get the semantic chunk information.
     """)
     # input_url = gr.Textbox(label="Type-in the URL or File Location of the Video", value='https://www.youtube.com/watch?v=ug5e4JfC3oo')
-    input_url = gr.Textbox(label="Type-in the URL or File Location of the Video", value='sample.wav')
     segments = gr.Textbox(visible=False)
     submit_btn_1 = gr.Button("Get the Transcript", visible=True)
     audio = gr.Audio(visible=True, type="filepath", label='Play Audio')
@@ -118,7 +129,9 @@ with gr.Blocks() as demo:
     chunks = gr.Dataframe(visible=False, label = 'semantic Chunks')
     clear_btn = gr.Button("Clear")
-    submit_btn_1.click(fn=extract_yt_audio, inputs=[input_url], outputs=[transcript, submit_btn_2, segments, audio])
     submit_btn_2.click(fn=semantic_chunks, inputs=[segments], outputs=[chunks])
-    clear_btn.click(fn=clear_all, outputs=[input_url, transcript, submit_btn_2, chunks, audio])
 demo.launch(debug=True)

 model = whisper.load_model("base")
+def extract_yt_audio(it, video_url, video_file):
     """
     Takes youtube url (youtobe_url) and path where audio clip will be stored (audio_path)
     Returns the extracted video clip (video) and the path to audio clip (audio_path).
     """
+    if it == 'URL' and ("youtube.com" in video_url or "youtu.be" in video_url):
       yt = YouTube(video_url, use_oauth=True)
       a = yt.streams.filter(only_audio=True).first()
       audio_file = a.download()
     audio_path = 'audio.wav'
     # display(Audio(audio_path))
     sample.export(audio_path, format="wav")
     print("Transcription started \nTranscript:\n")
+    result = model.transcribe(audio_path)
     print(result['text'], '\n')
     return gr.update(visible=True, value=result['text']), gr.update(visible=True), result['segments'], gr.update(visible=True, value=audio_path)
 def semantic_chunks(segs, max_chunk_length=15.0):
+    print("Trying to get symantically chunked segments:")
     """
     Takes segments of transcribed audio and 15secs as maximum check duration and returns chunks of the audio as a list.
     """
     segs = ast.literal_eval(segs)
     chunks = []
     current_chunk = []
             'start_time (secs)': chunk_start_time,
             'end_time (secs)': chunk_end_time
         })
+    print(pd.DataFrame(chunks))
     return gr.update(visible=True, value=pd.DataFrame(chunks))
+def toggle_input_fields(input_type):
+    if input_type == "URL":
+        return gr.update(visible=True), gr.update(visible=False)
+    else:
+        return gr.update(visible=False), gr.update(visible=True)
 def clear_all():
+    return (gr.update(visible=True, value=""), gr.update(visible=True, value=""), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False))
 with gr.Blocks() as demo:
     """
     # Extract audio from video, get the transcript and then get the semantic chunk information.
     """)
+    # Radio button to choose between URL or upload
+    input_type = gr.Radio(choices=["URL", "Upload"], label="Select Video Input Type", value="URL")
     # input_url = gr.Textbox(label="Type-in the URL or File Location of the Video", value='https://www.youtube.com/watch?v=ug5e4JfC3oo')
+    input_url = gr.Textbox(label="Enter Video URL", visible=False)
+    video_file = gr.File(label="Upload Video", visible=False)
+    # input_url = gr.Textbox(label="Type-in the URL or File Location of the Video", value='sample.mp4')
     segments = gr.Textbox(visible=False)
     submit_btn_1 = gr.Button("Get the Transcript", visible=True)
     audio = gr.Audio(visible=True, type="filepath", label='Play Audio')
     chunks = gr.Dataframe(visible=False, label = 'semantic Chunks')
     clear_btn = gr.Button("Clear")
+    input_type.change(fn=toggle_input_fields, inputs=input_type, outputs=[input_url, video_file])
+    submit_btn_1.click(fn=extract_yt_audio, inputs=[input_type, input_url, video_file], outputs=[transcript, submit_btn_2, segments, audio])
+    # submit_btn_1.click(fn=extract_yt_audio, inputs=[input_url], outputs=[transcript, submit_btn_2, segments, audio])
     submit_btn_2.click(fn=semantic_chunks, inputs=[segments], outputs=[chunks])
+    clear_btn.click(fn=clear_all, outputs=[input_url, video_file, transcript, submit_btn_2, chunks, audio])
 demo.launch(debug=True)