vsrinivas committed on
Commit
ee54bd3
1 Parent(s): 4a94923

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -0
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pytubefix import YouTube
2
+ from moviepy.editor import VideoFileClip, AudioFileClip
3
+ from pydub import AudioSegment
4
+ import whisper
5
+ import pandas as pd
6
+ import nltk
7
+ from nltk.tokenize import sent_tokenize
8
+ nltk.download('punkt')
9
+ import gradio as gr
10
+ import ast
11
+ from IPython.display import Audio, display
12
+
13
# Load the Whisper "base" model once at import time; extract_yt_audio reuses
# this module-level instance for every transcription instead of reloading it.
model = whisper.load_model("base")
14
+
15
def extract_yt_audio(video_url):
    """
    Takes a YouTube URL or the location of a local media file (video_url) in string format,
    converts its audio to 'audio.wav' in the working directory and transcribes that file
    with the module-level Whisper model.

    Returns four values, in the order of the Gradio outputs wired to this handler:
    an update showing the transcript text, an update making the chunking button visible,
    result['segments'] as returned by model.transcribe, and an update pointing the audio
    player at the exported WAV file.

    Raises gr.Error when video_url is blank or when the YouTube video has no audio-only stream.
    """
    if not video_url or not video_url.strip():
        raise gr.Error("Please provide a YouTube URL or the location of a media file.")
    video_url = video_url.strip()

    if "youtube.com" in video_url or "youtu.be" in video_url:
        yt = YouTube(video_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        # .first() yields None when the filter matches nothing; fail with a readable
        # message instead of an AttributeError on None.download().
        if audio_stream is None:
            raise gr.Error("No audio-only stream is available for this video.")
        source_file = audio_stream.download()
    else:
        source_file = video_url
    sample = AudioSegment.from_file(source_file)

    audio_path = 'audio.wav'
    # The WAV must be written before anything reads audio_path: building the player
    # first would pick up a missing file or a stale one from an earlier run.
    sample.export(audio_path, format="wav")
    display(Audio(audio_path))

    # Announce the start before the (slow) transcription call rather than after it.
    print("Transcription started ")
    result = model.transcribe(audio_path)
    print("Transcript:\n")
    print(result['text'], '\n')

    return (
        gr.update(visible=True, value=result['text']),
        gr.update(visible=True),
        result['segments'],
        gr.update(visible=True, value=audio_path),
    )
37
+
38
+
39
# Column order of the chunk table; also keeps the headers present when no chunk is produced.
_CHUNK_COLUMNS = [
    'chunk_id',
    'chunk_length (secs)',
    'semantic_chunk',
    'start_time (secs)',
    'end_time (secs)',
]


def _make_chunk(chunk_id, sentences, duration, start_time, end_time):
    """Build one row of the chunk table from the sentences collected so far."""
    return {
        'chunk_id': chunk_id,
        'chunk_length (secs)': duration,
        'semantic_chunk': ' '.join(sentences),
        'start_time (secs)': start_time,
        'end_time (secs)': end_time,
    }


def semantic_chunks(segs, max_chunk_length=15.0):
    """
    Takes segments of transcribed audio and 15secs as maximum chunk duration and returns
    the chunks of the audio as a table.

    segs is either a list of segment dicts carrying 'start', 'end' and 'text', or the
    string representation of such a list (the form in which a text component hands the
    segments over). max_chunk_length is the maximum summed sentence duration, in seconds,
    allowed in one chunk.

    A segment's duration is split evenly across its sentences, and each sentence gets its
    own interpolated start/end inside the segment, so consecutive chunks that split a
    segment do not overlap. A single sentence longer than max_chunk_length becomes a chunk
    on its own.

    Returns a Gradio update that shows a pandas DataFrame with one row per chunk.
    """
    if segs is None:
        segs = []
    elif isinstance(segs, str):
        # literal_eval only parses Python literals, it never executes code; a blank
        # string (nothing transcribed yet) would raise, so treat it as "no segments".
        segs = ast.literal_eval(segs) if segs.strip() else []

    chunks = []
    current_chunk = []
    chunk_start_time = None
    chunk_end_time = None
    chunk_duration = 0

    # iterate over segments and create chunks out of each segment
    for segment in segs:
        start = segment['start']
        end = segment['end']

        # sentence tokenize each segment to capture more semantic context
        sentences = sent_tokenize(segment['text'])
        if not sentences:
            continue

        # Loop-invariant: every sentence gets an equal share of the segment's duration.
        sentence_duration = (end - start) / len(sentences)
        last_index = len(sentences) - 1

        for index, sentence in enumerate(sentences):
            sentence_start = start + index * sentence_duration
            # Pin the last sentence to the segment end to avoid float drift.
            sentence_end = end if index == last_index else sentence_start + sentence_duration

            fits = chunk_duration + sentence_duration <= max_chunk_length
            # Finalize only a non-empty chunk: an over-long first sentence must not
            # produce an empty row with a None start time.
            if current_chunk and not fits:
                chunks.append(_make_chunk(
                    len(chunks) + 1, current_chunk, chunk_duration,
                    chunk_start_time, chunk_end_time,
                ))
                current_chunk = []
                chunk_duration = 0

            if not current_chunk:
                chunk_start_time = sentence_start
            current_chunk.append(sentence)
            chunk_duration += sentence_duration
            chunk_end_time = sentence_end

    # Finalize the last chunk if it exists
    if current_chunk:
        chunks.append(_make_chunk(
            len(chunks) + 1, current_chunk, chunk_duration,
            chunk_start_time, chunk_end_time,
        ))

    return gr.update(visible=True, value=pd.DataFrame(chunks, columns=_CHUNK_COLUMNS))
100
+
101
+
102
def clear_all():
    """
    Reset the interface to its starting state.

    Returns five Gradio updates, in the order of the outputs wired to the Clear button:
    the URL box and the transcript box are emptied and kept visible, while the chunking
    button, the chunk table and the audio player are hidden.
    """
    url_reset = gr.update(visible=True, value="")
    transcript_reset = gr.update(visible=True, value="")
    button_hidden = gr.update(visible=False)
    table_hidden = gr.update(visible=False)
    player_hidden = gr.update(visible=False)
    return (url_reset, transcript_reset, button_hidden, table_hidden, player_hidden)
104
+
105
+
106
# Two-step UI: step 1 transcribes the audio, step 2 (revealed after step 1) chunks it.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Extract audio from video, get the transcript and then get the semantic chunk information.
        """)

    # Components are created in display order.
    input_url = gr.Textbox(
        value='https://www.youtube.com/watch?v=ug5e4JfC3oo',
        label="Type-in the URL or File Location of the Video",
    )
    # Hidden carrier that hands the segments from the first handler to the second;
    # it holds them as text, which is why semantic_chunks parses its input with
    # ast.literal_eval.
    segments = gr.Textbox(visible=False)
    submit_btn_1 = gr.Button("Get the Transcript", visible=True)
    audio = gr.Audio(label='Play Audio', type="filepath", visible=True)
    transcript = gr.Textbox(label='Transcript', visible=True)
    # Stays hidden until extract_yt_audio returns an update that reveals it.
    submit_btn_2 = gr.Button("Get the semantically Chuncked Segments", visible=False)
    chunks = gr.Dataframe(label='semantic Chunks', visible=False)
    clear_btn = gr.Button("Clear")

    # Output lists must match the order of the values each handler returns.
    transcribe_outputs = [transcript, submit_btn_2, segments, audio]
    clear_outputs = [input_url, transcript, submit_btn_2, chunks, audio]

    submit_btn_1.click(fn=extract_yt_audio, inputs=[input_url], outputs=transcribe_outputs)
    submit_btn_2.click(fn=semantic_chunks, inputs=[segments], outputs=[chunks])
    clear_btn.click(fn=clear_all, outputs=clear_outputs)

demo.launch(debug=True)