ashhadahsan committed on
Commit
452467a
1 Parent(s): 3159ccd
Files changed (8) hide show
  1. app.py +250 -0
  2. audio.txt +9 -0
  3. constants.py +183 -0
  4. requirements.txt +12 -0
  5. segments.json +1602 -0
  6. setup.py +28 -0
  7. transcription.json +922 -0
  8. utils.py +83 -0
app.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import streamlit as st
3
+ from constants import WHISPER_MODELS, language_dict
4
+ import streamlit as st
5
+ from utils import translate_to_english, detect_language, write, read, get_key
6
+ import whisperx as whisper
7
+ import json
8
+ import pandas as pd
9
+
10
# Seed the two button flags only when they are absent, so values that are
# already stored in the session state are left untouched.
for _flag in ("btn1", "btn2"):
    if _flag not in st.session_state:
        st.session_state[_flag] = False
14
+
15
+
16
class ByteEncoder(json.JSONEncoder):
    """JSON encoder that serialises binary values as hexadecimal text.

    ``bytes``, ``bytearray`` and ``memoryview`` objects are written as the
    string returned by their ``.hex()`` method. Any other type the standard
    encoder cannot handle is passed to the base class, which raises
    ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serialisable replacement for *obj*.

        Args:
            obj: A value the standard encoder could not serialise itself.

        Returns:
            The hexadecimal string for binary values.

        Raises:
            TypeError: Raised by the base class for every other type.
        """
        if isinstance(obj, (bytes, bytearray, memoryview)):
            return obj.hex()
        return super().default(obj)
21
+
22
+
23
def disable_btn2():
    """Set the ``btn2`` entry of the Streamlit session state to ``True``."""
    flag = "btn2"
    st.session_state[flag] = True
25
+
26
+
27
def disable_btn1():
    """Set the ``btn1`` entry of the Streamlit session state to ``True``."""
    flag = "btn1"
    st.session_state[flag] = True
29
+
30
+
31
# Configure the page: title "Whisper-X", wide layout.
st.set_page_config(layout="wide", page_title="Whisper-X")
32
import torch

# torch names NVIDIA GPUs "cuda"; "gpu" is not a valid device string and is
# rejected as soon as a model or tensor is moved to it. This value is later
# passed as ``device=`` to the alignment model.
device = "cuda" if torch.cuda.is_available() else "cpu"
38
# NOTE: ``input`` shadows the builtin of the same name. Both column names are
# kept as they are because the rest of the script refers to ``output``.
input, output = st.columns(2, gap="medium")
with input:
    st.header("Input")

    # Bundled sample clip. The handle is closed as soon as the bytes are
    # read; ``audio_file.name`` stays readable on the closed object and is
    # what the output column falls back to when nothing is uploaded.
    with open("audio.wav", "rb") as audio_file:
        audio_bytes = audio_file.read()
    st.audio(audio_bytes, format="audio/wav")

    audio_uploaded = st.file_uploader(
        label="Upload your file",
        type=["mp3", "wav"],
        help="Your input file",
    )
    # Uploading a pre-aligned JSON file is currently switched off; the output
    # column checks this value against None to pick its code path.
    text_json = None

    model_name = st.selectbox(
        label="Choose your model",
        options=WHISPER_MODELS,
        help="Choose a Whisper model.",
    )
    # The empty first option means "no choice"; use the base model for it.
    model_name = "base" if model_name == "" else model_name

    transcription = st.selectbox(
        "transcription",
        options=["plain text", "srt", "vtt", "ass", "tsv"],
        help="Choose the format for the transcription",
    )
    translate = st.checkbox(
        "translate", help="Translate the text to English when set to True"
    )
    # Both the short codes and the full names are offered. The output column
    # maps anything longer than two characters back to its code and runs
    # language detection when the empty entry is selected.
    language = st.selectbox(
        label="language",
        options=list(language_dict.keys()) + list(language_dict.values()),
        help="Language spoken in the audio; leave empty to detect it automatically",
    )
    patience = st.number_input(
        label="patience",
        step=0.01,
        value=1.0,
        help="optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam search",
    )
    temperature = st.number_input(
        label="temperature",
        step=0.01,
        value=1.0,
        help="temperature to use for sampling",
    )
    suppress_tokens = st.text_input(
        "suppress_tokens",
        value="-1",
        help="comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuations",
    )
    initial_prompt = st.text_area(
        label="initial_prompt",
        help="optional text to provide as a prompt for the first window.",
    )
    condition_on_previous_text = st.checkbox(
        "condition_on_previous_text",
        help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop",
    )
    temperature_increment_on_fallback = st.number_input(
        label="temperature_increment_on_fallback",
        step=0.01,
        value=0.2,
        help="temperature to increase when falling back when the decoding fails to meet either of the thresholds below",
    )
    compression_ratio_threshold = st.number_input(
        label="compression_ratio_threshold",
        value=2.4,
        step=0.01,
        help="if the gzip compression ratio is higher than this value, treat the decoding as failed",
    )
    logprob_threshold = st.number_input(
        label="logprob_threshold",
        value=-1.0,
        step=0.01,
        help="if the average log probability is lower than this value, treat the decoding as failed",
    )
    no_speech_threshold = st.number_input(
        label="no_speech_threshold",
        value=0.6,
        step=0.01,
        help="if the probability of the <|nospeech|> token is higher than this value AND the decoding has failed due to `logprob_threshold`, consider the segment as silence",
    )

    # Turn the single temperature into a fallback schedule that climbs from
    # the chosen value up to 1.0 in steps of the increment. A zero or negative
    # increment cannot produce a usable schedule, and a start value above 1.0
    # yields an empty one; in those cases only the chosen temperature is used.
    if (
        temperature_increment_on_fallback is not None
        and temperature_increment_on_fallback > 0
    ):
        schedule = tuple(
            np.arange(temperature, 1.0 + 1e-6, temperature_increment_on_fallback)
        )
        temperature = schedule or (temperature,)
    else:
        temperature = [temperature]

    submit = st.button("Submit", type="primary")
140
import os
import tempfile

with output:
    st.header("Output")
    if submit:
        # The models read audio from a path on disk. An uploaded file is held
        # in memory and its ``.name`` is only the original file name, so the
        # upload is written to a temporary file first. Without an upload the
        # bundled sample, which already lives on disk, is used directly.
        temp_audio_path = None
        if audio_uploaded is None:
            audio_name = audio_path = audio_file.name
        else:
            audio_name = audio_uploaded.name
            with tempfile.NamedTemporaryFile(
                suffix=os.path.splitext(audio_name)[1], delete=False
            ) as tmp:
                tmp.write(audio_uploaded.getvalue())
            audio_path = temp_audio_path = tmp.name

        try:
            # No language selected: detect it with the chosen model.
            if language == "":
                model = whisper.load_model(model_name)
                with st.spinner("Detecting language..."):
                    detection = detect_language(audio_path, model)
                    language = detection.get("detected_language")
                del model
            # A full language name was selected (or detected): map it back to
            # its short code. Skipped when detection returned nothing.
            if language and len(language) > 2:
                language = get_key(language)

            # Placeholders are created up front, in display order, and filled
            # in once the results are available.
            segments_pre = st.empty()
            segments_post = st.empty()
            segments_post_json = st.empty()
            segments_post2 = st.empty()
            trans = st.empty()
            lang = st.empty()

            if text_json is None:
                with st.spinner("Running ... "):
                    decode = {"suppress_tokens": suppress_tokens, "beam_size": 5}
                    model = whisper.load_model(model_name)
                    with st.container():
                        with st.spinner(f"Running with {model_name} model"):
                            result = model.transcribe(
                                audio_path,
                                language=language,
                                patience=patience,
                                # An empty text area means "no prompt".
                                initial_prompt=initial_prompt or None,
                                condition_on_previous_text=condition_on_previous_text,
                                temperature=temperature,
                                compression_ratio_threshold=compression_ratio_threshold,
                                logprob_threshold=logprob_threshold,
                                no_speech_threshold=no_speech_threshold,
                                **decode,
                            )

                    if translate:
                        result = translate_to_english(result, json=False)
                    with open("transcription.json", "w", encoding="utf-8") as f:
                        json.dump(result["segments"], f, indent=4, cls=ByteEncoder)
                    with st.spinner("Running alignment model ..."):
                        model_a, metadata = whisper.load_align_model(
                            language_code=result["language"], device=device
                        )
                        result_aligned = whisper.align(
                            result["segments"],
                            model_a,
                            metadata,
                            audio_path,
                            device=device,
                        )

                # Output files are named after the original audio file name,
                # not after the temporary copy.
                write(
                    audio_name,
                    dtype=transcription,
                    result_aligned=result_aligned,
                )
                trans_text = read(audio_name, transcription)
                trans.text_area(
                    "transcription", trans_text, height=None, max_chars=None, key=None
                )
                segments_pre.text_area(
                    "Segments before alignment",
                    result["segments"],
                    height=None,
                    max_chars=None,
                    key=None,
                )
                segments_post.text_area(
                    "Word Segments after alignment",
                    result_aligned["word_segments"],
                    height=None,
                    max_chars=None,
                    key=None,
                )
                with open("segments.json", "w", encoding="utf-8") as f:
                    # indent=0 puts every item on its own line without any
                    # leading spaces.
                    json.dump(result_aligned["word_segments"], f, indent=0)

                segments_post2.text_area(
                    "Segments after alignment",
                    result_aligned["segments"],
                    height=None,
                    max_chars=None,
                    key=None,
                )
                # Fall back to the raw code (or an empty string) when the
                # language has no entry in language_dict.
                lang.text_input(
                    "detected language",
                    language_dict.get(language, language or ""),
                    disabled=True,
                )
            else:
                # Alignment of a pre-made JSON transcript. Not reachable while
                # text_json is hard-wired to None in the input column.
                # NOTE(review): the translated result is not the value handed
                # to align() below - confirm the intent before re-enabling.
                if translate:
                    result = translate_to_english(text_json, json=True)
                with st.spinner("Running alignment model ..."):
                    model_a, metadata = whisper.load_align_model(
                        language_code=language, device=device
                    )
                    result_aligned = whisper.align(
                        text_json, model_a, metadata, audio_path, device
                    )
        finally:
            # Remove the temporary copy of the upload, if one was made.
            if temp_audio_path is not None:
                os.remove(temp_audio_path)
audio.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Right now, at this very second, we're in the aftermath of the Big Bang
2
+ Everything we see.
3
+ And here, and police...
4
+ and smell.
5
+ And touch, is the aftermath
6
+ The Big Bang is really our evolving, expanding universe
7
+ For us, mostly stuck on our rocky little planet
8
+ the view of the universe begins with earth
9
+ This is Earth
constants.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# English display names of the languages, kept in their original order.
LANGUAGES = [
    "English", "Chinese", "German", "Spanish", "Russian", "Korean",
    "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan",
    "Dutch", "Arabic", "Swedish", "Italian", "Indonesian", "Hindi",
    "Finnish", "Vietnamese", "Hebrew", "Ukrainian", "Greek", "Malay",
    "Czech", "Romanian", "Danish", "Hungarian", "Tamil", "Norwegian",
    "Thai", "Urdu", "Croatian", "Bulgarian", "Lithuanian", "Latin",
    "Maori", "Malayalam", "Welsh", "Slovak", "Telugu", "Persian",
    "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian", "Kannada",
    "Estonian", "Macedonian", "Breton", "Basque", "Icelandic", "Armenian",
    "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian", "Swahili",
    "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
    "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
    "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao",
    "Uzbek", "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk",
    "Maltese", "Sanskrit", "Luxembourgish", "Myanmar", "Tibetan", "Tagalog",
    "Malagasy", "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa",
    "Bashkir", "Javanese", "Sundanese",
]
102
+
103
# Model sizes offered in the UI; the leading empty entry means "no choice".
WHISPER_MODELS = ["", "tiny", "base", "small", "medium", "large"]
111
# Short language code -> English display name. The empty pair comes first
# and stands for "no language selected".
language_dict = {
    "": "",
    "en": "English", "zh": "Chinese", "de": "German", "es": "Spanish",
    "ru": "Russian", "ko": "Korean", "fr": "French", "ja": "Japanese",
    "pt": "Portuguese", "tr": "Turkish", "pl": "Polish", "ca": "Catalan",
    "nl": "Dutch", "ar": "Arabic", "sv": "Swedish", "it": "Italian",
    "id": "Indonesian", "hi": "Hindi", "fi": "Finnish", "vi": "Vietnamese",
    "he": "Hebrew", "uk": "Ukrainian", "el": "Greek", "ms": "Malay",
    "cs": "Czech", "ro": "Romanian", "da": "Danish", "hu": "Hungarian",
    "ta": "Tamil", "no": "Norwegian", "th": "Thai", "ur": "Urdu",
    "hr": "Croatian", "bg": "Bulgarian", "lt": "Lithuanian", "la": "Latin",
    "mi": "Maori", "ml": "Malayalam", "cy": "Welsh", "sk": "Slovak",
    "te": "Telugu", "fa": "Persian", "lv": "Latvian", "bn": "Bengali",
    "sr": "Serbian", "az": "Azerbaijani", "sl": "Slovenian", "kn": "Kannada",
    "et": "Estonian", "mk": "Macedonian", "br": "Breton", "eu": "Basque",
    "is": "Icelandic", "hy": "Armenian", "ne": "Nepali", "mn": "Mongolian",
    "bs": "Bosnian", "kk": "Kazakh", "sq": "Albanian", "sw": "Swahili",
    "gl": "Galician", "mr": "Marathi", "pa": "Punjabi", "si": "Sinhala",
    "km": "Khmer", "sn": "Shona", "yo": "Yoruba",
}
181
+
182
+
183
# Every code followed by every display name, in dictionary order.
all_languages = [*language_dict, *language_dict.values()]
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/m-bain/whisperx.git
2
+ streamlit
3
+ pandas
4
+ numpy
5
+ torch
6
+ torchaudio
7
+ tqdm
8
+ more-itertools
9
+ transformers>=4.19.0
10
+ ffmpeg-python==0.2.0
11
+ pyannote.audio
12
+ soundfile
segments.json ADDED
@@ -0,0 +1,1602 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "text": "Right",
4
+ "start": 1.2872727272727273,
5
+ "end": 1.5085227272727273
6
+ },
7
+ {
8
+ "text": "now",
9
+ "start": 1.5688636363636363,
10
+ "end": 1.77
11
+ },
12
+ {
13
+ "text": "at",
14
+ "start": 2.0113636363636367,
15
+ "end": 2.091818181818182
16
+ },
17
+ {
18
+ "text": "this",
19
+ "start": 2.1119318181818185,
20
+ "end": 2.272840909090909
21
+ },
22
+ {
23
+ "text": "very",
24
+ "start": 2.373409090909091,
25
+ "end": 2.6751136363636365
26
+ },
27
+ {
28
+ "text": "second.",
29
+ "start": 2.7756818181818184,
30
+ "end": 3.0975
31
+ },
32
+ {
33
+ "text": "We're",
34
+ "start": 3.9425641025641025,
35
+ "end": 4.163974358974359
36
+ },
37
+ {
38
+ "text": "in",
39
+ "start": 4.184102564102564,
40
+ "end": 4.244487179487179
41
+ },
42
+ {
43
+ "text": "the",
44
+ "start": 4.28474358974359,
45
+ "end": 4.365256410256411
46
+ },
47
+ {
48
+ "text": "aftermath",
49
+ "start": 4.546410256410256,
50
+ "end": 5.19051282051282
51
+ },
52
+ {
53
+ "text": "of",
54
+ "start": 5.351538461538461,
55
+ "end": 5.432051282051281
56
+ },
57
+ {
58
+ "text": "the",
59
+ "start": 5.472307692307693,
60
+ "end": 5.552820512820513
61
+ },
62
+ {
63
+ "text": "Big",
64
+ "start": 5.572948717948718,
65
+ "end": 5.774230769230769
66
+ },
67
+ {
68
+ "text": "Bang.",
69
+ "start": 5.85474358974359,
70
+ "end": 6.156666666666666
71
+ },
72
+ {
73
+ "text": "Everything",
74
+ "start": 7.201044176706827,
75
+ "end": 7.702048192771084
76
+ },
77
+ {
78
+ "text": "we",
79
+ "start": 7.7621686746987955,
80
+ "end": 7.942530120481928
81
+ },
82
+ {
83
+ "text": "see",
84
+ "start": 8.04273092369478,
85
+ "end": 8.403453815261043
86
+ },
87
+ {
88
+ "text": "in",
89
+ "start": 8.503654618473895,
90
+ "end": 9.205060240963856
91
+ },
92
+ {
93
+ "text": "here",
94
+ "start": 9.345341365461847,
95
+ "end": 9.726104417670683
96
+ },
97
+ {
98
+ "text": "and",
99
+ "start": 10.447550200803214,
100
+ "end": 10.547751004016064
101
+ },
102
+ {
103
+ "text": "taste",
104
+ "start": 10.688032128514056,
105
+ "end": 11.269196787148594
106
+ },
107
+ {
108
+ "text": "and",
109
+ "start": 12.070803212851406,
110
+ "end": 12.191044176706828
111
+ },
112
+ {
113
+ "text": "smell,",
114
+ "start": 12.31128514056225,
115
+ "end": 12.752168674698796
116
+ },
117
+ {
118
+ "text": "and",
119
+ "start": 13.273212851405622,
120
+ "end": 13.373413654618474
121
+ },
122
+ {
123
+ "text": "touch,",
124
+ "start": 13.473614457831324,
125
+ "end": 13.874417670682732
126
+ },
127
+ {
128
+ "text": "is",
129
+ "start": 14.395461847389559,
130
+ "end": 14.49566265060241
131
+ },
132
+ {
133
+ "text": "the",
134
+ "start": 14.53574297188755,
135
+ "end": 14.635943775100401
136
+ },
137
+ {
138
+ "text": "aftermath.",
139
+ "start": 14.736144578313253,
140
+ "end": 15.217108433734941
141
+ },
142
+ {
143
+ "text": "The",
144
+ "start": 17.121672727272728,
145
+ "end": 17.24210909090909
146
+ },
147
+ {
148
+ "text": "Big",
149
+ "start": 17.322400000000002,
150
+ "end": 17.5432
151
+ },
152
+ {
153
+ "text": "Bang",
154
+ "start": 17.643563636363638,
155
+ "end": 18.004872727272726
156
+ },
157
+ {
158
+ "text": "is",
159
+ "start": 18.326036363636362,
160
+ "end": 18.4264
161
+ },
162
+ {
163
+ "text": "really",
164
+ "start": 18.526763636363636,
165
+ "end": 18.908145454545455
166
+ },
167
+ {
168
+ "text": "our",
169
+ "start": 18.92821818181818,
170
+ "end": 19.209236363636364
171
+ },
172
+ {
173
+ "text": "evolving",
174
+ "start": 19.329672727272726,
175
+ "end": 19.931854545454545
176
+ },
177
+ {
178
+ "text": "expanding",
179
+ "start": 20.25301818181818,
180
+ "end": 20.97563636363636
181
+ },
182
+ {
183
+ "text": "universe.",
184
+ "start": 21.176363636363636,
185
+ "end": 21.617963636363637
186
+ },
187
+ {
188
+ "text": "For",
189
+ "start": 24.141747572815536,
190
+ "end": 24.342718446601943
191
+ },
192
+ {
193
+ "text": "us,",
194
+ "start": 24.48339805825243,
195
+ "end": 24.624077669902913
196
+ },
197
+ {
198
+ "text": "mostly",
199
+ "start": 25.327475728155342,
200
+ "end": 25.80980582524272
201
+ },
202
+ {
203
+ "text": "stuck",
204
+ "start": 25.890194174757283,
205
+ "end": 26.171553398058254
206
+ },
207
+ {
208
+ "text": "on",
209
+ "start": 26.25194174757282,
210
+ "end": 26.33233009708738
211
+ },
212
+ {
213
+ "text": "our",
214
+ "start": 26.412718446601943,
215
+ "end": 26.513203883495148
216
+ },
217
+ {
218
+ "text": "rocky",
219
+ "start": 26.59359223300971,
220
+ "end": 26.91514563106796
221
+ },
222
+ {
223
+ "text": "little",
224
+ "start": 26.975436893203884,
225
+ "end": 27.216601941747573
226
+ },
227
+ {
228
+ "text": "planet.",
229
+ "start": 27.276893203883496,
230
+ "end": 27.61854368932039
231
+ },
232
+ {
233
+ "text": "The",
234
+ "start": 28.743076923076924,
235
+ "end": 28.863916083916084
236
+ },
237
+ {
238
+ "text": "view",
239
+ "start": 28.904195804195805,
240
+ "end": 29.125734265734266
241
+ },
242
+ {
243
+ "text": "of",
244
+ "start": 29.166013986013986,
245
+ "end": 29.226433566433567
246
+ },
247
+ {
248
+ "text": "the",
249
+ "start": 29.266713286713287,
250
+ "end": 29.367412587412588
251
+ },
252
+ {
253
+ "text": "universe",
254
+ "start": 29.46811188811189,
255
+ "end": 29.931328671328675
256
+ },
257
+ {
258
+ "text": "begins",
259
+ "start": 30.173006993006997,
260
+ "end": 30.63622377622378
261
+ },
262
+ {
263
+ "text": "with",
264
+ "start": 30.73692307692308,
265
+ "end": 30.8779020979021
266
+ },
267
+ {
268
+ "text": "Earth.",
269
+ "start": 30.978601398601402,
270
+ "end": 31.079300699300703
271
+ },
272
+ {
273
+ "text": "This,",
274
+ "start": 32.70305882352942,
275
+ "end": 32.94588235294118
276
+ },
277
+ {
278
+ "text": "is",
279
+ "start": 33.451764705882354,
280
+ "end": 33.512470588235296
281
+ },
282
+ {
283
+ "text": "Earth.",
284
+ "start": 33.593411764705884,
285
+ "end": 33.75529411764706
286
+ },
287
+ {
288
+ "text": "Silicon",
289
+ "start": 34.74190476190476,
290
+ "end": 35.28476190476191
291
+ },
292
+ {
293
+ "text": "and",
294
+ "start": 35.38529100529101,
295
+ "end": 35.48582010582011
296
+ },
297
+ {
298
+ "text": "oxygen",
299
+ "start": 35.66677248677249,
300
+ "end": 36.04878306878307
301
+ },
302
+ {
303
+ "text": "based,",
304
+ "start": 36.10910052910053,
305
+ "end": 36.410687830687834
306
+ },
307
+ {
308
+ "text": "with",
309
+ "start": 36.873121693121696,
310
+ "end": 36.99375661375662
311
+ },
312
+ {
313
+ "text": "a",
314
+ "start": 37.013862433862435,
315
+ "end": 37.033968253968254
316
+ },
317
+ {
318
+ "text": "metallic",
319
+ "start": 37.07417989417989,
320
+ "end": 37.516507936507935
321
+ },
322
+ {
323
+ "text": "core.",
324
+ "start": 37.59693121693122,
325
+ "end": 37.91862433862434
326
+ },
327
+ {
328
+ "text": "The",
329
+ "start": 39.424,
330
+ "end": 39.525111111111116
331
+ },
332
+ {
333
+ "text": "surface",
334
+ "start": 39.565555555555555,
335
+ "end": 39.99022222222222
336
+ },
337
+ {
338
+ "text": "is",
339
+ "start": 40.111555555555555,
340
+ "end": 40.19244444444445
341
+ },
342
+ {
343
+ "text": "mostly",
344
+ "start": 40.25311111111111,
345
+ "end": 40.617111111111114
346
+ },
347
+ {
348
+ "text": "water.",
349
+ "start": 40.698,
350
+ "end": 40.85977777777778
351
+ },
352
+ {
353
+ "text": "It",
354
+ "start": 42.442009569377994,
355
+ "end": 42.52239234449761
356
+ },
357
+ {
358
+ "text": "teens",
359
+ "start": 42.642966507177036,
360
+ "end": 43.105167464114835
361
+ },
362
+ {
363
+ "text": "with",
364
+ "start": 43.165454545454544,
365
+ "end": 43.346315789473685
366
+ },
367
+ {
368
+ "text": "life",
369
+ "start": 43.4266985645933,
370
+ "end": 43.66784688995215
371
+ },
372
+ {
373
+ "text": "and",
374
+ "start": 43.96928229665072,
375
+ "end": 44.06976076555024
376
+ },
377
+ {
378
+ "text": "rotates",
379
+ "start": 44.17023923444976,
380
+ "end": 44.67263157894737
381
+ },
382
+ {
383
+ "text": "once",
384
+ "start": 44.87358851674641,
385
+ "end": 45.03435406698564
386
+ },
387
+ {
388
+ "text": "every",
389
+ "start": 45.114736842105266,
390
+ "end": 45.2955980861244
391
+ },
392
+ {
393
+ "text": "twenty-four",
394
+ "start": 45.355885167464116,
395
+ "end": 45.878373205741624
396
+ },
397
+ {
398
+ "text": "hours,",
399
+ "start": 45.91856459330143,
400
+ "end": 46.19990430622009
401
+ },
402
+ {
403
+ "text": "while",
404
+ "start": 46.48041666666667,
405
+ "end": 46.66116666666667
406
+ },
407
+ {
408
+ "text": "orbiting",
409
+ "start": 46.801750000000006,
410
+ "end": 47.163250000000005
411
+ },
412
+ {
413
+ "text": "a",
414
+ "start": 47.2235,
415
+ "end": 47.26366666666667
416
+ },
417
+ {
418
+ "text": "star",
419
+ "start": 47.36408333333333,
420
+ "end": 47.72558333333333
421
+ },
422
+ {
423
+ "text": "called",
424
+ "start": 47.826,
425
+ "end": 48.10716666666667
426
+ },
427
+ {
428
+ "text": "the",
429
+ "start": 48.147333333333336,
430
+ "end": 48.24775
431
+ },
432
+ {
433
+ "text": "sun,",
434
+ "start": 48.32808333333334,
435
+ "end": 48.629333333333335
436
+ },
437
+ {
438
+ "text": "every 365",
439
+ "start": 49.01091666666667,
440
+ "end": 49.231833333333334
441
+ },
442
+ {
443
+ "text": "days.",
444
+ "start": 49.272000000000006,
445
+ "end": 49.352333333333334
446
+ },
447
+ {
448
+ "text": "From",
449
+ "start": 51.80240963855422,
450
+ "end": 52.14377510040161
451
+ },
452
+ {
453
+ "text": "what",
454
+ "start": 52.16385542168675,
455
+ "end": 52.5855421686747
456
+ },
457
+ {
458
+ "text": "I",
459
+ "start": 52.60562248995984,
460
+ "end": 52.66586345381526
461
+ },
462
+ {
463
+ "text": "said",
464
+ "start": 53.388755020080325,
465
+ "end": 54.49317269076305
466
+ },
467
+ {
468
+ "text": "here...",
469
+ "start": 54.513253012048196,
470
+ "end": 54.73413654618474
471
+ },
472
+ {
473
+ "text": "This",
474
+ "start": 56.260258620689655,
475
+ "end": 56.360689655172415
476
+ },
477
+ {
478
+ "text": "is",
479
+ "start": 56.40086206896552,
480
+ "end": 56.52137931034483
481
+ },
482
+ {
483
+ "text": "the",
484
+ "start": 56.581637931034486,
485
+ "end": 56.64189655172414
486
+ },
487
+ {
488
+ "text": "Sun,",
489
+ "start": 56.661982758620695,
490
+ "end": 57.22439655172414
491
+ },
492
+ {
493
+ "text": "mostly",
494
+ "start": 57.24448275862069,
495
+ "end": 57.666293103448275
496
+ },
497
+ {
498
+ "text": "hydrogen",
499
+ "start": 57.766724137931035,
500
+ "end": 58.26887931034483
501
+ },
502
+ {
503
+ "text": "and",
504
+ "start": 58.30905172413793,
505
+ "end": 58.38939655172414
506
+ },
507
+ {
508
+ "text": "helium.",
509
+ "start": 58.44965517241379,
510
+ "end": 58.831293103448274
511
+ },
512
+ {
513
+ "text": "Its",
514
+ "start": 60.86,
515
+ "end": 61.10089552238806
516
+ },
517
+ {
518
+ "text": "surface",
519
+ "start": 61.181194029850744,
520
+ "end": 61.502388059701495
521
+ },
522
+ {
523
+ "text": "temperature",
524
+ "start": 61.54253731343284,
525
+ "end": 62.18492537313433
526
+ },
527
+ {
528
+ "text": "is",
529
+ "start": 62.36559701492538,
530
+ "end": 62.48604477611941
531
+ },
532
+ {
533
+ "text": "nearly 10,000",
534
+ "start": 62.50611940298508,
535
+ "end": 62.80723880597015
536
+ },
537
+ {
538
+ "text": "degrees",
539
+ "start": 62.867462686567166,
540
+ "end": 63.32917910447762
541
+ },
542
+ {
543
+ "text": "Fahrenheit.",
544
+ "start": 63.44962686567165,
545
+ "end": 63.951492537313435
546
+ },
547
+ {
548
+ "text": "For",
549
+ "start": 67.54236734693878,
550
+ "end": 67.70302040816327
551
+ },
552
+ {
553
+ "text": "energy,",
554
+ "start": 67.76326530612245,
555
+ "end": 68.12473469387756
556
+ },
557
+ {
558
+ "text": "our",
559
+ "start": 68.48620408163266,
560
+ "end": 68.60669387755102
561
+ },
562
+ {
563
+ "text": "Sun",
564
+ "start": 68.70710204081632,
565
+ "end": 68.96816326530613
566
+ },
567
+ {
568
+ "text": "converged 700",
569
+ "start": 69.04848979591837,
570
+ "end": 70.2734693877551
571
+ },
572
+ {
573
+ "text": "million",
574
+ "start": 70.39395918367347,
575
+ "end": 70.77551020408163
576
+ },
577
+ {
578
+ "text": "tons",
579
+ "start": 70.896,
580
+ "end": 71.19722448979591
581
+ },
582
+ {
583
+ "text": "of",
584
+ "start": 71.2574693877551,
585
+ "end": 71.31771428571429
586
+ },
587
+ {
588
+ "text": "hydrogen",
589
+ "start": 71.33779591836735,
590
+ "end": 71.88
591
+ },
592
+ {
593
+ "text": "into 695",
594
+ "start": 72.34141791044777,
595
+ "end": 73.1644776119403
596
+ },
597
+ {
598
+ "text": "billion",
599
+ "start": 73.96746268656717,
600
+ "end": 74.32880597014926
601
+ },
602
+ {
603
+ "text": "tons",
604
+ "start": 74.40910447761195,
605
+ "end": 74.67007462686568
606
+ },
607
+ {
608
+ "text": "of",
609
+ "start": 74.71022388059703,
610
+ "end": 74.79052238805971
611
+ },
612
+ {
613
+ "text": "helium",
614
+ "start": 74.89089552238806,
615
+ "end": 75.35261194029852
616
+ },
617
+ {
618
+ "text": "every",
619
+ "start": 75.61358208955224,
620
+ "end": 75.91470149253732
621
+ },
622
+ {
623
+ "text": "second.",
624
+ "start": 76.05522388059703,
625
+ "end": 76.41656716417911
626
+ },
627
+ {
628
+ "text": "Sun",
629
+ "start": 77.36006211180124,
630
+ "end": 77.62086956521739
631
+ },
632
+ {
633
+ "text": "is",
634
+ "start": 77.78136645962734,
635
+ "end": 77.82149068322981
636
+ },
637
+ {
638
+ "text": "in",
639
+ "start": 77.84155279503106,
640
+ "end": 77.88167701863354
641
+ },
642
+ {
643
+ "text": "part",
644
+ "start": 77.92180124223603,
645
+ "end": 78.12242236024845
646
+ },
647
+ {
648
+ "text": "of",
649
+ "start": 78.16254658385094,
650
+ "end": 78.22273291925467
651
+ },
652
+ {
653
+ "text": "a",
654
+ "start": 78.26285714285714,
655
+ "end": 78.30298136645963
656
+ },
657
+ {
658
+ "text": "solar",
659
+ "start": 78.3832298136646,
660
+ "end": 78.72428571428571
661
+ },
662
+ {
663
+ "text": "system",
664
+ "start": 78.78447204968944,
665
+ "end": 79.14559006211181
666
+ },
667
+ {
668
+ "text": "formed",
669
+ "start": 79.30608695652174,
670
+ "end": 79.72739130434783
671
+ },
672
+ {
673
+ "text": "around 4.5",
674
+ "start": 79.8076397515528,
675
+ "end": 81.11167701863354
676
+ },
677
+ {
678
+ "text": "billion",
679
+ "start": 81.29223602484473,
680
+ "end": 81.73360248447204
681
+ },
682
+ {
683
+ "text": "years",
684
+ "start": 81.77372670807453,
685
+ "end": 82.01447204968945
686
+ },
687
+ {
688
+ "text": "ago",
689
+ "start": 82.07465838509317,
690
+ "end": 82.31540372670807
691
+ },
692
+ {
693
+ "text": "that",
694
+ "start": 82.69658385093167,
695
+ "end": 82.83701863354037
696
+ },
697
+ {
698
+ "text": "includes",
699
+ "start": 82.8972049689441,
700
+ "end": 83.33857142857143
701
+ },
702
+ {
703
+ "text": "earth",
704
+ "start": 83.47900621118012,
705
+ "end": 83.6595652173913
706
+ },
707
+ {
708
+ "text": "and",
709
+ "start": 83.9405577689243,
710
+ "end": 84.0610358565737
711
+ },
712
+ {
713
+ "text": "seven",
714
+ "start": 84.1614342629482,
715
+ "end": 84.48270916334661
716
+ },
717
+ {
718
+ "text": "other",
719
+ "start": 84.60318725099602,
720
+ "end": 84.82406374501991
721
+ },
722
+ {
723
+ "text": "orbiting",
724
+ "start": 85.06501992031872,
725
+ "end": 85.40637450199203
726
+ },
727
+ {
728
+ "text": "planets",
729
+ "start": 85.48669322709164,
730
+ "end": 85.88828685258964
731
+ },
732
+ {
733
+ "text": "from",
734
+ "start": 86.08908366533865,
735
+ "end": 86.26980079681275
736
+ },
737
+ {
738
+ "text": "Mercury",
739
+ "start": 86.35011952191235,
740
+ "end": 86.81195219123506
741
+ },
742
+ {
743
+ "text": "to",
744
+ "start": 87.67537848605578,
745
+ "end": 87.77577689243029
746
+ },
747
+ {
748
+ "text": "Neptune.",
749
+ "start": 87.85609561752989,
750
+ "end": 88.2978486055777
751
+ },
752
+ {
753
+ "text": "And",
754
+ "start": 89.12161849710984,
755
+ "end": 89.26242774566474
756
+ },
757
+ {
758
+ "text": "its",
759
+ "start": 89.28254335260117,
760
+ "end": 89.34289017341041
761
+ },
762
+ {
763
+ "text": "isn't",
764
+ "start": 89.48369942196533,
765
+ "end": 89.72508670520232
766
+ },
767
+ {
768
+ "text": "a",
769
+ "start": 89.78543352601156,
770
+ "end": 89.8256647398844
771
+ },
772
+ {
773
+ "text": "stationary",
774
+ "start": 89.88601156069365,
775
+ "end": 90.5699421965318
776
+ },
777
+ {
778
+ "text": "system.",
779
+ "start": 90.65040462427746,
780
+ "end": 90.97225433526012
781
+ },
782
+ {
783
+ "text": "Our",
784
+ "start": 93.72036036036036,
785
+ "end": 93.88072072072072
786
+ },
787
+ {
788
+ "text": "solar",
789
+ "start": 93.98094594594595,
790
+ "end": 94.32171171171171
791
+ },
792
+ {
793
+ "text": "system",
794
+ "start": 94.3618018018018,
795
+ "end": 94.68252252252253
796
+ },
797
+ {
798
+ "text": "is",
799
+ "start": 94.96315315315316,
800
+ "end": 95.04333333333334
801
+ },
802
+ {
803
+ "text": "spinning,",
804
+ "start": 95.1636036036036,
805
+ "end": 95.56450450450451
806
+ },
807
+ {
808
+ "text": "flying",
809
+ "start": 96.66698198198199,
810
+ "end": 97.04783783783785
811
+ },
812
+ {
813
+ "text": "through",
814
+ "start": 97.12801801801803,
815
+ "end": 97.36855855855856
816
+ },
817
+ {
818
+ "text": "space",
819
+ "start": 97.46878378378379,
820
+ "end": 97.80954954954956
821
+ },
822
+ {
823
+ "text": "at 134",
824
+ "start": 98.0500900900901,
825
+ "end": 98.13027027027027
826
+ },
827
+ {
828
+ "text": "miles",
829
+ "start": 98.25054054054054,
830
+ "end": 98.41090090090091
831
+ },
832
+ {
833
+ "text": "per",
834
+ "start": 98.47103603603604,
835
+ "end": 98.57126126126127
836
+ },
837
+ {
838
+ "text": "second.",
839
+ "start": 98.6313963963964,
840
+ "end": 98.79175675675677
841
+ },
842
+ {
843
+ "text": "turning",
844
+ "start": 102.6807284768212,
845
+ "end": 103.0619867549669
846
+ },
847
+ {
848
+ "text": "in",
849
+ "start": 103.2225165562914,
850
+ "end": 103.32284768211922
851
+ },
852
+ {
853
+ "text": "circles",
854
+ "start": 103.42317880794702,
855
+ "end": 103.84456953642385
856
+ },
857
+ {
858
+ "text": "as",
859
+ "start": 104.60708609271524,
860
+ "end": 104.68735099337749
861
+ },
862
+ {
863
+ "text": "part",
864
+ "start": 104.7876821192053,
865
+ "end": 104.98834437086093
866
+ },
867
+ {
868
+ "text": "of",
869
+ "start": 105.02847682119206,
870
+ "end": 105.08867549668875
871
+ },
872
+ {
873
+ "text": "a",
874
+ "start": 105.14887417218543,
875
+ "end": 105.168940397351
876
+ },
877
+ {
878
+ "text": "vast",
879
+ "start": 105.28933774834438,
880
+ "end": 105.65052980132451
881
+ },
882
+ {
883
+ "text": "collection",
884
+ "start": 105.75086092715232,
885
+ "end": 106.27258278145696
886
+ },
887
+ {
888
+ "text": "of",
889
+ "start": 106.33278145695364,
890
+ "end": 106.39298013245033
891
+ },
892
+ {
893
+ "text": "stars",
894
+ "start": 106.47324503311259,
895
+ "end": 107.01503311258278
896
+ },
897
+ {
898
+ "text": "and",
899
+ "start": 107.11536423841059,
900
+ "end": 107.19562913907285
901
+ },
902
+ {
903
+ "text": "star",
904
+ "start": 107.29596026490066,
905
+ "end": 107.63708609271524
906
+ },
907
+ {
908
+ "text": "systems.",
909
+ "start": 107.71735099337748,
910
+ "end": 108.1387417218543
911
+ },
912
+ {
913
+ "text": "There",
914
+ "start": 109.02,
915
+ "end": 109.12033557046979
916
+ },
917
+ {
918
+ "text": "may",
919
+ "start": 109.14040268456375,
920
+ "end": 109.30093959731543
921
+ },
922
+ {
923
+ "text": "be 200",
924
+ "start": 109.3611409395973,
925
+ "end": 109.82268456375839
926
+ },
927
+ {
928
+ "text": "billion",
929
+ "start": 110.58523489932885,
930
+ "end": 111.00664429530201
931
+ },
932
+ {
933
+ "text": "stars",
934
+ "start": 111.1069798657718,
935
+ "end": 111.54845637583892
936
+ },
937
+ {
938
+ "text": "in",
939
+ "start": 111.72906040268457,
940
+ "end": 111.78926174496644
941
+ },
942
+ {
943
+ "text": "this",
944
+ "start": 111.8093288590604,
945
+ "end": 111.94979865771812
946
+ },
947
+ {
948
+ "text": "collection",
949
+ "start": 111.98993288590604,
950
+ "end": 112.511677852349
951
+ },
952
+ {
953
+ "text": "called",
954
+ "start": 112.61201342281879,
955
+ "end": 112.93308724832215
956
+ },
957
+ {
958
+ "text": "the",
959
+ "start": 112.99328859060402,
960
+ "end": 113.07355704697986
961
+ },
962
+ {
963
+ "text": "Milky",
964
+ "start": 113.17389261744967,
965
+ "end": 113.59530201342282
966
+ },
967
+ {
968
+ "text": "Way",
969
+ "start": 113.6555033557047,
970
+ "end": 113.89630872483221
971
+ },
972
+ {
973
+ "text": "Galaxy.",
974
+ "start": 114.03677852348993,
975
+ "end": 114.55852348993288
976
+ },
977
+ {
978
+ "text": "And",
979
+ "start": 115.60069204152248,
980
+ "end": 115.68096885813148
981
+ },
982
+ {
983
+ "text": "estimated 6",
984
+ "start": 115.78131487889273,
985
+ "end": 116.24290657439445
986
+ },
987
+ {
988
+ "text": "billion",
989
+ "start": 116.92525951557093,
990
+ "end": 117.3667820069204
991
+ },
992
+ {
993
+ "text": "of",
994
+ "start": 117.38685121107265,
995
+ "end": 117.4470588235294
996
+ },
997
+ {
998
+ "text": "those",
999
+ "start": 117.4871972318339,
1000
+ "end": 117.68788927335639
1001
+ },
1002
+ {
1003
+ "text": "stars",
1004
+ "start": 117.74809688581314,
1005
+ "end": 118.14948096885813
1006
+ },
1007
+ {
1008
+ "text": "with",
1009
+ "start": 118.35017301038062,
1010
+ "end": 118.49065743944635
1011
+ },
1012
+ {
1013
+ "text": "planetary",
1014
+ "start": 118.5910034602076,
1015
+ "end": 119.25328719723183
1016
+ },
1017
+ {
1018
+ "text": "systems",
1019
+ "start": 119.33356401384081,
1020
+ "end": 119.7750865051903
1021
+ },
1022
+ {
1023
+ "text": "like",
1024
+ "start": 120.07612456747404,
1025
+ "end": 120.25674740484428
1026
+ },
1027
+ {
1028
+ "text": "ours.",
1029
+ "start": 120.41730103806228,
1030
+ "end": 120.61799307958476
1031
+ },
1032
+ {
1033
+ "text": "Our",
1034
+ "start": 122.18071895424836,
1035
+ "end": 122.28104575163398
1036
+ },
1037
+ {
1038
+ "text": "Solar",
1039
+ "start": 122.34124183006536,
1040
+ "end": 122.68235294117646
1041
+ },
1042
+ {
1043
+ "text": "System",
1044
+ "start": 122.74254901960784,
1045
+ "end": 123.08366013071895
1046
+ },
1047
+ {
1048
+ "text": "orbits",
1049
+ "start": 123.2843137254902,
1050
+ "end": 123.58529411764705
1051
+ },
1052
+ {
1053
+ "text": "the",
1054
+ "start": 123.64549019607843,
1055
+ "end": 123.76588235294118
1056
+ },
1057
+ {
1058
+ "text": "center",
1059
+ "start": 123.82607843137254,
1060
+ "end": 124.16718954248365
1061
+ },
1062
+ {
1063
+ "text": "of",
1064
+ "start": 124.32771241830065,
1065
+ "end": 124.38790849673202
1066
+ },
1067
+ {
1068
+ "text": "the",
1069
+ "start": 124.42803921568627,
1070
+ "end": 124.50830065359476
1071
+ },
1072
+ {
1073
+ "text": "Milky",
1074
+ "start": 124.54843137254902,
1075
+ "end": 124.84941176470588
1076
+ },
1077
+ {
1078
+ "text": "Way,",
1079
+ "start": 124.92967320261437,
1080
+ "end": 125.23065359477124
1081
+ },
1082
+ {
1083
+ "text": "on",
1084
+ "start": 125.89281045751633,
1085
+ "end": 125.97307189542484
1086
+ },
1087
+ {
1088
+ "text": "one",
1089
+ "start": 126.07339869281046,
1090
+ "end": 126.17372549019608
1091
+ },
1092
+ {
1093
+ "text": "of",
1094
+ "start": 126.21385620915032,
1095
+ "end": 126.25398692810457
1096
+ },
1097
+ {
1098
+ "text": "it's",
1099
+ "start": 126.33424836601307,
1100
+ "end": 126.4546405228758
1101
+ },
1102
+ {
1103
+ "text": "outer",
1104
+ "start": 126.63522875816993,
1105
+ "end": 126.89607843137254
1106
+ },
1107
+ {
1108
+ "text": "arms.",
1109
+ "start": 127.17699346405229,
1110
+ "end": 127.41777777777777
1111
+ },
1112
+ {
1113
+ "text": "The",
1114
+ "start": 131.10209497206702,
1115
+ "end": 131.20216480446928
1116
+ },
1117
+ {
1118
+ "text": "Milky",
1119
+ "start": 131.24219273743017,
1120
+ "end": 131.56241620111732
1121
+ },
1122
+ {
1123
+ "text": "Way",
1124
+ "start": 131.6024441340782,
1125
+ "end": 131.88263966480446
1126
+ },
1127
+ {
1128
+ "text": "is",
1129
+ "start": 131.90265363128492,
1130
+ "end": 131.94268156424582
1131
+ },
1132
+ {
1133
+ "text": "a",
1134
+ "start": 132.16283519553073,
1135
+ "end": 132.20286312849163
1136
+ },
1137
+ {
1138
+ "text": "snow",
1139
+ "start": 132.22287709497206,
1140
+ "end": 132.42301675977654
1141
+ },
1142
+ {
1143
+ "text": "of 235",
1144
+ "start": 132.4630446927374,
1145
+ "end": 132.5030726256983
1146
+ },
1147
+ {
1148
+ "text": "billion",
1149
+ "start": 132.5431005586592,
1150
+ "end": 132.90335195530724
1151
+ },
1152
+ {
1153
+ "text": "galaxies",
1154
+ "start": 132.9233659217877,
1155
+ "end": 133.24358938547485
1156
+ },
1157
+ {
1158
+ "text": "that",
1159
+ "start": 133.2636033519553,
1160
+ "end": 133.40370111731843
1161
+ },
1162
+ {
1163
+ "text": "predicts",
1164
+ "start": 133.42371508379887,
1165
+ "end": 133.8239944134078
1166
+ },
1167
+ {
1168
+ "text": "the",
1169
+ "start": 133.9040502793296,
1170
+ "end": 134.00412011173185
1171
+ },
1172
+ {
1173
+ "text": "unique",
1174
+ "start": 134.02413407821228,
1175
+ "end": 134.3643715083799
1176
+ },
1177
+ {
1178
+ "text": "if",
1179
+ "start": 158.08481751824817,
1180
+ "end": 158.1851824817518
1181
+ },
1182
+ {
1183
+ "text": "the",
1184
+ "start": 158.22532846715328,
1185
+ "end": 158.3256934306569
1186
+ },
1187
+ {
1188
+ "text": "universe",
1189
+ "start": 158.40598540145984,
1190
+ "end": 158.86766423357665
1191
+ },
1192
+ {
1193
+ "text": "is",
1194
+ "start": 158.98810218978102,
1195
+ "end": 159.08846715328465
1196
+ },
1197
+ {
1198
+ "text": "expanding",
1199
+ "start": 159.14868613138685,
1200
+ "end": 159.69065693430656
1201
+ },
1202
+ {
1203
+ "text": "then",
1204
+ "start": 160.3129197080292,
1205
+ "end": 160.43335766423357
1206
+ },
1207
+ {
1208
+ "text": "it",
1209
+ "start": 160.47350364963503,
1210
+ "end": 160.5337226277372
1211
+ },
1212
+ {
1213
+ "text": "used",
1214
+ "start": 160.61401459854014,
1215
+ "end": 160.77459854014597
1216
+ },
1217
+ {
1218
+ "text": "to",
1219
+ "start": 160.7946715328467,
1220
+ "end": 160.87496350364964
1221
+ },
1222
+ {
1223
+ "text": "be",
1224
+ "start": 160.91510948905108,
1225
+ "end": 161.03554744525547
1226
+ },
1227
+ {
1228
+ "text": "smaller",
1229
+ "start": 161.11583941605838,
1230
+ "end": 161.49722627737225
1231
+ },
1232
+ {
1233
+ "text": "much",
1234
+ "start": 163.04609374999998,
1235
+ "end": 163.28796875
1236
+ },
1237
+ {
1238
+ "text": "smaller",
1239
+ "start": 163.3484375,
1240
+ "end": 163.73140625
1241
+ },
1242
+ {
1243
+ "text": "in",
1244
+ "start": 165.54275590551183,
1245
+ "end": 165.62307086614175
1246
+ },
1247
+ {
1248
+ "text": "fact",
1249
+ "start": 165.72346456692912,
1250
+ "end": 166.02464566929135
1251
+ },
1252
+ {
1253
+ "text": "if",
1254
+ "start": 166.46637795275592,
1255
+ "end": 166.54669291338584
1256
+ },
1257
+ {
1258
+ "text": "we",
1259
+ "start": 166.5868503937008,
1260
+ "end": 166.70732283464568
1261
+ },
1262
+ {
1263
+ "text": "went",
1264
+ "start": 166.74748031496063,
1265
+ "end": 166.90811023622047
1266
+ },
1267
+ {
1268
+ "text": "back",
1269
+ "start": 166.9884251968504,
1270
+ "end": 167.2092913385827
1271
+ },
1272
+ {
1273
+ "text": "in",
1274
+ "start": 167.26952755905512,
1275
+ "end": 167.34984251968504
1276
+ },
1277
+ {
1278
+ "text": "time",
1279
+ "start": 167.43015748031496,
1280
+ "end": 167.79157480314962
1281
+ },
1282
+ {
1283
+ "text": "we",
1284
+ "start": 168.37385826771654,
1285
+ "end": 168.47425196850395
1286
+ },
1287
+ {
1288
+ "text": "could",
1289
+ "start": 168.5144094488189,
1290
+ "end": 168.67503937007874
1291
+ },
1292
+ {
1293
+ "text": "watch",
1294
+ "start": 168.75535433070866,
1295
+ "end": 169.05653543307088
1296
+ },
1297
+ {
1298
+ "text": "it",
1299
+ "start": 169.09669291338582,
1300
+ "end": 169.17700787401574
1301
+ },
1302
+ {
1303
+ "text": "shrink",
1304
+ "start": 169.3175590551181,
1305
+ "end": 169.57858267716534
1306
+ },
1307
+ {
1308
+ "text": "back",
1309
+ "start": 175.1248,
1310
+ "end": 175.3654857142857
1311
+ },
1312
+ {
1313
+ "text": "far",
1314
+ "start": 175.42565714285715,
1315
+ "end": 175.66634285714284
1316
+ },
1317
+ {
1318
+ "text": "enough",
1319
+ "start": 175.70645714285715,
1320
+ "end": 175.98725714285715
1321
+ },
1322
+ {
1323
+ "text": "and",
1324
+ "start": 176.72937142857143,
1325
+ "end": 176.8096
1326
+ },
1327
+ {
1328
+ "text": "the",
1329
+ "start": 176.84971428571427,
1330
+ "end": 176.92994285714283
1331
+ },
1332
+ {
1333
+ "text": "universe",
1334
+ "start": 177.03022857142855,
1335
+ "end": 177.41131428571427
1336
+ },
1337
+ {
1338
+ "text": "would",
1339
+ "start": 177.4714857142857,
1340
+ "end": 177.63194285714283
1341
+ },
1342
+ {
1343
+ "text": "be",
1344
+ "start": 177.652,
1345
+ "end": 177.7522857142857
1346
+ },
1347
+ {
1348
+ "text": "smaller",
1349
+ "start": 177.83251428571427,
1350
+ "end": 178.23365714285714
1351
+ },
1352
+ {
1353
+ "text": "than",
1354
+ "start": 178.27377142857142,
1355
+ "end": 178.43422857142855
1356
+ },
1357
+ {
1358
+ "text": "a",
1359
+ "start": 178.47434285714283,
1360
+ "end": 178.49439999999998
1361
+ },
1362
+ {
1363
+ "text": "galaxy",
1364
+ "start": 178.57462857142855,
1365
+ "end": 179.07605714285714
1366
+ },
1367
+ {
1368
+ "text": "back..and",
1369
+ "start": 180.9022797927461,
1370
+ "end": 181.76673575129533
1371
+ },
1372
+ {
1373
+ "text": "the",
1374
+ "start": 181.78683937823834,
1375
+ "end": 181.86725388601036
1376
+ },
1377
+ {
1378
+ "text": "universe",
1379
+ "start": 181.9677720207254,
1380
+ "end": 182.32963730569946
1381
+ },
1382
+ {
1383
+ "text": "is",
1384
+ "start": 182.3899481865285,
1385
+ "end": 182.4502590673575
1386
+ },
1387
+ {
1388
+ "text": "smaller",
1389
+ "start": 182.51056994818651,
1390
+ "end": 182.87243523316062
1391
+ },
1392
+ {
1393
+ "text": "than",
1394
+ "start": 182.91264248704664,
1395
+ "end": 183.05336787564767
1396
+ },
1397
+ {
1398
+ "text": "our",
1399
+ "start": 183.13378238341969,
1400
+ "end": 183.23430051813472
1401
+ },
1402
+ {
1403
+ "text": "solar",
1404
+ "start": 183.31471502590674,
1405
+ "end": 183.6162694300518
1406
+ },
1407
+ {
1408
+ "text": "system",
1409
+ "start": 183.67658031088084,
1410
+ "end": 183.9580310880829
1411
+ },
1412
+ {
1413
+ "text": "inely",
1414
+ "start": 184.54080321285142,
1415
+ "end": 184.82192771084337
1416
+ },
1417
+ {
1418
+ "text": "earn",
1419
+ "start": 184.9223293172691,
1420
+ "end": 185.00265060240963
1421
+ },
1422
+ {
1423
+ "text": "more.",
1424
+ "start": 185.06289156626505,
1425
+ "end": 185.14321285140562
1426
+ },
1427
+ {
1428
+ "text": "Fast",
1429
+ "start": 189.34,
1430
+ "end": 189.46363636363637
1431
+ },
1432
+ {
1433
+ "text": "enjoyment.",
1434
+ "start": 189.48424242424244,
1435
+ "end": 189.95818181818183
1436
+ },
1437
+ {
1438
+ "text": "Faster",
1439
+ "start": 190.08015151515153,
1440
+ "end": 190.22050505050507
1441
+ },
1442
+ {
1443
+ "text": "back",
1444
+ "start": 191.40348484848485,
1445
+ "end": 191.5438383838384
1446
+ },
1447
+ {
1448
+ "text": "and",
1449
+ "start": 191.62404040404041,
1450
+ "end": 191.74434343434345
1451
+ },
1452
+ {
1453
+ "text": "everything",
1454
+ "start": 191.76439393939395,
1455
+ "end": 194.11030303030304
1456
+ },
1457
+ {
1458
+ "text": "that",
1459
+ "start": 198.14260869565217,
1460
+ "end": 198.4672463768116
1461
+ },
1462
+ {
1463
+ "text": "exists",
1464
+ "start": 198.54840579710145,
1465
+ "end": 198.67014492753626
1466
+ },
1467
+ {
1468
+ "text": "fits",
1469
+ "start": 198.71072463768118,
1470
+ "end": 198.87304347826088
1471
+ },
1472
+ {
1473
+ "text": "inside",
1474
+ "start": 198.89333333333335,
1475
+ "end": 199.01507246376812
1476
+ },
1477
+ {
1478
+ "text": "a",
1479
+ "start": 199.03536231884058,
1480
+ "end": 199.05565217391305
1481
+ },
1482
+ {
1483
+ "text": "stadium.",
1484
+ "start": 199.3800638977636,
1485
+ "end": 199.70108626198083
1486
+ },
1487
+ {
1488
+ "text": "our",
1489
+ "start": 199.78134185303514,
1490
+ "end": 200.122428115016
1491
+ },
1492
+ {
1493
+ "text": "coffee",
1494
+ "start": 200.14249201277957,
1495
+ "end": 200.4835782747604
1496
+ },
1497
+ {
1498
+ "text": "cup.",
1499
+ "start": 200.50364217252397,
1500
+ "end": 200.5638338658147
1501
+ },
1502
+ {
1503
+ "text": "adventure",
1504
+ "start": 205.94315789473686,
1505
+ "end": 206.2058947368421
1506
+ },
1507
+ {
1508
+ "text": "switch",
1509
+ "start": 207.27705263157895,
1510
+ "end": 207.51957894736842
1511
+ },
1512
+ {
1513
+ "text": "All",
1514
+ "start": 207.56,
1515
+ "end": 207.74346153846153
1516
+ },
1517
+ {
1518
+ "text": "stands",
1519
+ "start": 207.76384615384615,
1520
+ "end": 208.02884615384616
1521
+ },
1522
+ {
1523
+ "text": "at.",
1524
+ "start": 208.2326923076923,
1525
+ "end": 208.33461538461538
1526
+ },
1527
+ {
1528
+ "text": "G",
1529
+ "start": 210.18423913043478,
1530
+ "end": 210.20429347826087
1531
+ },
1532
+ {
1533
+ "text": "This",
1534
+ "start": 219.0646288209607,
1535
+ "end": 219.3056768558952
1536
+ },
1537
+ {
1538
+ "text": "was",
1539
+ "start": 219.52663755458516,
1540
+ "end": 219.6471615720524
1541
+ },
1542
+ {
1543
+ "text": "how",
1544
+ "start": 219.707423580786,
1545
+ "end": 219.86812227074236
1546
+ },
1547
+ {
1548
+ "text": "it",
1549
+ "start": 219.90829694323145,
1550
+ "end": 219.96855895196506
1551
+ },
1552
+ {
1553
+ "text": "all",
1554
+ "start": 220.10917030567686,
1555
+ "end": 220.24978165938865
1556
+ },
1557
+ {
1558
+ "text": "began.",
1559
+ "start": 220.35021834061135,
1560
+ "end": 220.69170305676855
1561
+ },
1562
+ {
1563
+ "text": "Performance",
1564
+ "start": 222.7410218978102,
1565
+ "end": 223.3655474452555
1566
+ },
1567
+ {
1568
+ "text": "The",
1569
+ "start": 225.520323943662,
1570
+ "end": 225.58044542253523
1571
+ },
1572
+ {
1573
+ "text": "Big",
1574
+ "start": 225.62052640845073,
1575
+ "end": 225.76080985915493
1576
+ },
1577
+ {
1578
+ "text": "Bang",
1579
+ "start": 225.78085035211268,
1580
+ "end": 225.86101232394367
1581
+ },
1582
+ {
1583
+ "text": "ain't",
1584
+ "start": 225.88105281690142,
1585
+ "end": 226.12153873239438
1586
+ },
1587
+ {
1588
+ "text": "the",
1589
+ "start": 226.18166021126763,
1590
+ "end": 226.30190316901408
1591
+ },
1592
+ {
1593
+ "text": "big",
1594
+ "start": 226.36202464788732,
1595
+ "end": 226.6425915492958
1596
+ },
1597
+ {
1598
+ "text": "bang.",
1599
+ "start": 226.782875,
1600
+ "end": 227.14360387323944
1601
+ }
1602
+ ]
setup.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Packaging script for the ``whisperx`` distribution."""
import os

import pkg_resources
from setuptools import setup, find_packages


def _read_requirements(filename="requirements.txt"):
    """Return the requirement specifiers listed in *filename* as strings.

    The file is looked up next to this script, so the result does not depend
    on the directory the build is started from.
    """
    path = os.path.join(os.path.dirname(__file__), filename)
    # Open the file in a context manager so the handle is closed as soon as
    # parsing finishes. ``parse_requirements`` is lazy, so the list must be
    # built while the file is still open.
    with open(path, encoding="utf-8") as handle:
        return [str(req) for req in pkg_resources.parse_requirements(handle)]


setup(
    name="whisperx",
    py_modules=["whisperx"],
    version="1.0",
    description="Time-Accurate Automatic Speech Recognition using Whisper.",
    # NOTE(review): ``readme`` does not appear to be a documented ``setup()``
    # keyword (``long_description`` is the usual one) -- confirm whether
    # setuptools honours it or merely warns about an unknown option.
    readme="README.md",
    python_requires=">=3.7",
    author="Max Bain",
    url="https://github.com/m-bain/whisperx",
    license="MIT",
    packages=find_packages(exclude=["tests*"]),
    install_requires=_read_requirements(),
    entry_points={
        "console_scripts": ["whisperx=whisperx.transcribe:cli"],
    },
    include_package_data=True,
    extras_require={"dev": ["pytest"]},
)
transcription.json ADDED
@@ -0,0 +1,922 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": 0,
4
+ "seek": 0,
5
+ "start": 0.0,
6
+ "end": 3.54,
7
+ "text": " Right now at this very second.",
8
+ "tokens": [
9
+ 1779,
10
+ 586,
11
+ 412,
12
+ 341,
13
+ 588,
14
+ 1150,
15
+ 13
16
+ ],
17
+ "temperature": 1.0,
18
+ "avg_logprob": -0.6253367182034165,
19
+ "compression_ratio": 1.494047619047619,
20
+ "no_speech_prob": 0.5233597159385681
21
+ },
22
+ {
23
+ "id": 1,
24
+ "seek": 0,
25
+ "start": 3.54,
26
+ "end": 6.68,
27
+ "text": " We're in the aftermath of the Big Bang.",
28
+ "tokens": [
29
+ 492,
30
+ 434,
31
+ 294,
32
+ 264,
33
+ 34095,
34
+ 295,
35
+ 264,
36
+ 5429,
37
+ 11538,
38
+ 13
39
+ ],
40
+ "temperature": 1.0,
41
+ "avg_logprob": -0.6253367182034165,
42
+ "compression_ratio": 1.494047619047619,
43
+ "no_speech_prob": 0.5233597159385681
44
+ },
45
+ {
46
+ "id": 2,
47
+ "seek": 0,
48
+ "start": 6.68,
49
+ "end": 16.66,
50
+ "text": " Everything we see in here and taste and smell, and touch, is the aftermath.",
51
+ "tokens": [
52
+ 5471,
53
+ 321,
54
+ 536,
55
+ 294,
56
+ 510,
57
+ 293,
58
+ 3939,
59
+ 293,
60
+ 4316,
61
+ 11,
62
+ 293,
63
+ 2557,
64
+ 11,
65
+ 307,
66
+ 264,
67
+ 34095,
68
+ 13
69
+ ],
70
+ "temperature": 1.0,
71
+ "avg_logprob": -0.6253367182034165,
72
+ "compression_ratio": 1.494047619047619,
73
+ "no_speech_prob": 0.5233597159385681
74
+ },
75
+ {
76
+ "id": 3,
77
+ "seek": 0,
78
+ "start": 16.66,
79
+ "end": 22.18,
80
+ "text": " The Big Bang is really our evolving expanding universe.",
81
+ "tokens": [
82
+ 440,
83
+ 5429,
84
+ 11538,
85
+ 307,
86
+ 534,
87
+ 527,
88
+ 21085,
89
+ 14702,
90
+ 6445,
91
+ 13
92
+ ],
93
+ "temperature": 1.0,
94
+ "avg_logprob": -0.6253367182034165,
95
+ "compression_ratio": 1.494047619047619,
96
+ "no_speech_prob": 0.5233597159385681
97
+ },
98
+ {
99
+ "id": 4,
100
+ "seek": 0,
101
+ "start": 23.78,
102
+ "end": 27.92,
103
+ "text": " For us, mostly stuck on our rocky little planet.",
104
+ "tokens": [
105
+ 1171,
106
+ 505,
107
+ 11,
108
+ 5240,
109
+ 5541,
110
+ 322,
111
+ 527,
112
+ 33301,
113
+ 707,
114
+ 5054,
115
+ 13
116
+ ],
117
+ "temperature": 1.0,
118
+ "avg_logprob": -0.6253367182034165,
119
+ "compression_ratio": 1.494047619047619,
120
+ "no_speech_prob": 0.5233597159385681
121
+ },
122
+ {
123
+ "id": 5,
124
+ "seek": 2792,
125
+ "start": 28.3,
126
+ "end": 31.180000000000003,
127
+ "text": " The view of the universe begins with Earth.",
128
+ "tokens": [
129
+ 440,
130
+ 1910,
131
+ 295,
132
+ 264,
133
+ 6445,
134
+ 7338,
135
+ 365,
136
+ 4755,
137
+ 13
138
+ ],
139
+ "temperature": 1.0,
140
+ "avg_logprob": -0.9778916732124661,
141
+ "compression_ratio": 1.4180790960451977,
142
+ "no_speech_prob": 0.25073713064193726
143
+ },
144
+ {
145
+ "id": 6,
146
+ "seek": 2792,
147
+ "start": 32.440000000000005,
148
+ "end": 34.160000000000004,
149
+ "text": " This, is Earth.",
150
+ "tokens": [
151
+ 639,
152
+ 11,
153
+ 307,
154
+ 4755,
155
+ 13
156
+ ],
157
+ "temperature": 1.0,
158
+ "avg_logprob": -0.9778916732124661,
159
+ "compression_ratio": 1.4180790960451977,
160
+ "no_speech_prob": 0.25073713064193726
161
+ },
162
+ {
163
+ "id": 7,
164
+ "seek": 2792,
165
+ "start": 34.38,
166
+ "end": 38.18,
167
+ "text": " Silicon and oxygen based, with a metallic core.",
168
+ "tokens": [
169
+ 25351,
170
+ 293,
171
+ 9169,
172
+ 2361,
173
+ 11,
174
+ 365,
175
+ 257,
176
+ 25759,
177
+ 4965,
178
+ 13
179
+ ],
180
+ "temperature": 1.0,
181
+ "avg_logprob": -0.9778916732124661,
182
+ "compression_ratio": 1.4180790960451977,
183
+ "no_speech_prob": 0.25073713064193726
184
+ },
185
+ {
186
+ "id": 8,
187
+ "seek": 2792,
188
+ "start": 39.06,
189
+ "end": 40.88,
190
+ "text": " The surface is mostly water.",
191
+ "tokens": [
192
+ 440,
193
+ 3753,
194
+ 307,
195
+ 5240,
196
+ 1281,
197
+ 13
198
+ ],
199
+ "temperature": 1.0,
200
+ "avg_logprob": -0.9778916732124661,
201
+ "compression_ratio": 1.4180790960451977,
202
+ "no_speech_prob": 0.25073713064193726
203
+ },
204
+ {
205
+ "id": 9,
206
+ "seek": 2792,
207
+ "start": 42.02,
208
+ "end": 46.22,
209
+ "text": " It teens with life and rotates once every twenty-four hours,",
210
+ "tokens": [
211
+ 467,
212
+ 24849,
213
+ 365,
214
+ 993,
215
+ 293,
216
+ 42133,
217
+ 1564,
218
+ 633,
219
+ 7699,
220
+ 12,
221
+ 23251,
222
+ 2496,
223
+ 11
224
+ ],
225
+ "temperature": 1.0,
226
+ "avg_logprob": -0.9778916732124661,
227
+ "compression_ratio": 1.4180790960451977,
228
+ "no_speech_prob": 0.25073713064193726
229
+ },
230
+ {
231
+ "id": 10,
232
+ "seek": 2792,
233
+ "start": 46.38,
234
+ "end": 51.2,
235
+ "text": " while orbiting a star called the sun, every 365 days.",
236
+ "tokens": [
237
+ 1339,
238
+ 48985,
239
+ 257,
240
+ 3543,
241
+ 1219,
242
+ 264,
243
+ 3295,
244
+ 11,
245
+ 633,
246
+ 22046,
247
+ 1708,
248
+ 13
249
+ ],
250
+ "temperature": 1.0,
251
+ "avg_logprob": -0.9778916732124661,
252
+ "compression_ratio": 1.4180790960451977,
253
+ "no_speech_prob": 0.25073713064193726
254
+ },
255
+ {
256
+ "id": 11,
257
+ "seek": 5120,
258
+ "start": 51.2,
259
+ "end": 56.2,
260
+ "text": " From what I said here...",
261
+ "tokens": [
262
+ 3358,
263
+ 437,
264
+ 286,
265
+ 848,
266
+ 510,
267
+ 485
268
+ ],
269
+ "temperature": 1.0,
270
+ "avg_logprob": -1.3197117789846953,
271
+ "compression_ratio": 1.3988095238095237,
272
+ "no_speech_prob": 0.273966521024704
273
+ },
274
+ {
275
+ "id": 12,
276
+ "seek": 5120,
277
+ "start": 56.120000000000005,
278
+ "end": 60.86,
279
+ "text": " This is the Sun, mostly hydrogen and helium.",
280
+ "tokens": [
281
+ 639,
282
+ 307,
283
+ 264,
284
+ 6163,
285
+ 11,
286
+ 5240,
287
+ 12697,
288
+ 293,
289
+ 40175,
290
+ 13
291
+ ],
292
+ "temperature": 1.0,
293
+ "avg_logprob": -1.3197117789846953,
294
+ "compression_ratio": 1.3988095238095237,
295
+ "no_speech_prob": 0.273966521024704
296
+ },
297
+ {
298
+ "id": 13,
299
+ "seek": 5120,
300
+ "start": 60.86,
301
+ "end": 66.24000000000001,
302
+ "text": " Its surface temperature is nearly 10,000 degrees Fahrenheit.",
303
+ "tokens": [
304
+ 6953,
305
+ 3753,
306
+ 4292,
307
+ 307,
308
+ 6217,
309
+ 1266,
310
+ 11,
311
+ 1360,
312
+ 5310,
313
+ 31199,
314
+ 13
315
+ ],
316
+ "temperature": 1.0,
317
+ "avg_logprob": -1.3197117789846953,
318
+ "compression_ratio": 1.3988095238095237,
319
+ "no_speech_prob": 0.273966521024704
320
+ },
321
+ {
322
+ "id": 14,
323
+ "seek": 5120,
324
+ "start": 66.96000000000001,
325
+ "end": 71.88,
326
+ "text": " For energy, our Sun converged 700 million tons of hydrogen",
327
+ "tokens": [
328
+ 1171,
329
+ 2281,
330
+ 11,
331
+ 527,
332
+ 6163,
333
+ 9652,
334
+ 3004,
335
+ 15204,
336
+ 2459,
337
+ 9131,
338
+ 295,
339
+ 12697
340
+ ],
341
+ "temperature": 1.0,
342
+ "avg_logprob": -1.3197117789846953,
343
+ "compression_ratio": 1.3988095238095237,
344
+ "no_speech_prob": 0.273966521024704
345
+ },
346
+ {
347
+ "id": 15,
348
+ "seek": 5120,
349
+ "start": 71.96000000000001,
350
+ "end": 77.34,
351
+ "text": " into 695 billion tons of helium every second.",
352
+ "tokens": [
353
+ 666,
354
+ 1386,
355
+ 15718,
356
+ 5218,
357
+ 9131,
358
+ 295,
359
+ 40175,
360
+ 633,
361
+ 1150,
362
+ 13
363
+ ],
364
+ "temperature": 1.0,
365
+ "avg_logprob": -1.3197117789846953,
366
+ "compression_ratio": 1.3988095238095237,
367
+ "no_speech_prob": 0.273966521024704
368
+ },
369
+ {
370
+ "id": 16,
371
+ "seek": 7734,
372
+ "start": 77.34,
373
+ "end": 83.8,
374
+ "text": " Sun is in part of a solar system formed around 4.5 billion years ago that includes earth",
375
+ "tokens": [
376
+ 6163,
377
+ 307,
378
+ 294,
379
+ 644,
380
+ 295,
381
+ 257,
382
+ 7936,
383
+ 1185,
384
+ 8693,
385
+ 926,
386
+ 1017,
387
+ 13,
388
+ 20,
389
+ 5218,
390
+ 924,
391
+ 2057,
392
+ 300,
393
+ 5974,
394
+ 4120
395
+ ],
396
+ "temperature": 1.0,
397
+ "avg_logprob": -0.7630057561965216,
398
+ "compression_ratio": 1.4043715846994536,
399
+ "no_speech_prob": 0.3166518807411194
400
+ },
401
+ {
402
+ "id": 17,
403
+ "seek": 7734,
404
+ "start": 83.8,
405
+ "end": 88.84,
406
+ "text": " and seven other orbiting planets from Mercury to Neptune.",
407
+ "tokens": [
408
+ 293,
409
+ 3407,
410
+ 661,
411
+ 48985,
412
+ 15126,
413
+ 490,
414
+ 31780,
415
+ 281,
416
+ 49527,
417
+ 13
418
+ ],
419
+ "temperature": 1.0,
420
+ "avg_logprob": -0.7630057561965216,
421
+ "compression_ratio": 1.4043715846994536,
422
+ "no_speech_prob": 0.3166518807411194
423
+ },
424
+ {
425
+ "id": 18,
426
+ "seek": 7734,
427
+ "start": 88.84,
428
+ "end": 92.32000000000001,
429
+ "text": " And its isn't a stationary system.",
430
+ "tokens": [
431
+ 400,
432
+ 1080,
433
+ 1943,
434
+ 380,
435
+ 257,
436
+ 30452,
437
+ 1185,
438
+ 13
439
+ ],
440
+ "temperature": 1.0,
441
+ "avg_logprob": -0.7630057561965216,
442
+ "compression_ratio": 1.4043715846994536,
443
+ "no_speech_prob": 0.3166518807411194
444
+ },
445
+ {
446
+ "id": 19,
447
+ "seek": 7734,
448
+ "start": 93.56,
449
+ "end": 102.46000000000001,
450
+ "text": " Our solar system is spinning, flying through space at 134 miles per second.",
451
+ "tokens": [
452
+ 2621,
453
+ 7936,
454
+ 1185,
455
+ 307,
456
+ 15640,
457
+ 11,
458
+ 7137,
459
+ 807,
460
+ 1901,
461
+ 412,
462
+ 3705,
463
+ 19,
464
+ 6193,
465
+ 680,
466
+ 1150,
467
+ 13
468
+ ],
469
+ "temperature": 1.0,
470
+ "avg_logprob": -0.7630057561965216,
471
+ "compression_ratio": 1.4043715846994536,
472
+ "no_speech_prob": 0.3166518807411194
473
+ },
474
+ {
475
+ "id": 20,
476
+ "seek": 10246,
477
+ "start": 102.46,
478
+ "end": 108.52,
479
+ "text": " turning in circles as part of a vast collection of stars and star systems.",
480
+ "tokens": [
481
+ 6246,
482
+ 294,
483
+ 13040,
484
+ 382,
485
+ 644,
486
+ 295,
487
+ 257,
488
+ 8369,
489
+ 5765,
490
+ 295,
491
+ 6105,
492
+ 293,
493
+ 3543,
494
+ 3652,
495
+ 13
496
+ ],
497
+ "temperature": 1.0,
498
+ "avg_logprob": -0.6545037693447537,
499
+ "compression_ratio": 1.5532994923857868,
500
+ "no_speech_prob": 0.09585275501012802
501
+ },
502
+ {
503
+ "id": 21,
504
+ "seek": 10246,
505
+ "start": 109.02,
506
+ "end": 115.0,
507
+ "text": " There may be 200 billion stars in this collection called the Milky Way Galaxy.",
508
+ "tokens": [
509
+ 821,
510
+ 815,
511
+ 312,
512
+ 2331,
513
+ 5218,
514
+ 6105,
515
+ 294,
516
+ 341,
517
+ 5765,
518
+ 1219,
519
+ 264,
520
+ 38465,
521
+ 9558,
522
+ 13520,
523
+ 13
524
+ ],
525
+ "temperature": 1.0,
526
+ "avg_logprob": -0.6545037693447537,
527
+ "compression_ratio": 1.5532994923857868,
528
+ "no_speech_prob": 0.09585275501012802
529
+ },
530
+ {
531
+ "id": 22,
532
+ "seek": 10246,
533
+ "start": 115.39999999999999,
534
+ "end": 121.19999999999999,
535
+ "text": " And estimated 6 billion of those stars with planetary systems like ours.",
536
+ "tokens": [
537
+ 400,
538
+ 14109,
539
+ 1386,
540
+ 5218,
541
+ 295,
542
+ 729,
543
+ 6105,
544
+ 365,
545
+ 35788,
546
+ 3652,
547
+ 411,
548
+ 11896,
549
+ 13
550
+ ],
551
+ "temperature": 1.0,
552
+ "avg_logprob": -0.6545037693447537,
553
+ "compression_ratio": 1.5532994923857868,
554
+ "no_speech_prob": 0.09585275501012802
555
+ },
556
+ {
557
+ "id": 23,
558
+ "seek": 10246,
559
+ "start": 121.96,
560
+ "end": 128.1,
561
+ "text": " Our Solar System orbits the center of the Milky Way, on one of it's outer arms.",
562
+ "tokens": [
563
+ 2621,
564
+ 22385,
565
+ 8910,
566
+ 43522,
567
+ 264,
568
+ 3056,
569
+ 295,
570
+ 264,
571
+ 38465,
572
+ 9558,
573
+ 11,
574
+ 322,
575
+ 472,
576
+ 295,
577
+ 309,
578
+ 311,
579
+ 10847,
580
+ 5812,
581
+ 13
582
+ ],
583
+ "temperature": 1.0,
584
+ "avg_logprob": -0.6545037693447537,
585
+ "compression_ratio": 1.5532994923857868,
586
+ "no_speech_prob": 0.09585275501012802
587
+ },
588
+ {
589
+ "id": 24,
590
+ "seek": 12810,
591
+ "start": 128.1,
592
+ "end": 156.76,
593
+ "text": " The Milky Way is a snow of 235 billion galaxies that predicts the unique",
594
+ "tokens": [
595
+ 440,
596
+ 38465,
597
+ 9558,
598
+ 307,
599
+ 257,
600
+ 5756,
601
+ 295,
602
+ 6673,
603
+ 20,
604
+ 5218,
605
+ 28755,
606
+ 300,
607
+ 6069,
608
+ 82,
609
+ 264,
610
+ 3845
611
+ ],
612
+ "temperature": 1.0,
613
+ "avg_logprob": -3.183812141418457,
614
+ "compression_ratio": 0.96,
615
+ "no_speech_prob": 0.2284129559993744
616
+ },
617
+ {
618
+ "id": 25,
619
+ "seek": 15676,
620
+ "start": 156.76,
621
+ "end": 162.26,
622
+ "text": " if the universe is expanding then it used to be smaller",
623
+ "tokens": [
624
+ 498,
625
+ 264,
626
+ 6445,
627
+ 307,
628
+ 14702,
629
+ 550,
630
+ 309,
631
+ 1143,
632
+ 281,
633
+ 312,
634
+ 4356
635
+ ],
636
+ "temperature": 1.0,
637
+ "avg_logprob": -0.819732165727459,
638
+ "compression_ratio": 1.7013888888888888,
639
+ "no_speech_prob": 0.3402792513370514
640
+ },
641
+ {
642
+ "id": 26,
643
+ "seek": 15676,
644
+ "start": 162.26,
645
+ "end": 164.84,
646
+ "text": " much smaller",
647
+ "tokens": [
648
+ 709,
649
+ 4356
650
+ ],
651
+ "temperature": 1.0,
652
+ "avg_logprob": -0.819732165727459,
653
+ "compression_ratio": 1.7013888888888888,
654
+ "no_speech_prob": 0.3402792513370514
655
+ },
656
+ {
657
+ "id": 27,
658
+ "seek": 15676,
659
+ "start": 164.84,
660
+ "end": 169.94,
661
+ "text": " in fact if we went back in time we could watch it shrink",
662
+ "tokens": [
663
+ 294,
664
+ 1186,
665
+ 498,
666
+ 321,
667
+ 1437,
668
+ 646,
669
+ 294,
670
+ 565,
671
+ 321,
672
+ 727,
673
+ 1159,
674
+ 309,
675
+ 23060
676
+ ],
677
+ "temperature": 1.0,
678
+ "avg_logprob": -0.819732165727459,
679
+ "compression_ratio": 1.7013888888888888,
680
+ "no_speech_prob": 0.3402792513370514
681
+ },
682
+ {
683
+ "id": 28,
684
+ "seek": 15676,
685
+ "start": 173.44,
686
+ "end": 180.45999999999998,
687
+ "text": " back far enough and the universe would be smaller than a galaxy",
688
+ "tokens": [
689
+ 646,
690
+ 1400,
691
+ 1547,
692
+ 293,
693
+ 264,
694
+ 6445,
695
+ 576,
696
+ 312,
697
+ 4356,
698
+ 813,
699
+ 257,
700
+ 17639
701
+ ],
702
+ "temperature": 1.0,
703
+ "avg_logprob": -0.819732165727459,
704
+ "compression_ratio": 1.7013888888888888,
705
+ "no_speech_prob": 0.3402792513370514
706
+ },
707
+ {
708
+ "id": 29,
709
+ "seek": 15676,
710
+ "start": 180.45999999999998,
711
+ "end": 184.34,
712
+ "text": " back..and the universe is smaller than our solar system",
713
+ "tokens": [
714
+ 646,
715
+ 353,
716
+ 474,
717
+ 264,
718
+ 6445,
719
+ 307,
720
+ 4356,
721
+ 813,
722
+ 527,
723
+ 7936,
724
+ 1185
725
+ ],
726
+ "temperature": 1.0,
727
+ "avg_logprob": -0.819732165727459,
728
+ "compression_ratio": 1.7013888888888888,
729
+ "no_speech_prob": 0.3402792513370514
730
+ },
731
+ {
732
+ "id": 30,
733
+ "seek": 18434,
734
+ "start": 184.34,
735
+ "end": 189.34,
736
+ "text": "inely earn more.",
737
+ "tokens": [
738
+ 33592,
739
+ 6012,
740
+ 544,
741
+ 13
742
+ ],
743
+ "temperature": 1.0,
744
+ "avg_logprob": -4.394529169256037,
745
+ "compression_ratio": 1.2456140350877194,
746
+ "no_speech_prob": 0.16134849190711975
747
+ },
748
+ {
749
+ "id": 31,
750
+ "seek": 18434,
751
+ "start": 185.28,
752
+ "end": 190.02,
753
+ "text": " Fast enjoyment.",
754
+ "tokens": [
755
+ 15968,
756
+ 32013,
757
+ 13
758
+ ],
759
+ "temperature": 1.0,
760
+ "avg_logprob": -4.394529169256037,
761
+ "compression_ratio": 1.2456140350877194,
762
+ "no_speech_prob": 0.16134849190711975
763
+ },
764
+ {
765
+ "id": 32,
766
+ "seek": 18434,
767
+ "start": 187.16,
768
+ "end": 197.96,
769
+ "text": " Faster back and everything",
770
+ "tokens": [
771
+ 46665,
772
+ 646,
773
+ 293,
774
+ 1203
775
+ ],
776
+ "temperature": 1.0,
777
+ "avg_logprob": -4.394529169256037,
778
+ "compression_ratio": 1.2456140350877194,
779
+ "no_speech_prob": 0.16134849190711975
780
+ },
781
+ {
782
+ "id": 33,
783
+ "seek": 18434,
784
+ "start": 188.18,
785
+ "end": 199.36,
786
+ "text": " that exists fits inside a",
787
+ "tokens": [
788
+ 300,
789
+ 8198,
790
+ 9001,
791
+ 1854,
792
+ 257
793
+ ],
794
+ "temperature": 1.0,
795
+ "avg_logprob": -4.394529169256037,
796
+ "compression_ratio": 1.2456140350877194,
797
+ "no_speech_prob": 0.16134849190711975
798
+ },
799
+ {
800
+ "id": 34,
801
+ "seek": 18434,
802
+ "start": 194.4,
803
+ "end": 205.64000000000001,
804
+ "text": " stadium. our coffee cup.",
805
+ "tokens": [
806
+ 18585,
807
+ 13,
808
+ 527,
809
+ 4982,
810
+ 4414,
811
+ 13
812
+ ],
813
+ "temperature": 1.0,
814
+ "avg_logprob": -4.394529169256037,
815
+ "compression_ratio": 1.2456140350877194,
816
+ "no_speech_prob": 0.16134849190711975
817
+ },
818
+ {
819
+ "id": 35,
820
+ "seek": 18434,
821
+ "start": 199.54,
822
+ "end": 207.56,
823
+ "text": " adventure switch",
824
+ "tokens": [
825
+ 9868,
826
+ 3679
827
+ ],
828
+ "temperature": 1.0,
829
+ "avg_logprob": -4.394529169256037,
830
+ "compression_ratio": 1.2456140350877194,
831
+ "no_speech_prob": 0.16134849190711975
832
+ },
833
+ {
834
+ "id": 36,
835
+ "seek": 18434,
836
+ "start": 205.1,
837
+ "end": 208.62,
838
+ "text": " All stands at.",
839
+ "tokens": [
840
+ 1057,
841
+ 7382,
842
+ 412,
843
+ 13
844
+ ],
845
+ "temperature": 1.0,
846
+ "avg_logprob": -4.394529169256037,
847
+ "compression_ratio": 1.2456140350877194,
848
+ "no_speech_prob": 0.16134849190711975
849
+ },
850
+ {
851
+ "id": 37,
852
+ "seek": 20862,
853
+ "start": 208.62,
854
+ "end": 216.0,
855
+ "text": " G",
856
+ "tokens": [
857
+ 460
858
+ ],
859
+ "temperature": 1.0,
860
+ "avg_logprob": -3.1060187395881202,
861
+ "compression_ratio": 0.8333333333333334,
862
+ "no_speech_prob": 0.1563536673784256
863
+ },
864
+ {
865
+ "id": 38,
866
+ "seek": 20862,
867
+ "start": 218.0,
868
+ "end": 222.6,
869
+ "text": " This was how it all began.",
870
+ "tokens": [
871
+ 639,
872
+ 390,
873
+ 577,
874
+ 309,
875
+ 439,
876
+ 4283,
877
+ 13
878
+ ],
879
+ "temperature": 1.0,
880
+ "avg_logprob": -3.1060187395881202,
881
+ "compression_ratio": 0.8333333333333334,
882
+ "no_speech_prob": 0.1563536673784256
883
+ },
884
+ {
885
+ "id": 39,
886
+ "seek": 20862,
887
+ "start": 222.6,
888
+ "end": 225.36,
889
+ "text": " Performance",
890
+ "tokens": [
891
+ 25047
892
+ ],
893
+ "temperature": 1.0,
894
+ "avg_logprob": -3.1060187395881202,
895
+ "compression_ratio": 0.8333333333333334,
896
+ "no_speech_prob": 0.1563536673784256
897
+ },
898
+ {
899
+ "id": 40,
900
+ "seek": 22536,
901
+ "start": 225.36,
902
+ "end": 228.24,
903
+ "text": " The Big Bang ain't the big bang.",
904
+ "tokens": [
905
+ 50364,
906
+ 440,
907
+ 5429,
908
+ 11538,
909
+ 7862,
910
+ 380,
911
+ 264,
912
+ 955,
913
+ 8550,
914
+ 13,
915
+ 50508
916
+ ],
917
+ "temperature": 1.0,
918
+ "avg_logprob": -1.9082762400309246,
919
+ "compression_ratio": 0.8888888888888888,
920
+ "no_speech_prob": 0.19741153717041016
921
+ }
922
+ ]
utils.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisperx as whisper
2
+
3
+ from deep_translator import GoogleTranslator
4
+ import os
5
+ from whisperx.utils import write_vtt, write_srt, write_ass, write_tsv, write_txt
6
+
7
+
8
def detect_language(filename, model):
    """Guess the spoken language of an audio file.

    The audio at ``filename`` is loaded, padded or trimmed (to fit 30
    seconds, per the original note), converted to a log-Mel spectrogram on
    the model's device and handed to ``model.detect_language``.

    Args:
        filename: Path of the audio file, forwarded to ``whisper.load_audio``.
        model: Object exposing ``device`` and ``detect_language(mel)``.

    Returns:
        A dict with the single key ``"detected_language"`` holding the entry
        of the returned probabilities that has the highest value.
    """
    waveform = whisper.pad_or_trim(whisper.load_audio(file=filename))
    # The spectrogram has to live on the same device as the model.
    mel_features = whisper.log_mel_spectrogram(waveform).to(model.device)
    _, language_probs = model.detect_language(mel_features)
    best_language = max(language_probs, key=language_probs.get)
    print(f"Detected language: {best_language}")
    return {"detected_language": best_language}
17
+
18
+
19
def translate_to_english(transcription, json=False):
    """Translate the ``"text"`` of every segment to English, in place.

    Args:
        transcription: When ``json`` is true, an iterable of segment dicts.
            Otherwise a mapping whose ``"segments"`` entry is such an
            iterable. Every segment dict must carry a ``"text"`` key.
        json: Selects which of the two shapes above ``transcription`` has.
            The name shadows the ``json`` module inside this function; it is
            kept as is because callers may pass it by keyword.

    Returns:
        The same ``transcription`` object that was passed in, with each
        segment's ``"text"`` replaced by its translation.
    """
    segments = transcription if json else transcription["segments"]
    # The translator's configuration never changes between segments, so one
    # instance is built up front instead of one per segment.
    translator = GoogleTranslator(source="auto", target="en")
    for segment in segments:
        segment["text"] = translator.translate(segment["text"])
    return transcription
32
+
33
+
34
def write(filename, dtype, result_aligned):
    """Write aligned segments to a transcript file named after ``filename``.

    The output path is ``filename`` with its final extension replaced by the
    one belonging to ``dtype``, joined onto ``"."``.

    Args:
        filename: Name of the source file; only its stem is used.
        dtype: Output format label: ``"vtt"``, ``"srt"``, ``"ass"``,
            ``"tsv"`` or ``"plain text"`` (written as ``.txt``). Any other
            value is ignored and nothing is written.
        result_aligned: Mapping whose ``"segments"`` entry is passed to the
            matching ``whisperx.utils`` writer.
    """
    # Format label -> (file extension, writer imported from whisperx.utils).
    writers = {
        "vtt": ("vtt", write_vtt),
        "srt": ("srt", write_srt),
        "ass": ("ass", write_ass),
        "tsv": ("tsv", write_tsv),
        "plain text": ("txt", write_txt),
    }
    if dtype not in writers:
        # Unknown labels have always been a silent no-op; keep it that way.
        return
    extension, writer = writers[dtype]
    stem = os.path.splitext(filename)[0]
    target = os.path.join(".", f"{stem}.{extension}")
    # Explicit UTF-8 so the file can be decoded by read(), which opens with
    # encoding="utf-8", regardless of the platform's default encoding.
    with open(target, "w", encoding="utf-8") as handle:
        writer(result_aligned["segments"], file=handle)
63
+
64
+
65
def read(filename, transc):
    """Return the contents of the transcript file that belongs to ``filename``.

    Args:
        filename: Name of the source file; its final extension is replaced
            by the transcript extension.
        transc: Format label. ``"plain text"`` maps to the ``txt`` extension;
            any other value is used verbatim as the extension.

    Returns:
        The file's lines joined with a single space. Each line keeps its own
        trailing newline.

    Raises:
        OSError: If the transcript file cannot be opened.
    """
    extension = "txt" if transc == "plain text" else transc
    # splitext removes only the final extension, so dotted names such as
    # "talk.v2.wav" and relative paths such as "./audio.wav" resolve to the
    # same stem that write() derives with os.path.splitext.
    stem = os.path.splitext(filename)[0]
    with open(f"{stem}.{extension}", encoding="utf-8") as handle:
        return " ".join(handle.readlines())
74
+
75
+
76
+ from constants import language_dict
77
+
78
+
79
def get_key(val):
    """Reverse lookup in ``language_dict``.

    Returns the first key (in the dict's iteration order) whose value equals
    ``val``, or the literal string ``"Key not found"`` when none matches.
    """
    matching_keys = (
        candidate for candidate, mapped in language_dict.items() if val == mapped
    )
    return next(matching_keys, "Key not found")