|
Reading metadata...: 75336it [00:04, 15522.70it/s] | 0/10000 [00:00<?, ?it/s] |
|
Reading metadata...: 13630it [00:00, 20518.62it/s] |
|
[INFO|trainer_utils.py:830] 2024-10-07 10:21:26,106 >> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. |
|
Traceback (most recent call last): |
|
File "/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module> |
|
main() |
|
File "/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main |
|
train_result = trainer.train(resume_from_checkpoint=checkpoint) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2070, in train |
|
return inner_training_loop( |
|
^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2372, in _inner_training_loop |
|
for step, inputs in enumerate(epoch_iterator): |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 831, in __iter__ |
|
next_batch, next_batch_info = self._fetch_batches(main_iterator) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 752, in _fetch_batches |
|
batches.append(next(iterator)) |
|
^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 630, in __next__ |
|
data = self._next_data() |
|
^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 673, in _next_data |
|
data = self._dataset_fetcher.fetch(index) # may raise StopIteration |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch |
|
data.append(next(self.dataset_iter)) |
|
^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2012, in __iter__ |
|
for key, example in ex_iterable: |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1203, in __iter__ |
|
yield from self._iter() |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1259, in _iter |
|
for key, example in iterator: |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1393, in __iter__ |
|
for x in self.ex_iterable: |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 947, in __iter__ |
|
yield from self._iter() |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1027, in _iter |
|
for key, example in iterator: |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1613, in __iter__ |
|
_apply_feature_types_on_example(example, self.features, token_per_repo_id=self.token_per_repo_id), |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1566, in _apply_feature_types_on_example |
|
decoded_example = features.decode_example(encoded_example, token_per_repo_id=token_per_repo_id) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/features/features.py", line 2042, in decode_example |
|
column_name: decode_nested_example(feature, value, token_per_repo_id=token_per_repo_id) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/features/features.py", line 1403, in decode_nested_example |
|
return schema.decode_example(obj, token_per_repo_id=token_per_repo_id) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/features/audio.py", line 193, in decode_example |
|
array = librosa.resample(array, orig_sr=sampling_rate, target_sr=self.sampling_rate) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/librosa/core/audio.py", line 669, in resample |
|
y_hat = np.apply_along_axis( |
|
^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/numpy/lib/_shape_base_impl.py", line 384, in apply_along_axis |
|
res = asanyarray(func1d(inarr_view[ind0], *args, **kwargs)) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/soxr/__init__.py", line 206, in resample |
|
y = divide_proc(in_rate, out_rate, x[:, np.newaxis], q) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
KeyboardInterrupt |
|
|