import gradio as gr
from transformers import pipeline
import torch
import os
from moviepy.editor import VideoFileClip
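
# Assumed environment (not pinned in the source): gradio, transformers, torch and moviepy,
# e.g. `pip install gradio transformers torch moviepy`, plus an ffmpeg binary on the PATH,
# which moviepy relies on for audio extraction.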

# Prefer the GPU when one is available; fall back to the CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)


def extract_audio(video_path, audio_path):
    """Extract the audio track from a video file and save it to audio_path."""
    video = VideoFileClip(video_path)
    audio = video.audio
    audio.write_audiofile(audio_path)
    video.close()


def transcribe_video(video_path):
    """Transcribe a video file with Whisper and save the text next to the video."""
    # Extract the audio track to a temporary WAV file.
    audio_path = "temp_audio.wav"
    extract_audio(video_path, audio_path)

    # Build the ASR pipeline (reloaded on every call; move it outside the function
    # to load the model only once).
    transcriber = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-medium",
        device=device,
    )

    # Whisper works on 30-second windows; chunking lets it handle long recordings.
    result = transcriber(audio_path, chunk_length_s=30, return_timestamps=True)

    # Remove the temporary audio file.
    os.remove(audio_path)

    # Save the transcription next to the source video.
    output_path = os.path.splitext(video_path)[0] + "_transcription.txt"
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result["text"])

    return result["text"], output_path


# Gradio UI: a video input, the transcribed text, and a downloadable file.
iface = gr.Interface(
    fn=transcribe_video,
    inputs=gr.Video(),
    outputs=[gr.Textbox(label="Transcription"), gr.File(label="Transcription file")],
    title="Video Transcription",
    description="Upload a video file (mp4 or webm) to transcribe. The result will be displayed and saved to a file.",
    allow_flagging="never",
    theme=gr.themes.Soft(),
)

# share=True creates a temporary public link; debug=True surfaces errors in the console.
iface.launch(share=True, debug=True)
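
# A minimal run sketch (the filename "app.py" is assumed, not given in the source):
#   python app.py
# Gradio prints a local URL and, because share=True, a temporary public link as well.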