Spaces:

Nitzantry1
/

pyannote-speaker-diarization2

Runtime error

File size: 2,010 Bytes

dd2fd57
 
83c0605
16d67e4
38591a9
dd2fd57
 
 
 
83c0605
 
dd2fd57
 
16d67e4
dd2fd57
16d67e4
dd2fd57
16d67e4
83c0605
dd2fd57
 
 
 
83c0605
dd2fd57

import os
import gradio as gr
from pyannote.audio import Pipeline
from faster_whisper import WhisperModel

# NOTE: the runtime `pip install pyannote.audio` that used to live here was
# removed. The imports at the top of this file have already executed by the
# time this point is reached, so installing here could never make the import
# succeed; declare dependencies in requirements.txt instead.

# Load the PyAnnote speaker-diarization pipeline.
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
if pipeline is None:
    # Some pyannote.audio releases return None (rather than raising) when the
    # gated checkpoint cannot be downloaded. Fail fast with a clear message
    # instead of crashing later with "'NoneType' object is not callable".
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization'. Accept the model's "
        "user conditions on Hugging Face and make an access token available "
        "to this process (e.g. via the HF_TOKEN environment variable)."
    )

# Load the Whisper model used for transcription.
# faster-whisper needs a CTranslate2 model: either a size name such as
# "large-v2" or a converted repository. "openai/whisper-large" is a
# Transformers checkpoint and cannot be loaded by WhisperModel.
# device="auto" picks CUDA when available and falls back to CPU otherwise.
whisper_model = WhisperModel("large-v2", device="auto")

# Function that performs speaker diarization and transcription
def diarize_and_transcribe(audio_file):
    """Diarize an audio file and transcribe every speaker turn.

    The module-level ``pipeline`` splits the recording into speaker turns;
    each turn is then cut out of the decoded waveform and transcribed in
    Hebrew with the module-level ``whisper_model``.

    Args:
        audio_file: Path to the audio file (as delivered by the Gradio
            ``filepath`` input). May be ``None`` or empty when nothing was
            uploaded.

    Returns:
        One line per speaker turn, formatted as
        ``Speaker <label>: <text> (from <start> to <end>)`` and joined with
        newlines. An empty string when no file was provided.
    """
    if not audio_file:
        # Nothing was uploaded; there is nothing to process.
        return ""

    # Local import: faster_whisper is already a dependency of this file, and
    # keeping the import here makes this function self-contained.
    from faster_whisper import decode_audio

    # Decode once, up front. WhisperModel.transcribe() has no
    # segment_start/segment_end parameters, so each speaker turn is cut out
    # of the waveform by sample index instead.
    sample_rate = 16000
    waveform = decode_audio(audio_file, sampling_rate=sample_rate)

    # Step 1: diarization.
    diarization = pipeline(audio_file)

    # Collected transcript lines, one per speaker turn.
    full_transcription = []

    # Step 2: transcribe every turn found by the diarization.
    for segment, _, speaker in diarization.itertracks(yield_label=True):
        start_time = segment.start
        end_time = segment.end

        first_sample = max(int(start_time * sample_rate), 0)
        last_sample = int(end_time * sample_rate)
        clip = waveform[first_sample:last_sample]
        if clip.size == 0:
            # A turn that rounds to zero samples has no audio to transcribe.
            continue

        segments, _ = whisper_model.transcribe(
            clip, language="he", task="transcribe"
        )
        # Strip each piece so joining with a space does not double up
        # whitespace.
        transcription = " ".join(seg.text.strip() for seg in segments)

        full_transcription.append(
            f"Speaker {speaker}: {transcription} "
            f"(from {start_time:.2f} to {end_time:.2f})"
        )

    # Full transcript, split by speaker turn.
    return "\n".join(full_transcription)

# Build the audio upload input.
# Gradio 4 replaced the `source="upload"` keyword with `sources=["upload"]`
# and rejects the old spelling with a TypeError; older releases only accept
# the old one. Try the current keyword first and fall back so the app starts
# on either version.
try:
    audio_input = gr.Audio(sources=["upload"], type="filepath")
except TypeError:
    audio_input = gr.Audio(source="upload", type="filepath")

# Create the Gradio interface.
interface = gr.Interface(
    fn=diarize_and_transcribe,
    inputs=audio_input,
    outputs="text",
    title="Speaker Diarization and Transcription",
    description="Upload an audio file to perform both speaker diarization and transcription."
)

# Run the application.
interface.launch()