Nitzantry1 committed on
Commit dd2fd57 • 1 Parent(s): 16d67e4

Update app.py

Files changed (1)
  1. app.py +40 -23
app.py CHANGED
@@ -1,33 +1,50 @@
+import os
+# Install PyAnnote if it is not already installed (must run before the pyannote import below)
+os.system('pip install pyannote.audio')
+
+import gradio as gr
 from pyannote.audio import Pipeline
 from faster_whisper import WhisperModel
 
-# Load the diarization pipeline
+# Load the PyAnnote diarization pipeline
 pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
 
-# Load the transcription model (Whisper)
-model = WhisperModel("openai/whisper-large", device="cuda")
+# Load the Whisper model for transcription ("large-v2" is a CTranslate2 build faster-whisper can load)
+whisper_model = WhisperModel("large-v2", device="cuda")
 
-# Function to handle diarization and transcription
+# Function that performs diarization and transcription
 def diarize_and_transcribe(audio_file):
-    # Step 1: Diarization
+    # Step 1: diarization
     diarization = pipeline(audio_file)
 
-    # Step 2: Transcription
-    result = []
+    # Variable to store the full transcription with speaker information
+    full_transcription = []
+
+    # Go over all the segments found by diarization
     for segment, _, speaker in diarization.itertracks(yield_label=True):
-        # Extract the segment audio (use an external tool like ffmpeg for extraction)
-        segment_audio = extract_audio_segment(audio_file, segment.start, segment.end)
-
-        # Transcribe the segment
-        transcription_segments, _ = model.transcribe(segment_audio, language="he")
-        transcription = " ".join([seg.text for seg in transcription_segments])
-
-        # Append result with speaker and transcription
-        result.append(f"Speaker {speaker}: {transcription}")
-
-    return "\n".join(result)
-
-# Example usage
-audio_file_path = "example_audio.wav"
-output = diarize_and_transcribe(audio_file_path)
-print(output)
+        # Use the diarized segment boundaries as the transcription window
+        start_time = segment.start
+        end_time = segment.end
+
+        # Transcribe the segment using Whisper (clip_timestamps requires faster-whisper >= 0.10)
+        segments, _ = whisper_model.transcribe(audio_file, language="he", task="transcribe",
+                                               clip_timestamps=f"{start_time},{end_time}")
+        transcription = " ".join([seg.text for seg in segments])
+
+        # Add the result to the full transcription
+        full_transcription.append(f"Speaker {speaker}: {transcription} (from {start_time:.2f} to {end_time:.2f})")
+
+    # Return the full transcription divided by speakers
+    return "\n".join(full_transcription)
+
+# Create the Gradio interface
+interface = gr.Interface(
+    fn=diarize_and_transcribe,
+    inputs=gr.Audio(source="upload", type="filepath"),
+    outputs="text",
+    title="Speaker Diarization and Transcription",
+    description="Upload an audio file to perform both speaker diarization and transcription."
+)
+
+# Launch the application
+interface.launch()
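
The removed version called an extract_audio_segment() helper that was never defined, with a comment pointing at ffmpeg. A minimal sketch of such a helper, assuming the ffmpeg CLI is installed; the function name, the temp-file handling, and the 16 kHz mono output are illustrative choices, not part of this commit:

import subprocess
import tempfile

def extract_audio_segment(audio_file, start, end):
    # Cut the [start, end] window (in seconds) out of audio_file into a
    # temporary WAV file and return its path. Requires ffmpeg on PATH.
    clip = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    clip.close()
    subprocess.run(
        ["ffmpeg", "-y", "-i", audio_file,
         "-ss", str(start), "-to", str(end),
         "-ar", "16000", "-ac", "1",  # resample to 16 kHz mono, the rate Whisper works with
         clip.name],
        check=True,
        capture_output=True,
    )
    return clip.name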
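Note also that pyannote/speaker-diarization is a gated model on the Hugging Face Hub, so Pipeline.from_pretrained as written will fail without an access token. A minimal sketch of loading it with one, assuming pyannote.audio 2.x and a token stored in an HF_TOKEN environment variable (the variable name is an assumption, not part of this commit):

import os
from pyannote.audio import Pipeline

# use_auth_token passes a Hugging Face access token for gated models;
# HF_TOKEN is an assumed environment variable, not something this commit sets.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization",
    use_auth_token=os.environ.get("HF_TOKEN"),
)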