Nitzantry1 committed on
Commit dd2fd57 • 1 Parent(s): 16d67e4

Update app.py

Files changed (1)
  1. app.py +40 -23
app.py CHANGED
@@ -1,33 +1,50 @@
+import os
+# Install PyAnnote if it is not already installed (must run before the pyannote import below)
+os.system('pip install pyannote.audio')
+
+import gradio as gr
 from pyannote.audio import Pipeline
 from faster_whisper import WhisperModel
 
-# Load the diarization pipeline
+# Load the PyAnnote diarization pipeline
 pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
 
-# Load the transcription model (Whisper)
-model = WhisperModel("openai/whisper-large", device="cuda")
+# Load the Whisper model for transcription ("large-v2" is a CTranslate2 build faster-whisper can load)
+whisper_model = WhisperModel("large-v2", device="cuda")
 
-# Function to handle diarization and transcription
+# Function that performs diarization and transcription
 def diarize_and_transcribe(audio_file):
-    # Step 1: Diarization
+    # Step 1: diarization
     diarization = pipeline(audio_file)
 
-    # Step 2: Transcription
-    result = []
+    # Variable to store the full transcription with speaker information
+    full_transcription = []
+
+    # Go over all the segments found by diarization
     for segment, _, speaker in diarization.itertracks(yield_label=True):
-        # Extract the segment audio (use an external tool like ffmpeg for extraction)
-        segment_audio = extract_audio_segment(audio_file, segment.start, segment.end)
-
-        # Transcribe the segment
-        transcription_segments, _ = model.transcribe(segment_audio, language="he")
-        transcription = " ".join([seg.text for seg in transcription_segments])
-
-        # Append result with speaker and transcription
-        result.append(f"Speaker {speaker}: {transcription}")
-
-    return "\n".join(result)
-
-# Example usage
-audio_file_path = "example_audio.wav"
-output = diarize_and_transcribe(audio_file_path)
-print(output)
+        # Use the diarized segment boundaries as the transcription window
+        start_time = segment.start
+        end_time = segment.end
+
+        # Transcribe the segment using Whisper (clip_timestamps requires faster-whisper >= 0.10)
+        segments, _ = whisper_model.transcribe(audio_file, language="he", task="transcribe",
+                                               clip_timestamps=f"{start_time},{end_time}")
+        transcription = " ".join([seg.text for seg in segments])
+
+        # Add the result to the full transcription
+        full_transcription.append(f"Speaker {speaker}: {transcription} (from {start_time:.2f} to {end_time:.2f})")
+
+    # Return the full transcription divided by speakers
+    return "\n".join(full_transcription)
+
+# Create the Gradio interface
+interface = gr.Interface(
+    fn=diarize_and_transcribe,
+    inputs=gr.Audio(source="upload", type="filepath"),
+    outputs="text",
+    title="Speaker Diarization and Transcription",
+    description="Upload an audio file to perform both speaker diarization and transcription."
+)
+
+# Launch the application
+interface.launch()
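
The removed version called an extract_audio_segment() helper that was never defined, with a comment pointing at ffmpeg. A minimal sketch of such a helper, assuming the ffmpeg CLI is installed; the function name, the temp-file handling, and the 16 kHz mono output are illustrative choices, not part of this commit:

import subprocess
import tempfile

def extract_audio_segment(audio_file, start, end):
    # Cut the [start, end] window (in seconds) out of audio_file into a
    # temporary WAV file and return its path. Requires ffmpeg on PATH.
    clip = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    clip.close()
    subprocess.run(
        ["ffmpeg", "-y", "-i", audio_file,
         "-ss", str(start), "-to", str(end),
         "-ar", "16000", "-ac", "1",  # resample to 16 kHz mono, the rate Whisper works with
         clip.name],
        check=True,
        capture_output=True,
    )
    return clip.name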
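Note also that pyannote/speaker-diarization is a gated model on the Hugging Face Hub, so Pipeline.from_pretrained as written will fail without an access token. A minimal sketch of loading it with one, assuming pyannote.audio 2.x and a token stored in an HF_TOKEN environment variable (the variable name is an assumption, not part of this commit):

import os
from pyannote.audio import Pipeline

# use_auth_token passes a Hugging Face access token for gated models;
# HF_TOKEN is an assumed environment variable, not something this commit sets.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization",
    use_auth_token=os.environ.get("HF_TOKEN"),
)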