Ar4ikov committed
Commit 9b530c4 (parent: 5e9d349)

Update app.py

Files changed (1): app.py (+4, -8)
app.py CHANGED
@@ -19,8 +19,7 @@ def speech_file_to_array_fn(path, sampling_rate):
     return speech
 
 
-def predict(path, sampling_rate):
-    speech = speech_file_to_array_fn(path, sampling_rate)
+def predict(speech, sampling_rate):
     inputs = feature_extractor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
     inputs = {key: inputs[key].to(device) for key in inputs}
 
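With this change, predict no longer loads audio from disk via speech_file_to_array_fn; callers must pass the waveform samples themselves. A minimal sketch of the new calling convention, assuming a mono 16 kHz recording (the librosa loading step below is illustrative only, not part of app.py):

import librosa

# Hypothetical caller: load and resample a file ourselves, then pass the raw
# samples to the updated predict(speech, sampling_rate) signature.
speech, sampling_rate = librosa.load("example.wav", sr=16000, mono=True)
result = predict(speech, sampling_rate)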
@@ -43,18 +42,15 @@ model.to(device)
 
 
 def transcribe(audio):
-    filename = audio.split("/")[-1]
-    print(filename, f"c_{filename}")
-    command = f"sox -t wav /tmp/{filename} -r 16000 -b 16 /tmp/c_{filename} channels 1"
-    subprocess.call(command, shell=True)
-    return predict(f"/tmp/c_{filename}", 16000)
+    print(audio.shape)
+    return predict(audio, 16000)
 
 
 def get_asr_interface():
     return gr.Interface(
         fn=transcribe,
         inputs=[
-            gr.inputs.Audio(source="microphone", type="filepath")
+            gr.inputs.Audio(source="microphone", type="numpy")
         ],
         outputs=[
             "textbox"