Spaces:
Edmond98
/
Running on A100

Afrinetwork7 committed on
Commit
06e4c74
1 Parent(s): 6250d85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -1,11 +1,11 @@
1
- from fastapi import FastAPI, UploadFile, File, Form
2
  from fastapi.responses import JSONResponse, FileResponse
3
  import uvicorn
4
  from pydantic import BaseModel
5
  import numpy as np
6
  import io
7
  import soundfile as sf
8
-
9
  from asr import transcribe, ASR_LANGUAGES
10
  from tts import synthesize, TTS_LANGUAGES
11
  from lid import identify
@@ -17,12 +17,16 @@ class TTSRequest(BaseModel):
17
  language: str
18
  speed: float
19
 
 
 
 
 
20
  @app.post("/transcribe")
21
- async def transcribe_audio(audio: UploadFile = File(...), language: str = Form(...)):
22
- contents = await audio.read()
23
- audio_array, sample_rate = sf.read(io.BytesIO(contents))
24
 
25
- result = transcribe(audio_array, language)
26
  return JSONResponse(content={"transcription": result})
27
 
28
  @app.post("/synthesize")
@@ -41,9 +45,9 @@ async def synthesize_speech(request: TTSRequest):
41
  )
42
 
43
  @app.post("/identify")
44
- async def identify_language(audio: UploadFile = File(...)):
45
- contents = await audio.read()
46
- audio_array, sample_rate = sf.read(io.BytesIO(contents))
47
 
48
  result = identify(audio_array)
49
  return JSONResponse(content={"language_identification": result})
@@ -54,4 +58,4 @@ async def get_asr_languages():
54
 
55
  @app.get("/tts_languages")
56
  async def get_tts_languages():
57
- return JSONResponse(content=TTS_LANGUAGES)
 
1
+ from fastapi import FastAPI, Form
2
  from fastapi.responses import JSONResponse, FileResponse
3
  import uvicorn
4
  from pydantic import BaseModel
5
  import numpy as np
6
  import io
7
  import soundfile as sf
8
+ import base64
9
  from asr import transcribe, ASR_LANGUAGES
10
  from tts import synthesize, TTS_LANGUAGES
11
  from lid import identify
 
17
  language: str
18
  speed: float
19
 
20
+ class AudioRequest(BaseModel):
21
+ audio: str # Base64 encoded audio data
22
+ language: str
23
+
24
  @app.post("/transcribe")
25
+ async def transcribe_audio(request: AudioRequest):
26
+ audio_bytes = base64.b64decode(request.audio)
27
+ audio_array, sample_rate = sf.read(io.BytesIO(audio_bytes))
28
 
29
+ result = transcribe(audio_array, request.language)
30
  return JSONResponse(content={"transcription": result})
31
 
32
  @app.post("/synthesize")
 
45
  )
46
 
47
  @app.post("/identify")
48
+ async def identify_language(request: AudioRequest):
49
+ audio_bytes = base64.b64decode(request.audio)
50
+ audio_array, sample_rate = sf.read(io.BytesIO(audio_bytes))
51
 
52
  result = identify(audio_array)
53
  return JSONResponse(content={"language_identification": result})
 
58
 
59
  @app.get("/tts_languages")
60
  async def get_tts_languages():
61
+ return JSONResponse(content=TTS_LANGUAGES)