Spaces:
Running
on
L40S
Running
on
L40S
import os | |
from argparse import ArgumentParser | |
from pathlib import Path | |
import httpx | |
import ormsgpack | |
from tools.schema import ServeReferenceAudio, ServeTTSRequest | |
# Bearer token sent with API requests; a placeholder is used when the
# FISH_API_KEY environment variable is not set.
api_key = os.getenv("FISH_API_KEY", "YOUR_API_KEY")
def audio_request():
    """Request streamed speech synthesis and save the audio to ``hello.wav``.

    Builds a ``ServeTTSRequest`` that uses ``lengyue.wav`` and its transcript
    ``lengyue.lab`` as the reference voice, POSTs it msgpack-encoded to
    ``http://127.0.0.1:8080/v1/tts`` and writes the streamed response body to
    ``hello.wav`` in the current working directory.

    Raises:
        OSError: If a reference file cannot be read or the output file cannot
            be written.
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    # priority: ref_id > references
    request = ServeTTSRequest(
        text="你说的对, 但是原神是一款由米哈游自主研发的开放世界手游.",
        # reference_id="114514",
        references=[
            ServeReferenceAudio(
                # read_bytes()/read_text() close the file handles, which a
                # bare open(...).read() does not guarantee.
                audio=Path("lengyue.wav").read_bytes(),
                text=Path("lengyue.lab").read_text(encoding="utf-8"),
            )
        ],
        streaming=True,
    )

    api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY")
    payload = ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC)
    headers = {
        "authorization": f"Bearer {api_key}",
        "content-type": "application/msgpack",
    }

    with httpx.Client() as client:
        with client.stream(
            "POST",
            "http://127.0.0.1:8080/v1/tts",
            content=payload,
            headers=headers,
            timeout=None,
        ) as response:
            # Fail loudly on an error status instead of saving the error
            # body as if it were audio.
            response.raise_for_status()
            # Open the output only after the status check so a failed
            # request does not truncate an existing hello.wav.
            with open("hello.wav", "wb") as f:
                for chunk in response.iter_bytes():
                    f.write(chunk)
def asr_request(audio_path: Path):
    """Transcribe an audio file via the Fish Audio ASR API and print the result.

    The file is read fully into memory, packed with msgpack together with the
    ``language`` and ``ignore_timestamps`` options, and POSTed to
    ``https://api.fish.audio/v1/asr`` using the module-level ``api_key``.
    The transcribed text, the audio duration and every segment (text, start
    and end time) from the JSON response are printed to stdout.

    Args:
        audio_path: Path of the audio file to transcribe.

    Raises:
        OSError: If the audio file cannot be read.
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
    """
    # Read the audio file
    audio_data = Path(audio_path).read_bytes()

    # Prepare the request data
    request_data = {
        "audio": audio_data,
        "language": "en",  # Optional: specify the language
        "ignore_timestamps": False,  # Optional: set to True to ignore precise timestamps
    }

    # Send the request
    with httpx.Client() as client:
        response = client.post(
            "https://api.fish.audio/v1/asr",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/msgpack",
            },
            content=ormsgpack.packb(request_data),
        )

    # Surface API errors (bad key, quota, ...) as an HTTP error rather than
    # as a KeyError / JSON decode error further down.
    response.raise_for_status()

    # Parse the response
    result = response.json()

    print(f"Transcribed text: {result['text']}")
    print(f"Audio duration: {result['duration']} seconds")

    for segment in result["segments"]:
        print(f"Segment: {segment['text']}")
        print(f"Start time: {segment['start']}, End time: {segment['end']}")
def parse_args():
    """Parse the command line and return the resulting namespace.

    The only option is ``--audio_path``, converted to a ``Path``; when it is
    omitted the value is ``audio/ref/trump.mp3``.
    """
    cli = ArgumentParser()
    cli.add_argument(
        "--audio_path",
        type=Path,
        default="audio/ref/trump.mp3",
    )
    namespace = cli.parse_args()
    return namespace
if __name__ == "__main__":
    # Script entry point: transcribe the file given by --audio_path.
    # Only the ASR example runs here; audio_request() is not invoked.
    asr_request(parse_args().audio_path)