"""Example client for the TTS and ASR HTTP endpoints used in this file.

Run as a script to transcribe an audio file through the ASR endpoint;
``audio_request`` demonstrates a streaming TTS call against a local server.
"""

import os
from argparse import ArgumentParser
from pathlib import Path

import httpx
import ormsgpack

from tools.schema import ServeReferenceAudio, ServeTTSRequest

# Read once at import time; falls back to a placeholder when the environment
# variable FISH_API_KEY is not set.
api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY")


def audio_request():
    """Send a streaming TTS request and write the returned audio to ``hello.wav``.

    The reference clip is read from ``lengyue.wav`` and its transcript from
    ``lengyue.lab`` in the current working directory.

    Raises:
        OSError: If a reference file cannot be read or ``hello.wav`` cannot
            be written.
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    # Load the reference material up front so the file handles are closed
    # before the network request starts.
    reference_audio = Path("lengyue.wav").read_bytes()
    reference_text = Path("lengyue.lab").read_text(encoding="utf-8")

    # priority: ref_id > references
    request = ServeTTSRequest(
        text="你说的对, 但是原神是一款由米哈游自主研发的开放世界手游.",
        # reference_id="114514",
        references=[
            ServeReferenceAudio(audio=reference_audio, text=reference_text),
        ],
        streaming=True,
    )
    payload = ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC)

    with (
        httpx.Client() as client,
        open("hello.wav", "wb") as f,
    ):
        with client.stream(
            "POST",
            "http://127.0.0.1:8080/v1/tts",
            content=payload,
            headers={
                "authorization": f"Bearer {api_key}",
                "content-type": "application/msgpack",
            },
            # No timeout: the body is streamed for as long as the server sends.
            timeout=None,
        ) as response:
            # Fail loudly instead of saving an error payload as audio.
            response.raise_for_status()
            for chunk in response.iter_bytes():
                f.write(chunk)


def asr_request(audio_path: Path):
    """Transcribe ``audio_path`` through the ASR endpoint and print the result.

    Args:
        audio_path: Path to the audio file whose bytes are uploaded.

    Raises:
        OSError: If the audio file cannot be read.
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    # Read the audio file
    audio_data = Path(audio_path).read_bytes()

    # Prepare the request data
    request_data = {
        "audio": audio_data,
        "language": "en",  # Optional: specify the language
        "ignore_timestamps": False,  # Optional: set to True to ignore precise timestamps
    }

    # Send the request
    with httpx.Client() as client:
        response = client.post(
            "https://api.fish.audio/v1/asr",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/msgpack",
            },
            content=ormsgpack.packb(request_data),
        )

    # Surface HTTP errors directly rather than as a KeyError or JSON decode
    # error when the body is not the expected result document.
    response.raise_for_status()

    # Parse the response
    result = response.json()

    print(f"Transcribed text: {result['text']}")
    print(f"Audio duration: {result['duration']} seconds")

    for segment in result["segments"]:
        print(f"Segment: {segment['text']}")
        print(f"Start time: {segment['start']}, End time: {segment['end']}")


def parse_args():
    """Parse command-line arguments.

    Returns:
        The parsed namespace; ``audio_path`` is a ``Path`` (argparse applies
        ``type`` to string defaults as well).
    """
    parser = ArgumentParser()
    parser.add_argument("--audio_path", type=Path, default="audio/ref/trump.mp3")

    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()

    asr_request(args.audio_path)