import re import gradio as gr import numpy as np import os import io import wave import threading import subprocess import sys import time from huggingface_hub import snapshot_download from tools.fish_e2e import FishE2EAgent, FishE2EEventType from tools.schema import ServeMessage, ServeTextPart, ServeVQPart # Download Weights os.makedirs("checkpoints", exist_ok=True) snapshot_download(repo_id="fishaudio/fish-speech-1.4", local_dir="./checkpoints/fish-speech-1.4") snapshot_download(repo_id="fishaudio/fish-agent-v0.1-3b", local_dir="./checkpoints/fish-agent-v0.1-3b") SYSTEM_PROMPT = 'You are a voice assistant created by Fish Audio, offering end-to-end voice interaction for a seamless user experience. You are required to first transcribe the user\'s speech, then answer it in the following format: "Question: [USER_SPEECH]\n\nResponse: [YOUR_RESPONSE]\n"。You are required to use the following voice in this conversation.' class ChatState: def __init__(self): self.conversation = [] self.added_systext = False self.added_sysaudio = False def get_history(self): results = [] for msg in self.conversation: results.append({"role": msg.role, "content": self.repr_message(msg)}) # Process assistant messages to extract questions and update user messages for i, msg in enumerate(results): if msg["role"] == "assistant": match = re.search(r"Question: (.*?)\n\nResponse:", msg["content"]) if match and i > 0 and results[i - 1]["role"] == "user": # Update previous user message with extracted question results[i - 1]["content"] += "\n" + match.group(1) # Remove the Question/Answer format from assistant message msg["content"] = msg["content"].split("\n\nResponse: ", 1)[1] return results def repr_message(self, msg: ServeMessage): response = "" for part in msg.parts: if isinstance(part, ServeTextPart): response += part.text elif isinstance(part, ServeVQPart): response += f"