import json import os import requests import gradio as gr from loguru import logger # Environment variables for backend URL and model name BACKEND_URL = os.getenv('BACKEND_URL','') MODEL_NAME = os.getenv('MODEL_NAME') API_KEY = os.getenv('API_KEY') # Custom headers for the API request HEADERS = { 'orionstar-api-key': API_KEY, 'Content-Type': 'application/json' } def clear_session(): """Clears the chat session.""" return '', None def chat_stream_generator(url, payload): """Generator function to stream chat responses from the backend.""" answer = '' with requests.post(url, json=payload, headers=HEADERS, stream=True) as response: if response.encoding is None: response.encoding = 'utf-8' for line in response.iter_lines(decode_unicode=True): if line: line = line.replace('data: ', '') if line != '[DONE]': data = json.loads(line) if 'choices' in data and data['choices']: choice = data['choices'][0] if 'delta' in choice and choice['delta'].get('content'): answer += choice['delta']['content'] yield answer def generate_chat(input_text: str, history=None): """Generates chat responses and updates the chat history.""" if input_text is None: input_text = '' if history is None: history = [] history = history[-5:] # Keep the last 5 messages in history url = BACKEND_URL payload = { "model": MODEL_NAME, "stream": True, "messages": [ {"role": "user", "content": input_text} ] } gen = chat_stream_generator(url, payload) for response in gen: history.append((input_text, response)) yield None, history history.pop() history.append((input_text, response)) return None, gen logger.info('Starting the OrionStart-Yi-34B Chat...') # Gradio interface block = gr.Blocks() with block as demo: gr.Markdown("