# OrionStar-Yi-34B-Chat Demo

import json
import os
import requests
import gradio as gr
from loguru import logger

# Deployment settings, all read from the process environment:
#   BACKEND_URL - chat endpoint; empty string when the variable is unset
#   MODEL_NAME  - model identifier placed in each request; None when unset
#   API_KEY     - credential sent in the request headers; None when unset
BACKEND_URL = os.environ.get('BACKEND_URL', '')
MODEL_NAME = os.environ.get('MODEL_NAME', None)
API_KEY = os.environ.get('API_KEY', None)

# Headers attached to every backend request: the API key plus a JSON content type.
HEADERS = dict([
    ('orionstar-api-key', API_KEY),
    ('Content-Type', 'application/json'),
])

def clear_session():
    """Reset the chat UI.

    Returns:
        A ``(textbox_value, chat_history)`` pair: an empty string for the
        input box and ``None`` for the chatbot history.
    """
    cleared_input = ''
    cleared_history = None
    return cleared_input, cleared_history

def chat_stream_generator(url, payload):
    """Stream a chat completion from the backend, yielding the answer so far.

    POSTs ``payload`` as JSON to ``url`` (with the module-level ``HEADERS``)
    and reads the streamed response line by line. Each non-empty line is
    expected to hold one JSON chunk, optionally preceded by a ``data:`` field
    name; the literal ``[DONE]`` is skipped. For every chunk whose first
    choice carries a non-empty ``delta.content``, that text is appended to the
    running answer and the accumulated answer is yielded.

    Args:
        url: Endpoint to POST to.
        payload: JSON-serialisable request body.

    Yields:
        str: The full answer accumulated so far (not just the newest piece).
    """
    prefix = 'data:'
    answer = ''
    with requests.post(url, json=payload, headers=HEADERS, stream=True) as response:
        if response.encoding is None:
            response.encoding = 'utf-8'
        for line in response.iter_lines(decode_unicode=True):
            if not line:
                continue
            # Strip only a *leading* field name. A global str.replace would
            # also delete any "data: " occurring inside the generated text.
            if line.startswith(prefix):
                line = line[len(prefix):].strip()
            if not line or line == '[DONE]':
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # Non-JSON lines (e.g. stream comments or other event fields)
                # should not abort an otherwise healthy stream.
                logger.warning('Skipping non-JSON stream line: {!r}', line)
                continue
            choices = data.get('choices') if isinstance(data, dict) else None
            if not choices:
                continue
            # A missing or null delta is treated the same as an empty one.
            delta = choices[0].get('delta') or {}
            content = delta.get('content')
            if content:
                answer += content
                yield answer

def generate_chat(input_text: str, history=None):
    """Stream the model's reply to ``input_text`` into the chat history.

    This is a generator. Each item it yields is a ``(textbox_value, history)``
    pair: ``None`` for the input textbox and a list of ``(user, assistant)``
    turns whose last entry is the current question with the reply received so
    far.

    Only ``input_text`` is sent to the backend; earlier turns in ``history``
    are kept for display and are not included in the request payload.

    Args:
        input_text: The user's message; ``None`` is treated as ``''``.
        history: Existing chat turns, or ``None`` for a fresh conversation.
            Only the five most recent turns are retained.

    Yields:
        tuple: ``(None, updated_history)`` after every streamed chunk. If the
        backend streams no content at all, a single update with an empty
        reply is yielded so the turn is still recorded.
    """
    if input_text is None:
        input_text = ''
    if history is None:
        history = []
    history = history[-5:]  # Keep only the five most recent turns
    payload = {
        "model": MODEL_NAME,
        "stream": True,
        "messages": [
            {"role": "user", "content": input_text}
        ]
    }

    response = ''
    streamed = False
    for response in chat_stream_generator(BACKEND_URL, payload):
        streamed = True
        # Yield a fresh list each time instead of appending/popping on a list
        # that has already been handed to the consumer.
        yield None, history + [(input_text, response)]

    if not streamed:
        # Nothing came back: still emit the turn (with an empty reply) rather
        # than failing on an unbound loop variable.
        yield None, history + [(input_text, response)]

# Startup log line, emitted once at import/run time.
# NOTE(review): the message spells the product "OrionStart" — looks like a typo
# for "OrionStar"; left untouched here because it is runtime text.
logger.info('Starting the OrionStart-Yi-34B Chat...')

# Gradio interface: a title, an introductory Markdown blurb, a chatbot pane,
# an input textbox and two buttons (submit / clear history).
block = gr.Blocks()

with block as demo:
    # Page heading followed by the descriptive bullet list.
    gr.Markdown("<center><h1>OrionStar-Yi-34B-Chat Demo</h1></center>")
    gr.Markdown("""
* The Yi series LLM models are large-scale models open-sourced by the 01.AI team, achieving commendable results on various authoritative Chinese, English, and general domain benchmarks.
* [Orionstar](https://www.orionstar.com/) has further tapped into the potential of the Orionstar-Yi-34B-Chat with the Yi-34B model. By deeply training on a large corpus of high-quality fine-tuning data, we are dedicated to making it an outstanding open-source alternative in the ChatGPT field.
* Orionstar-Yi-34B-Chat performs impressively on mainstream evaluation sets such as C-Eval, MMLU, and CMMLU, significantly outperforming other open-source conversational models around the world(as of November 2023). For a detailed comparison with other open-source models, see [here](https://github.com/OrionStarAI/OrionStar-Yi-34B-Chat).
* Please click Star to support us on [Github](https://github.com/OrionStarAI/OrionStar-Yi-34B-Chat).""")

    # Conversation display and the user's input field.
    chatbot = gr.Chatbot(label='OrionStar-Yi-34B-Chat', elem_classes="control-height")
    message = gr.Textbox(label='Input')

    with gr.Row():
        submit = gr.Button("🚀 Submit")
        clear_history = gr.Button("🧹 Clear History")

    # Submit: generate_chat receives (textbox value, chat history) and yields
    # (textbox value, chat history) pairs that are mapped onto the same two
    # components.
    submit.click(
        fn=generate_chat,
        inputs=[message, chatbot],
        outputs=[message, chatbot]
    )
    # Clear: clear_session takes no inputs and returns ('', None) for the
    # textbox and the chatbot; registered with queue=False.
    clear_history.click(
        fn=clear_session,
        inputs=[],
        outputs=[message, chatbot],
        queue=False
    )

# Enable the event queue and start the app. share=False, api_open=False and
# show_api=False are passed as shown — presumably to keep the demo free of a
# public share link and exposed API; confirm against the deployed Gradio version.
demo.queue(api_open=False).launch(height=800, share=False, show_api=False)