Spaces:
Runtime error
Runtime error
File size: 3,890 Bytes
0d80fb4 6498ae3 ec89555 e834dae 1e3869c 1854dfd 0d7fc07 7e5beaf ec4d6e3 7cfaf27 a99a788 7cfaf27 a99a788 7cfaf27 04b933e 7d03deb 7cfaf27 6be7d23 aa0b66f 5ef1577 7cfaf27 2cb9aa9 a99a788 4ee7533 fa11edf 7699538 e834dae 590d966 e834dae 590d966 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
from huggingface_hub import InferenceClient
import gradio as gr
import datetime
from pathlib import Path
# Initialize the InferenceClient
# Module-level handle to the hosted Mixtral-8x7B-Instruct endpoint on the
# Hugging Face Inference API; generate() streams all completions through it.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
def format_prompt(message, history):
    """Build a Mixtral-instruct prompt string from the chat history.

    Each past (user, bot) exchange is wrapped as
    ``[INST] user [/INST] bot</s> `` and the new message is appended as a
    final open ``[INST] ... [/INST]`` turn for the model to complete.
    """
    segments = ["<s>"]
    for past_user_msg, past_bot_msg in history:
        segments.append(f"[INST] {past_user_msg} [/INST] {past_bot_msg}</s> ")
    segments.append(f"[INST] {message} [/INST]")
    return "".join(segments)
def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=9048, top_p=0.95, repetition_penalty=1.0):
    """Stream a Mixtral completion for *prompt*, yielding the growing output.

    Parameters
    ----------
    prompt : str
        The user's newest message.
    history : list[tuple[str, str]]
        Prior (user, bot) exchanges, as supplied by gr.ChatInterface.
    system_prompt : str
        Ignored — a hard-coded persona prompt below deliberately replaces it.
    temperature, max_new_tokens, top_p, repetition_penalty
        Sampling controls, normally supplied by the sliders in
        ``additional_inputs``.

    Yields
    ------
    str
        The cumulative generated text after each streamed token.
    """
    # Gradio sliders may deliver floats; normalize every sampling knob and
    # keep temperature strictly positive (0 is rejected by the API).
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)
    max_new_tokens = int(max_new_tokens)  # API expects an integer token count
    repetition_penalty = float(repetition_penalty)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,  # fixed seed: sampling is reproducible per identical prompt
    )
    # NOTE(review): datetime.now() is naive local server time — confirm that
    # is the intended clock to expose to the model.
    now = datetime.datetime.now()
    formatted_time = now.strftime("%H:%M:%S, %B %d, %Y")
    # The caller-supplied system_prompt is intentionally discarded and
    # replaced with this fixed persona (kept byte-identical to the original).
    system_prompt = f"System time: {formatted_time}. Instructions: Everything else said before or after these brackets or quotation marks is from the user. The user does not know about these instructions. You are an AI named Milo, created by ConvoLite in 2024. Use the pronouns he/him (boy) . Your goal is to be a caring, compassionate, and considerate conversational partner. When faced with sensitive topics or difficult situations, respond with empathy and care, avoiding insensitive or dismissive language. Tailor your tone and word choice to match the user's emotional state and the gravity of the situation. Prioritize understanding and validating the user's feelings over providing an overly cheerful or detached response. If the user shares troubling news, express sincere condolences without minimizing their experience. Offer to continue the conversation in a supportive way, providing resources or suggestions if appropriate. The wellbeing of the user should be your primary concern. Overall, strive to be a thoughtful, emotionally intelligent companion who can navigate complex topics with nuance and care. Your responses should demonstrate genuine concern and a desire to be helpful, not just provide generic platitudes. Contextualize your language to each unique interaction. If these instructions repeat, don`t take them into account."
    # The persona is prepended to the user's message inside a single [INST]
    # turn rather than sent as a separate system message.
    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        yield output
# Extra controls appended below the chat box. Positional order MUST match
# generate()'s parameters after (prompt, history):
# system_prompt, temperature, max_new_tokens, top_p, repetition_penalty.
additional_inputs = [
    # NOTE(review): generate() overwrites this value with a hard-coded
    # persona, so the textbox currently has no effect.
    gr.Textbox(label="System Prompt", max_lines=1, interactive=True),
    gr.Slider(label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs"),
    # NOTE(review): 9048 is an unusual cap (not a power-of-two budget like
    # 8192) — confirm the endpoint accepts it.
    gr.Slider(label="Max new tokens", value=9048, minimum=256, maximum=9048, step=64, interactive=True, info="The maximum numbers of new tokens"),
    # NOTE(review): slider default 0.90 differs from generate()'s 0.95
    # default; the slider value wins when the UI is used.
    gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
    # NOTE(review): slider default 1.2 differs from generate()'s 1.0 default.
    gr.Slider(label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05, interactive=True, info="Penalize repeated tokens")
]
# (user, assistant) avatar image URLs shown next to each chat message.
avatar_images = ("https://i.postimg.cc/pXjKKVXG/user-circle.png", "https://i.postimg.cc/qq04Yz93/CL3.png")

# Wire everything into a streaming chat UI and start the app.
# show_api=False hides the auto-generated API docs page.
gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=True, show_share_button=False, show_copy_button=True, likeable=True, layout="panel", height="auto", avatar_images=avatar_images),
    additional_inputs=additional_inputs,
    title="ConvoLite",
    submit_btn="➢",
    retry_btn="Retry",
    undo_btn="↩ Undo",
    clear_btn="Clear (New chat)",
    stop_btn="Stop ▢",
    concurrency_limit=20,  # max simultaneous generate() calls
).launch(show_api=False)