professional-milo

Sleeping

App Files Files Community

professional-milo / app.py

vericudebuget

Update app.py

c81a905 verified 6 months ago

raw

history blame

3.29 kB

	from huggingface_hub import InferenceClient
	import gradio as gr
	from pathlib import Path
	import datetime

	# Module-wide Hugging Face inference client, bound to the Mixtral instruct
	# model; generate() sends every text-generation request through it.
	client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")

	def format_prompt(message, history, system_prompt):
	    """Assemble the [INST]-tagged prompt string for one chat turn.

	    Args:
	        message: The new user message to be answered.
	        history: Iterable of ``(user_prompt, bot_response)`` pairs from
	            earlier turns, oldest first.
	        system_prompt: Text placed once at the very start of the prompt,
	            ahead of the ``<s>`` marker.

	    Returns:
	        ``system_prompt`` + ``<s>`` + every past turn rendered as
	        ``[INST] user [/INST] bot</s> `` + the new message rendered as
	        ``[INST] message [/INST]``.
	    """
	    # Each finished exchange becomes one "[INST] ... [/INST] ...</s> " chunk.
	    past_turns = "".join(
	        f"[INST] {asked} [/INST] {answered}</s> "
	        for asked, answered in history
	    )
	    pending_turn = f"[INST] {message} [/INST]"
	    # The system prompt appears exactly once, before everything else.
	    return system_prompt + "<s>" + past_turns + pending_turn

	def generate(
	    prompt, history, user_system_prompt, temperature=0.9, max_new_tokens=9048, top_p=0.95, repetition_penalty=1.0
	):
	    """Stream the model's reply to ``prompt`` as a progressively longer string.

	    Builds the full prompt from the fixed Milo instructions, the chat
	    history and the new message, requests a streamed completion from the
	    module-level ``client``, and yields the accumulated text after every
	    received token.

	    Args:
	        prompt: The new user message.
	        history: Earlier ``(user_prompt, bot_response)`` pairs, passed on to
	            ``format_prompt``.
	        user_system_prompt: Value of the "System Prompt" textbox. It is
	            accepted for interface compatibility but is not used; only the
	            built-in instructions are sent.
	        temperature: Sampling temperature; values below 0.01 are raised
	            to 0.01.
	        max_new_tokens: Upper bound on generated tokens (coerced to int).
	        top_p: Nucleus-sampling threshold (coerced to float).
	        repetition_penalty: Repetition penalty (coerced to float).

	    Yields:
	        The reply text generated so far, growing by one token per yield.
	    """
	    system_prompt = '''Instructions: Everything else is from the user. You are Milo, an AI assistant created by ConvoLite in 2024 (he/him). Be friendly and straigt to the point. Have Bible based values and principles. Make sure to follow the user request as best as possible as long as it is within these guidelines. Act as an expert in everything you do, provide advice and suggestions, and ask questions if neccesary to get a better understanding of what the user wants.'''
	    # NOTE(review): user_system_prompt is never read, so the "System Prompt"
	    # textbox currently has no effect - confirm whether that is intended.

	    # Floor the temperature at 0.01 rather than passing (near-)zero through.
	    temperature = max(float(temperature), 1e-2)

	    # UI widgets may hand over numbers as int or float; normalise all
	    # sampling parameters the same way before sending them to the endpoint.
	    generate_kwargs = dict(
	        temperature=temperature,
	        max_new_tokens=int(max_new_tokens),
	        top_p=float(top_p),
	        repetition_penalty=float(repetition_penalty),
	        do_sample=True,
	        # A fixed seed is sent with every request.
	        seed=42,
	    )

	    formatted_prompt = format_prompt(f"{prompt}", history, system_prompt)

	    stream = client.text_generation(
	        formatted_prompt,
	        **generate_kwargs,
	        stream=True,
	        details=True,
	        return_full_text=False,
	    )

	    output = ""
	    for response in stream:
	        output += response.token.text
	        # Yield inside the loop so the caller receives partial text as
	        # tokens arrive instead of only the finished reply.
	        yield output

	# Extra controls shown with the chat box. Their order mirrors the parameters
	# of generate() that follow (prompt, history): user_system_prompt,
	# temperature, max_new_tokens, top_p, repetition_penalty.
	additional_inputs = [
	    gr.Textbox(
	        label="System Prompt",
	        max_lines=4,
	        interactive=True,
	    ),
	    gr.Slider(
	        label="Temperature",
	        value=0.9,
	        minimum=0.0,
	        maximum=1.0,
	        step=0.05,
	        interactive=True,
	        info="Higher values produce more diverse outputs",
	    ),
	    gr.Slider(
	        label="Max new tokens",
	        value=10480,
	        minimum=256,
	        maximum=10480,
	        step=64,
	        interactive=True,
	        info="The maximum numbers of new tokens that the AI can generate in a single message",
	    ),
	    gr.Slider(
	        label="Top-p (nucleus sampling)",
	        value=0.90,
	        minimum=0.0,
	        maximum=1,
	        step=0.05,
	        interactive=True,
	        info="Higher values sample more low-probability tokens",
	    ),
	    gr.Slider(
	        label="Repetition penalty",
	        value=1.2,
	        minimum=1.0,
	        maximum=2.0,
	        step=0.05,
	        interactive=True,
	        info="Penalize repeated tokens.",
	    ),
	]

	# (user avatar, bot avatar) image URLs for the chat panel.
	avatar_images = (
	    "https://i.postimg.cc/pXjKKVXG/user-circle.png",
	    "https://i.postimg.cc/qq04Yz93/CL3.png",
	)

	# Chat display widget used by the interface below.
	chatbot = gr.Chatbot(
	    show_label=True,
	    show_share_button=False,
	    show_copy_button=True,
	    likeable=True,
	    layout="panel",
	    height="auto",
	    avatar_images=avatar_images,
	)

	# Wire generate() into a chat UI, then start the app with the API page hidden.
	demo = gr.ChatInterface(
	    fn=generate,
	    chatbot=chatbot,
	    additional_inputs=additional_inputs,
	    title="ConvoLite",
	    submit_btn="➢",
	    retry_btn="Retry",
	    undo_btn="↩ Undo",
	    clear_btn="Clear (New chat)",
	    stop_btn="Stop ▢",
	    concurrency_limit=20,
	    theme=gr.themes.Soft(primary_hue=gr.themes.colors.cyan),
	)
	demo.launch(show_api=False)