Breeze

Runtime error

App Files Files Community

Breeze / app.py

cllatMTK

Update app.py

b62d3e2 verified 10 months ago

raw

history blame contribute delete

6.38 kB

	import logging
	import os
	from urllib.parse import quote_plus

	import gradio as gr
	from pymongo import MongoClient
	from pymongo.errors import PyMongoError
	from text_generation import Client
	from transformers import AutoTokenizer

	from conversation import get_conv_template

	# MongoDB connection settings are read from the environment. DB_NAME is used
	# both as the cluster host prefix in the URI and as the database name.
	DB_NAME = os.getenv("MONGO_DBNAME", "taiwan-llm")
	USER = os.getenv("MONGO_USER")
	PASSWORD = os.getenv("MONGO_PASSWORD")

	# Credentials embedded in a MongoDB URI must be percent-escaped: an unescaped
	# '@', ':', '/' or '%' in either value makes the URI unparsable.
	# str() keeps the rendering the f-string already produced for an unset
	# variable (the literal "None"), so only values that need escaping change.
	uri = (
	    f"mongodb+srv://{quote_plus(str(USER))}:{quote_plus(str(PASSWORD))}"
	    f"@{DB_NAME}.kvwjiok.mongodb.net/?retryWrites=true&w=majority"
	)
	mongo_client = MongoClient(uri)
	db = mongo_client[DB_NAME]
	# Collection that receives one document per generated reply.
	conversations_collection = db['conversations']

	# Markdown rendered as the first element of the page.
	DESCRIPTION = """
	# Breeze
	"""

	# Markdown rendered as the last element of the page (currently blank).
	LICENSE = """
	"""

	# Pre-filled value of the "System prompt" textbox.
	DEFAULT_SYSTEM_PROMPT = (
	    "You are a helpful AI assistant built by MediaTek Research. "
	    "The user you are helping speaks Traditional Chinese and comes from Taiwan."
	)

	# Bounds for the "Max new tokens" slider.
	DEFAULT_MAX_NEW_TOKENS = 1536
	MAX_MAX_NEW_TOKENS = 4096

	# Token budget left for the prompt: 8192 minus the largest allowed completion
	# and 10 spare tokens.
	max_prompt_length = 8192 - 10 - MAX_MAX_NEW_TOKENS
	eos_token = "</s>"

	# Streaming text-generation client; ENDPOINT_URL overrides the local default.
	endpoint_url = os.getenv("ENDPOINT_URL", "http://127.0.0.1:8080")
	client = Client(endpoint_url, timeout=120)

	# Tokenizer used to measure (and, if needed, truncate) the prompt.
	model_name = "yentinglin/Taiwan-LLM-7B-v2.0-chat"
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# Build the chat UI. Components, handlers and event wiring all live inside the
	# Blocks context so that the listeners are registered on `demo`.
	with gr.Blocks() as demo:
	    gr.Markdown(DESCRIPTION)

	    chatbot = gr.Chatbot()
	    with gr.Row():
	        msg = gr.Textbox(
	            container=False,
	            show_label=False,
	            placeholder='Type a message...',
	            scale=10,
	        )
	        submit_button = gr.Button(
	            'Submit',
	            variant='primary',
	            scale=1,
	            min_width=0,
	        )

	    with gr.Row():
	        retry_button = gr.Button('🔄 Retry', variant='secondary')
	        undo_button = gr.Button('↩️ Undo', variant='secondary')
	        clear = gr.Button('🗑️ Clear', variant='secondary')

	    # Holds the user message removed by Retry/Undo between chained steps.
	    saved_input = gr.State()

	    with gr.Accordion(label='Advanced options', open=False):
	        system_prompt = gr.Textbox(
	            label='System prompt',
	            value=DEFAULT_SYSTEM_PROMPT,
	            lines=6,
	        )
	        max_new_tokens = gr.Slider(
	            label='Max new tokens',
	            minimum=1,
	            maximum=MAX_MAX_NEW_TOKENS,
	            step=1,
	            value=DEFAULT_MAX_NEW_TOKENS,
	        )
	        temperature = gr.Slider(
	            label='Temperature',
	            minimum=0.1,
	            maximum=1.0,
	            step=0.1,
	            value=0.3,
	        )
	        top_p = gr.Slider(
	            label='Top-p (nucleus sampling)',
	            minimum=0.05,
	            maximum=1.0,
	            step=0.05,
	            value=0.95,
	        )
	        top_k = gr.Slider(
	            label='Top-k',
	            minimum=1,
	            maximum=1000,
	            step=1,
	            value=50,
	        )

	    def user(user_message, history):
	        """Add the submitted message to the chat as a new, unanswered turn.

	        Returns an empty string (which clears the textbox) and a new history
	        list ending with ``[user_message, None]``; the input list is not
	        mutated.
	        """
	        return "", history + [[user_message, None]]

	    def bot(history, max_new_tokens, temperature, top_p, top_k, system_prompt):
	        """Stream the model's reply into the last turn of ``history``.

	        Builds a prompt from the "twllm_v2" conversation template, keeps only
	        the trailing tokens if it exceeds ``max_prompt_length``, and yields the
	        updated history after every non-special token so the chatbot updates
	        incrementally. Once streaming ends, the conversation and the sampling
	        settings are written to MongoDB.

	        Args:
	            history: Chat turns as ``[user_text, bot_text]`` pairs; the last
	                pair is the one being answered and is modified in place.
	            max_new_tokens, temperature, top_p, top_k: Sampling settings
	                forwarded to the generation endpoint.
	            system_prompt: System message placed on the conversation template.

	        Yields:
	            The history list with the partial reply in its last turn.
	        """
	        conv = get_conv_template("twllm_v2").copy()
	        # Map human to the template's first role and gpt to its second.
	        roles = {"human": conv.roles[0], "gpt": conv.roles[1]}
	        conv.system = system_prompt
	        # Distinct loop names: the handlers `user` and `bot` must not be
	        # shadowed here.
	        for user_text, bot_text in history:
	            conv.append_message(roles["human"], user_text)
	            conv.append_message(roles["gpt"], bot_text)

	        prompt = conv.get_prompt()
	        prompt_tokens = tokenizer.encode(prompt)
	        if len(prompt_tokens) > max_prompt_length:
	            # Keep the most recent max_prompt_length - 1 tokens; the beginning
	            # of the prompt is what gets dropped.
	            prompt = tokenizer.decode(prompt_tokens[-max_prompt_length + 1:])

	        history[-1][1] = ""
	        for response in client.generate_stream(
	            prompt,
	            max_new_tokens=max_new_tokens,
	            temperature=temperature,
	            top_p=top_p,
	            top_k=top_k,
	            repetition_penalty=1.1,
	        ):
	            if not response.token.special:
	                history[-1][1] += response.token.text
	                yield history

	        # After generating the response, store the conversation in MongoDB.
	        conversation_document = {
	            "model_name": model_name,
	            "history": history,
	            "system_prompt": system_prompt,
	            "max_new_tokens": max_new_tokens,
	            "temperature": temperature,
	            "top_p": top_p,
	            "top_k": top_k,
	        }
	        try:
	            conversations_collection.insert_one(conversation_document)
	        except PyMongoError:
	            # The reply has already been streamed to the user; a storage
	            # failure is logged instead of surfacing as a chat error.
	            logging.getLogger(__name__).exception(
	                "Failed to store conversation in MongoDB")

	    # Every trigger that starts generation feeds `bot` the same components.
	    bot_inputs = [
	        chatbot,
	        max_new_tokens,
	        temperature,
	        top_p,
	        top_k,
	        system_prompt,
	    ]

	    # Enter key and Submit button: record the message, then generate.
	    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
	        fn=bot,
	        inputs=bot_inputs,
	        outputs=chatbot,
	    )
	    submit_button.click(
	        user, [msg, chatbot], [msg, chatbot], queue=False
	    ).then(
	        fn=bot,
	        inputs=bot_inputs,
	        outputs=chatbot,
	    )

	    def delete_prev_fn(
	            history: list[list[str]]) -> tuple[list[list[str]], str]:
	        """Remove the last turn and return its user message.

	        Returns the shortened history (mutated in place) and the removed user
	        message, or ``''`` when the history was empty or the message was falsy.
	        """
	        try:
	            message, _ = history.pop()
	        except IndexError:
	            message = ''
	        return history, message or ''

	    def display_input(message: str,
	                      history: list[list[str]]) -> list[list[str]]:
	        """Re-append ``message`` as a turn with an empty reply.

	        The turn is a list, matching what ``user`` appends, because ``bot``
	        assigns into ``history[-1][1]``.
	        """
	        history.append([message, ''])
	        return history

	    # Retry: drop the last turn, put its user message back, regenerate.
	    retry_button.click(
	        fn=delete_prev_fn,
	        inputs=chatbot,
	        outputs=[chatbot, saved_input],
	        api_name=False,
	        queue=False,
	    ).then(
	        fn=display_input,
	        inputs=[saved_input, chatbot],
	        outputs=chatbot,
	        api_name=False,
	        queue=False,
	    ).then(
	        fn=bot,
	        inputs=bot_inputs,
	        outputs=chatbot,
	    )

	    # Undo: drop the last turn and restore its user message to the textbox.
	    undo_button.click(
	        fn=delete_prev_fn,
	        inputs=chatbot,
	        outputs=[chatbot, saved_input],
	        api_name=False,
	        queue=False,
	    ).then(
	        fn=lambda x: x,
	        inputs=[saved_input],
	        outputs=msg,
	        api_name=False,
	        queue=False,
	    )

	    # Clear: reset the chatbot value.
	    clear.click(lambda: None, None, chatbot, queue=False)

	    gr.Markdown(LICENSE)

	# Turn on request queuing (concurrency_count=4, at most 128 queued requests)
	# and start serving; queue() returns the Blocks instance, so launch() chains.
	demo.queue(concurrency_count=4, max_size=128).launch()