# CedricZ's picture
# Upload app.py
# 9e5115b verified
import gradio as gr
import time
from utils import format_as_chat
import requests
def chatbot_demo(message, history):
    """Stream a Llama 3 reply for ``message`` to the Gradio chat UI.

    The conversation is rendered into a prompt with ``format_as_chat``, posted
    to the hosted ``/generate`` endpoint, and the generated text is then
    yielded as progressively longer prefixes so the reply appears to be typed
    out in the chat window.

    Args:
        message: The user's latest message.
        history: Earlier turns of the conversation; any falsy value (``None``
            or an empty list) is treated as "no history".

    Yields:
        Growing prefixes of the generated reply, one character longer on each
        step, ending with the complete text.

    Raises:
        gr.Error: If the endpoint cannot be reached, times out, answers with
            an HTTP error status, or returns a body without
            ``generated_text``.
    """
    endpoint_url = 'https://uf9t072wj5ki2ho4.eu-west-1.aws.endpoints.huggingface.cloud/generate'
    # (connect, read) timeouts in seconds; without them a stalled endpoint
    # would block this worker forever.
    request_timeout = (10, 120)

    input_message = format_as_chat(message, history or [])

    # Append an assistant header to the prompt so the generated text does not
    # itself start with 'assistant\n\n'.
    json_obj = {
        "inputs": input_message + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n',
        "parameters": {
            "best_of": 1,
            "decoder_input_details": False,
            "details": True,
            "do_sample": True,
            "frequency_penalty": 0.1,
            "grammar": None,
            "max_new_tokens": 500,
            "repetition_penalty": 1.03,
            "return_full_text": False,
            "seed": None,
            "stop": [
                "photographer"
            ],
            "temperature": 0.5,
            # NOTE(review): top_k=1 presumably limits sampling to the single
            # most likely token, making temperature/top_p moot - confirm this
            # is intended.
            "top_k": 1,
            "top_n_tokens": 5,
            "top_p": 0.95,
            "truncate": None,
            "typical_p": 0.95,
            "watermark": True,
        },
    }

    try:
        response = requests.post(endpoint_url, json=json_obj, timeout=request_timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON.
        raise gr.Error(
            "The model endpoint is unavailable right now. Please try again shortly."
        ) from err

    llama_out = data.get('generated_text') if isinstance(data, dict) else None
    if llama_out is None:
        raise gr.Error("The model endpoint returned an unexpected response.")

    # Simulated streaming: reveal one more character every 50 ms.
    for end in range(1, len(llama_out) + 1):
        time.sleep(0.05)
        yield llama_out[:end]
# Example prompts handed to the chat interface.
_example_prompts = [
    'Hey my maaaaaaaaaaaan! Whazzzzzzup!',
    'Write me some crazy conversations between Rick and Morty',
    '帮我想几个关于去意大利旅游的小红书帖子标题。用中文回答后请用英语再翻译一遍',
]

# Conversation pane and message input used by the interface.
_chat_window = gr.Chatbot(height=500)
_prompt_box = gr.Textbox(
    placeholder="Just ask Llama3 anything you want!",
    container=False,
    scale=15,
)

# Chat UI wired to the streaming ``chatbot_demo`` generator.
demo = gr.ChatInterface(
    fn=chatbot_demo,
    chatbot=_chat_window,
    textbox=_prompt_box,
    examples=_example_prompts,
    cache_examples=False,
    title="Llama 3 8B Instruct",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()