import time

import gradio as gr
import requests

from utils import format_as_chat
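
# NOTE: utils.format_as_chat is a helper bundled with this Space and is not shown here.
# The function below is only an assumed reference sketch of what it likely does: turn
# Gradio's (user, assistant) history pairs plus the new user message into a Llama 3
# chat-template prompt, leaving off the trailing assistant header because chatbot_demo
# appends it itself. It is never called; the imported helper is the one actually used.
def _format_as_chat_reference(message, history):
    prompt = "<|begin_of_text|>"
    for user_msg, assistant_msg in history:
        prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
        prompt += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"
    prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{message}"
    return prompt
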
def chatbot_demo(message, history):
    if history:
        input_message = format_as_chat(message, history)
    else:
        input_message = format_as_chat(message, [])
    # Add the assistant delimiter at the beginning ourselves so the output text doesn't contain 'assistant\n\n'
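    # The payload below follows the text-generation-inference /generate request schema
    # ("inputs" plus a "parameters" dict), which is what Hugging Face Inference Endpoints
    # expose for text-generation models.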
    json_obj = {
        "inputs": input_message + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n',
        "parameters": {
            "best_of": 1,
            "decoder_input_details": False,
            "details": True,
            "do_sample": True,
            "frequency_penalty": 0.1,
            "grammar": None,
            "max_new_tokens": 500,
            "repetition_penalty": 1.03,
            "return_full_text": False,
            "seed": None,
            "stop": [
                "photographer"
            ],
            "temperature": 0.5,
            "top_k": 1,
            "top_n_tokens": 5,
            "top_p": 0.95,
            "truncate": None,
            "typical_p": 0.95,
            "watermark": True
        }
    }
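    # POST to the dedicated Inference Endpoint's /generate route. This assumes the
    # endpoint is publicly reachable; a protected endpoint would additionally need an
    # Authorization: Bearer <token> header passed via requests' headers= argument.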
    response = requests.post('https://uf9t072wj5ki2ho4.eu-west-1.aws.endpoints.huggingface.cloud/generate', json=json_obj)
    data = response.json()
    llama_out = data['generated_text']
    # Stream the reply back one character at a time for a typewriter effect
    for i in range(len(llama_out)):
        time.sleep(0.05)
        yield llama_out[: i + 1]
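
# gr.ChatInterface treats chatbot_demo as a generator: each yielded prefix replaces the
# bot message in the chat window, which is what turns the per-character yields above
# into a visible typewriter-style stream.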
demo = gr.ChatInterface(
    fn=chatbot_demo,
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Just ask Llama3 anything you want!", container=False, scale=15),
    examples=[
        'Hey my maaaaaaaaaaaan! Whazzzzzzup!',
        'Write me some crazy conversations between Rick and Morty',
        '帮我想几个关于去意大利旅游的小红书帖子标题。用中文回答后请用英语再翻译一遍',
    ],
    cache_examples=False,
    title="Llama 3 8B Instruct",
)
if __name__ == "__main__":
    demo.launch()
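
# To try this Space locally (assuming gradio, requests and the bundled utils module are
# installed and the endpoint above is reachable): `python app.py` starts the Gradio
# server and prints a local URL for the chat UI.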