"""Gradio chat demo that forwards prompts to a hosted Llama 3 8B Instruct endpoint."""

import time

import gradio as gr
import requests

from utils import format_as_chat

# Text-generation endpoint that receives the formatted prompt.
ENDPOINT_URL = (
    "https://uf9t072wj5ki2ho4.eu-west-1.aws.endpoints.huggingface.cloud/generate"
)
# Upper bound (seconds) on waiting for the endpoint, so a stalled request
# cannot block the UI worker forever.
REQUEST_TIMEOUT_SECONDS = 120
# Pause between successive partial outputs to produce a "typing" effect.
STREAM_DELAY_SECONDS = 0.05


def chatbot_demo(message, history):
    """Send the conversation to the model endpoint and stream back the reply.

    Args:
        message: The latest user message from the chat textbox.
        history: Previous turns as supplied by ``gr.ChatInterface``; a falsy
            value (``None`` or empty) is treated as an empty history.

    Yields:
        Progressively longer prefixes of the generated text, one extra
        character at a time, so the UI renders the answer incrementally.

    Raises:
        gr.Error: If the request fails, times out, returns a non-success
            HTTP status, or the response lacks a ``generated_text`` field.
    """
    input_message = format_as_chat(message, history or [])

    # Add another assistant delimiter at the end of the prompt to make sure
    # the output text doesn't contain 'assistant\n\n'.
    json_obj = {
        "inputs": input_message
        + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n',
        "parameters": {
            "best_of": 1,
            "decoder_input_details": False,
            "details": True,
            "do_sample": True,
            "frequency_penalty": 0.1,
            "grammar": None,
            "max_new_tokens": 500,
            "repetition_penalty": 1.03,
            "return_full_text": False,
            "seed": None,
            "stop": ["photographer"],
            "temperature": 0.5,
            "top_k": 1,
            "top_n_tokens": 5,
            "top_p": 0.95,
            "truncate": None,
            "typical_p": 0.95,
            "watermark": True,
        },
    }

    try:
        response = requests.post(
            ENDPOINT_URL, json=json_obj, timeout=REQUEST_TIMEOUT_SECONDS
        )
        # Surface 4xx/5xx responses here instead of failing later on a
        # missing key in an error payload.
        response.raise_for_status()
        llama_out = response.json()["generated_text"]
    except (requests.RequestException, ValueError, KeyError, TypeError) as err:
        raise gr.Error(f"The model endpoint request failed: {err}") from err

    # Emit the reply one character at a time to simulate streaming.
    for end in range(1, len(llama_out) + 1):
        time.sleep(STREAM_DELAY_SECONDS)
        yield llama_out[:end]


demo = gr.ChatInterface(
    fn=chatbot_demo,
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(
        placeholder="Just ask Llama3 anything you want!",
        container=False,
        scale=15,
    ),
    examples=[
        'Hey my maaaaaaaaaaaan! Whazzzzzzup!',
        'Write me some crazy conversations between Rick and Morty',
        '帮我想几个关于去意大利旅游的小红书帖子标题。用中文回答后请用英语再翻译一遍',
    ],
    cache_examples=False,
    title="Llama 3 8B Instruct",
)

if __name__ == "__main__":
    demo.launch()