import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Initialize model and tokenizer.
# float16 is poorly supported on CPU (slow, and some ops lack Half kernels),
# so load in float32 for CPU inference.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct",
    device_map="cpu",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

# Create the text-generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Generation arguments: greedy decoding (do_sample=False), so temperature is
# unused; passing temperature=0.0 alongside do_sample=False triggers a
# warning in recent transformers versions, so it is omitted here.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "do_sample": False,
}


def chat(message, history, system_prompt):
    # Build the chat-format message list, starting with the system prompt
    messages = [{"role": "system", "content": system_prompt}]

    # Add prior turns; Gradio's Chatbot history is a list of
    # (user, assistant) tuples
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})

    # Add the current user message
    messages.append({"role": "user", "content": message})

    # Generate the reply; with return_full_text=False the pipeline returns
    # only the newly generated text, not the prompt
    output = pipe(messages, **generation_args)
    return output[0]["generated_text"]


# Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    system_prompt = gr.Textbox(
        label="System Prompt", value="You are a helpful AI assistant."
    )

    def respond(message, chat_history, system_prompt_text):
        # The system prompt is passed in as an event input so the textbox's
        # current value is used; reading system_prompt.value inside the
        # handler would only ever return the initial value.
        bot_message = chat(message, chat_history, system_prompt_text)
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot, system_prompt], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()