# Gradio chat front-end for a PEFT (LoRA) adapter loaded on top of its base
# causal language model.
#
# NOTE(review): `gr`, `torch`, `PeftConfig`, `PeftModel`,
# `AutoModelForCausalLM` and `AutoTokenizer` are used below, but their imports
# are not visible in this part of the file -- confirm they are imported above.

# Adapter repository; its config names the base model the adapter applies to.
peft_model_id = "charansr/llama2-7b-chat-hf-therapist"
config = PeftConfig.from_pretrained(peft_model_id)

newmodel = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    load_in_8bit=True,
    device_map='auto',
)
newtokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
newmodel = PeftModel.from_pretrained(newmodel, peft_model_id)


def givetext(input_text, lmodel, ltokenizer, max_new_tokens=1000):
    """Wrap *input_text* in the instruction prompt and generate a completion.

    Args:
        input_text: The text inserted after ``### Input:`` in the prompt.
        lmodel: Model used for generation; switched to eval mode here.
        ltokenizer: Tokenizer used to encode the prompt and decode the output.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to 1000, the value that was previously hard-coded.

    Returns:
        The first generated sequence decoded to a string with special tokens
        skipped.
        NOTE(review): for causal LMs the generated sequence normally starts
        with the prompt tokens, so the prompt is presumably echoed in the
        returned text -- confirm whether callers want it stripped.
    """
    eval_prompt_pt1 = (
        "\nBelow is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
        "\n### Instruction: Act like a therapist and respond"
        "\n\n### Input: "
    )
    eval_prompt_pt2 = "\n\n\n### Response:\n"
    eval_prompt = eval_prompt_pt1 + input_text + eval_prompt_pt2
    print(eval_prompt, "\n\n")

    # The encoded prompt is moved to the GPU before generation.
    model_input = ltokenizer(eval_prompt, return_tensors="pt").to("cuda")

    lmodel.eval()
    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated = lmodel.generate(**model_input, max_new_tokens=max_new_tokens)
    return ltokenizer.decode(generated[0], skip_special_tokens=True)


def mental_chat(message, history):
    """Chat callback: answer the user's latest *message*.

    Args:
        message: The text the user just submitted.
        history: Earlier turns of the conversation; accepted because it is
            part of the callback signature, but not used.

    Returns:
        The text produced by ``givetext`` for *message*.
    """
    # Previously passed the undefined name `patienttext`, which raised
    # NameError on every chat turn; the user's text arrives as `message`.
    return givetext(message, newmodel, newtokenizer)


demo = gr.ChatInterface(mental_chat)
demo.launch()