""" import gradio as gr def mental_chat(message, history): return givetext(patienttext,newmodel,newtokenizer) demo = gr.ChatInterface(mental_chat) demo.launch() """ #pip install huggingface_hub #python -c "from huggingface_hub.hf_api import HfFolder; HfFolder.save_token('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')" !pip install accelerate !pip install -i import gradio as gr import torch from peft import PeftModel, PeftConfig from transformers import AutoModelForCausalLM, AutoTokenizer # ##### ##### ##### ##### ##### peft_model_id = "charansr/llama2-7b-chat-hf-therapist" config = PeftConfig.from_pretrained(peft_model_id, use_auth_token="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") newmodel = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='auto', use_auth_token="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") newtokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, use_auth_token="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") # Load the Lora model newmodel = PeftModel.from_pretrained(newmodel, peft_model_id, use_auth_token="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") def givetext(input_text,lmodel,ltokenizer): eval_prompt_pt1 = """\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction: Act like a therapist and respond\n\n### Input: """ eval_prompt_pt2="""\n\n\n### Response:\n""" eval_prompt=eval_prompt_pt1+input_text+eval_prompt_pt2 print(eval_prompt,"\n\n") model_input = ltokenizer(eval_prompt, return_tensors="pt").to("cuda") lmodel.eval() with torch.no_grad(): return (ltokenizer.decode(lmodel.generate(**model_input, max_new_tokens=1000)[0], skip_special_tokens=True)) #return (ltokenizer.decode(lmodel.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True)) def mental_chat(message, history): return givetext(patienttext,newmodel,newtokenizer) demo = gr.ChatInterface(mental_chat) demo.launch()