""" import gradio as gr def mental_chat(message, history): return givetext(patienttext,newmodel,newtokenizer) demo = gr.ChatInterface(mental_chat) demo.launch() """ #pip install huggingface_hub #python -c "from huggingface_hub.hf_api import HfFolder; HfFolder.save_token('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')" #!pip install accelerate #!pip install -i import gradio as gr import torch from peft import PeftModel, PeftConfig from transformers import AutoModelForCausalLM, AutoTokenizer # ##### ##### ##### ##### ##### peft_model_id = "charansr/llama2-7b-chat-hf-therapist" config = PeftConfig.from_pretrained(peft_model_id, use_auth_token="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", load_in_8bit=True, device_map='cpu',) newmodel = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='cpu', use_auth_token="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") newtokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, use_auth_token="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", load_in_8bit=True, device_map='cpu',) # Load the Lora model newmodel = PeftModel.from_pretrained(newmodel, peft_model_id, use_auth_token="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", load_in_8bit=True, device_map='cpu') def givetext(input_text,lmodel,ltokenizer): try: eval_prompt_pt1 = "\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction: Act like a therapist and respond\n\n### Input: " eval_prompt_pt2="\n\n\n### Response:\n" eval_prompt=eval_prompt_pt1+input_text+eval_prompt_pt2 print(eval_prompt,"\n\n") print("BEFORE PROCESSING MODEL INPUT") model_input = ltokenizer(eval_prompt, return_tensors="pt").to("cpu") print(" BEFORE EVAL LMODEL") lmodel.eval() print("BEFORE DOING TORCH.NO_GRAD()") with torch.no_grad(): print("BEFORE RETURNING") return (ltokenizer.decode(lmodel.generate(**model_input, max_new_tokens=1000)[0], skip_special_tokens=True)) #return (ltokenizer.decode(lmodel.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True)) except Exception as error: print("Exception {error}".format(error = error)) #txt1 = "My name is {fname}, I'm {age}".format(fname = "John", age = 36) def mental_chat(message, history): print("BEFORE CALLING GIVETEXT") return givetext(message,newmodel,newtokenizer) demo = gr.ChatInterface(mental_chat) demo.launch() # """ import gradio as gr import torch from peft import PeftModel, PeftConfig from transformers import AutoModelForCausalLM, AutoTokenizer peft_model_id = "charansr/llama2-7b-chat-hf-therapist" # Load the Lora model newmodel = PeftModel.from_pretrained(peft_model_id, use_auth_token="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", device_map="cpu", model_id=peft_model_id) newtokenizer = AutoTokenizer.from_pretrained(peft_model_id, use_auth_token="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") def givetext(input_text, lmodel, ltokenizer): eval_prompt_pt1 = \nBelow is an instruction that describes a task. 
Write a response that appropriately completes the request.\n### Instruction: Act like a therapist and respond\n\n### Input: " eval_prompt_pt2 = "\n\n\n### Response:\n" eval_prompt = eval_prompt_pt1 + input_text + eval_prompt_pt2 print(eval_prompt, "\n\n") model_input = ltokenizer(eval_prompt, return_tensors="pt").to("cuda") lmodel.eval() with torch.no_grad(): return ltokenizer.decode(lmodel.generate(**model_input, max_new_tokens=1000)[0], skip_special_tokens=True) def mental_chat(message, history): return givetext(message, newmodel, newtokenizer) demo = gr.ChatInterface(mental_chat) demo.launch() """
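
# A minimal sketch (not part of the original script; it assumes an HF_TOKEN environment
# variable has been exported before launch) of reading the access token from the
# environment instead of hardcoding it in every from_pretrained call above:
"""
import os
from transformers import AutoTokenizer

hf_token = os.environ.get("HF_TOKEN")  # e.g. export HF_TOKEN=hf_... before starting the app
tokenizer = AutoTokenizer.from_pretrained("charansr/llama2-7b-chat-hf-therapist", use_auth_token=hf_token)
"""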