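# app.py — Gradio chat Space serving "charansr/llama2-7b-chat-hf-therapist",
# a LoRA fine-tune of LLaMA-2-7B-chat loaded with PEFT and run on CPU.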
"""
import gradio as gr
def mental_chat(message, history):
return givetext(patienttext,newmodel,newtokenizer)
demo = gr.ChatInterface(mental_chat)
demo.launch()
"""
# Setup (run once):
#   pip install huggingface_hub accelerate
#   python -c "from huggingface_hub.hf_api import HfFolder; HfFolder.save_token('<your-hf-token>')"
import gradio as gr
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
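import os

# Hugging Face access token, read from the environment instead of being
# hardcoded in the source. HF_TOKEN is an assumed secret name; set it in the
# Space's settings under "Repository secrets".
HF_TOKEN = os.environ.get("HF_TOKEN")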
peft_model_id = "charansr/llama2-7b-chat-hf-therapist"

# The adapter config records which base model the LoRA was trained against.
# (load_in_8bit and device_map are not valid arguments here.)
config = PeftConfig.from_pretrained(peft_model_id, use_auth_token=HF_TOKEN)

# Load the base model on CPU. load_in_8bit requires a GPU with bitsandbytes,
# so it is omitted for this CPU-only Space.
newmodel = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    device_map="cpu",
    use_auth_token=HF_TOKEN,
)
newtokenizer = AutoTokenizer.from_pretrained(
    config.base_model_name_or_path,
    use_auth_token=HF_TOKEN,
)

# Load the Lora model: apply the adapter weights on top of the base model.
newmodel = PeftModel.from_pretrained(
    newmodel,
    peft_model_id,
    use_auth_token=HF_TOKEN,
    device_map="cpu",
)
def givetext(input_text, lmodel, ltokenizer):
    """Wrap the user's message in the instruction prompt, generate, and decode."""
    try:
        eval_prompt_pt1 = "\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction: Act like a therapist and respond\n\n### Input: "
        eval_prompt_pt2 = "\n\n\n### Response:\n"
        eval_prompt = eval_prompt_pt1 + input_text + eval_prompt_pt2
        print(eval_prompt, "\n\n")

        # Tokenize on CPU to match the model's device.
        model_input = ltokenizer(eval_prompt, return_tensors="pt").to("cpu")

        lmodel.eval()
        with torch.no_grad():
            lmodel = lmodel.to("cpu")
            # generate() takes no device or device_map argument; it runs on
            # whatever device the model already lives on.
            lmodel_generated = lmodel.generate(**model_input, max_new_tokens=1000)[0]
        return ltokenizer.decode(lmodel_generated, skip_special_tokens=True)
    except Exception as error:
        # Return the error text so the chat UI shows something instead of None.
        print("Exception: {error}".format(error=error))
        return "Sorry, generation failed: {error}".format(error=error)
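# Example call (hypothetical input), useful as a local smoke test:
#   print(givetext("I have been feeling anxious lately.", newmodel, newtokenizer))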
def mental_chat(message, history):
    # ChatInterface passes the new message plus the chat history; the model is
    # stateless here, so only the current message is used.
    return givetext(message, newmodel, newtokenizer)

demo = gr.ChatInterface(mental_chat)
demo.launch()
"""
import gradio as gr
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
peft_model_id = "charansr/llama2-7b-chat-hf-therapist"
# Load the Lora model
newmodel = PeftModel.from_pretrained(peft_model_id, use_auth_token="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", device_map="cpu",
model_id=peft_model_id)
newtokenizer = AutoTokenizer.from_pretrained(peft_model_id, use_auth_token="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")
def givetext(input_text, lmodel, ltokenizer):
eval_prompt_pt1 = \nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction: Act like a therapist and respond\n\n### Input: "
eval_prompt_pt2 = "\n\n\n### Response:\n"
eval_prompt = eval_prompt_pt1 + input_text + eval_prompt_pt2
print(eval_prompt, "\n\n")
model_input = ltokenizer(eval_prompt, return_tensors="pt").to("cuda")
lmodel.eval()
with torch.no_grad():
return ltokenizer.decode(lmodel.generate(**model_input, max_new_tokens=1000)[0], skip_special_tokens=True)
def mental_chat(message, history):
return givetext(message, newmodel, newtokenizer)
demo = gr.ChatInterface(mental_chat)
demo.launch()
""" |