"""
import gradio as gr

def mental_chat(message, history):
    return givetext(patienttext,newmodel,newtokenizer)
 
demo = gr.ChatInterface(mental_chat)

demo.launch()
"""

#pip install huggingface_hub

#python -c "from huggingface_hub.hf_api import HfFolder; HfFolder.save_token('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')"


#!pip install accelerate
#!pip install -i 



import gradio as gr
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# ##### ##### ##### ##### #####

peft_model_id = "charansr/llama2-7b-chat-hf-therapist"

config = PeftConfig.from_pretrained(peft_model_id,
                                   use_auth_token="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", load_in_4bit=True)

newmodel = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_4bit=True,
                                                use_auth_token="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").to("cpu")

newtokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path,
                                            use_auth_token="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", load_in_4bit=True).to("cpu")

# Load the Lora model
newmodel = PeftModel.from_pretrained(newmodel, peft_model_id,
                                    use_auth_token="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", load_in_4bit=True).to("cpu")

def givetext(input_text, lmodel, ltokenizer):
    try:
        # Build an Alpaca-style instruction prompt around the user's message.
        eval_prompt_pt1 = "\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction: Act like a therapist and respond\n\n### Input: "
        eval_prompt_pt2 = "\n\n\n### Response:\n"
        eval_prompt = eval_prompt_pt1 + input_text + eval_prompt_pt2
        print(eval_prompt, "\n\n")

        print("BEFORE PROCESSING MODEL INPUT")

        model_input = ltokenizer(eval_prompt, return_tensors="pt").to("cpu")

        print("BEFORE EVAL LMODEL")

        lmodel.eval()

        print("BEFORE DOING TORCH.NO_GRAD()")

        with torch.no_grad():
            print("BEFORE GENERATING LMODEL")

            # generate() does not accept device or device_map arguments; the model and
            # the tokenized inputs are already on CPU.
            lmodel_generated = lmodel.generate(**model_input, max_new_tokens=1000)[0]

            print("BEFORE DECODING WITH LTOKENIZER")

            return ltokenizer.decode(lmodel_generated, skip_special_tokens=True)
    except Exception as error:
        print("Exception {error}".format(error=error))
        # Surface the error in the chat window instead of returning None.
        return "Sorry, something went wrong: {error}".format(error=error)

def mental_chat(message, history):
    print("BEFORE CALLING GIVETEXT")
    return givetext(message,newmodel,newtokenizer)

demo = gr.ChatInterface(mental_chat)

demo.launch()
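# When running outside Hugging Face Spaces, a sketch like
# demo.launch(server_name="0.0.0.0", server_port=7860) exposes the app on the local
# network; both are standard gradio launch() parameters.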

"""

import gradio as gr
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "charansr/llama2-7b-chat-hf-therapist"

# PeftModel.from_pretrained needs a base model instance first, then the adapter id,
# so load the base model before attaching the Lora adapter (this variant assumes a CUDA device).
config = PeftConfig.from_pretrained(peft_model_id, use_auth_token="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")
basemodel = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
                                                 use_auth_token="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", device_map="auto")

# Load the Lora model
newmodel = PeftModel.from_pretrained(basemodel, peft_model_id,
                                     use_auth_token="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")

newtokenizer = AutoTokenizer.from_pretrained(peft_model_id, use_auth_token="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")

def givetext(input_text, lmodel, ltokenizer):
    eval_prompt_pt1 = "\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction: Act like a therapist and respond\n\n### Input: "
    eval_prompt_pt2 = "\n\n\n### Response:\n"
    eval_prompt = eval_prompt_pt1 + input_text + eval_prompt_pt2
    print(eval_prompt, "\n\n")
    model_input = ltokenizer(eval_prompt, return_tensors="pt").to("cuda")

    lmodel.eval()
    with torch.no_grad():
        return ltokenizer.decode(lmodel.generate(**model_input, max_new_tokens=1000)[0], skip_special_tokens=True)

def mental_chat(message, history):
    return givetext(message, newmodel, newtokenizer)

demo = gr.ChatInterface(mental_chat)

demo.launch()
 
"""