"""
import gradio as gr

def mental_chat(message, history):
    return givetext(patienttext,newmodel,newtokenizer)

demo = gr.ChatInterface(mental_chat)

demo.launch()
"""

# Setup (run once before launching the app):
#   pip install huggingface_hub accelerate
#   python -c "from huggingface_hub.hf_api import HfFolder; HfFolder.save_token('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')"
#   pip install -i 



import gradio as gr
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
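
# A minimal sketch for supplying the access token: prefer an HF_TOKEN
# environment variable (an assumed name, e.g. a Space secret) and fall back
# to the key hard-coded in this script.
import os

HF_TOKEN = os.environ.get("HF_TOKEN", "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")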

# ##### ##### ##### ##### #####

peft_model_id = "charansr/llama2-7b-chat-hf-therapist"

# Read the adapter config to find the base model the LoRA was trained on.
config = PeftConfig.from_pretrained(peft_model_id, use_auth_token=HF_TOKEN)

# Load the base model. 8-bit loading needs bitsandbytes and a CUDA GPU, so
# fall back to a plain CPU load when no GPU is available.
if torch.cuda.is_available():
    newmodel = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path, return_dict=True,
        load_in_8bit=True, device_map="auto", use_auth_token=HF_TOKEN,
    )
else:
    newmodel = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path, return_dict=True,
        use_auth_token=HF_TOKEN,
    )

# The tokenizer comes from the same base checkpoint.
newtokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, use_auth_token=HF_TOKEN)

# Load the LoRA adapter on top of the base model.
newmodel = PeftModel.from_pretrained(newmodel, peft_model_id, use_auth_token=HF_TOKEN)

def givetext(input_text, lmodel, ltokenizer):
    """Wrap the user's message in the Alpaca-style prompt and generate a reply."""
    eval_prompt_pt1 = "\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction: Act like a therapist and respond\n\n### Input: "
    eval_prompt_pt2 = "\n\n\n### Response:\n"
    eval_prompt = eval_prompt_pt1 + input_text + eval_prompt_pt2
    print(eval_prompt, "\n\n")

    # Match the model's device so both CPU-only and GPU runs work.
    model_input = ltokenizer(eval_prompt, return_tensors="pt").to(lmodel.device)

    lmodel.eval()
    with torch.no_grad():
        # Lower max_new_tokens (e.g. 100) for faster, shorter replies.
        return ltokenizer.decode(
            lmodel.generate(**model_input, max_new_tokens=1000)[0],
            skip_special_tokens=True,
        )
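
# Quick smoke test (assumes the weights above loaded successfully):
# print(givetext("I've been feeling anxious lately.", newmodel, newtokenizer))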

def mental_chat(message, history):
    # ChatInterface passes the latest message plus the chat history;
    # the prompt above only uses the latest message.
    return givetext(message, newmodel, newtokenizer)

demo = gr.ChatInterface(mental_chat)

demo.launch()
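
# Optional local-testing variant: share=True serves a temporary public URL.
# demo.launch(share=True)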

"""

import gradio as gr
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "charansr/llama2-7b-chat-hf-therapist"

# Load the base model and the LoRA adapter in a single call
# (AutoPeftModelForCausalLM resolves the base model from the adapter config;
# PeftModel.from_pretrained expects the base model as its first argument).
newmodel = AutoPeftModelForCausalLM.from_pretrained(peft_model_id, use_auth_token="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",
                                                    device_map="cpu")

newtokenizer = AutoTokenizer.from_pretrained(peft_model_id, use_auth_token="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")

def givetext(input_text, lmodel, ltokenizer):
    eval_prompt_pt1 = "\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction: Act like a therapist and respond\n\n### Input: "
    eval_prompt_pt2 = "\n\n\n### Response:\n"
    eval_prompt = eval_prompt_pt1 + input_text + eval_prompt_pt2
    print(eval_prompt, "\n\n")
    model_input = ltokenizer(eval_prompt, return_tensors="pt").to(lmodel.device)

    lmodel.eval()
    with torch.no_grad():
        return ltokenizer.decode(lmodel.generate(**model_input, max_new_tokens=1000)[0], skip_special_tokens=True)

def mental_chat(message, history):
    return givetext(message, newmodel, newtokenizer)

demo = gr.ChatInterface(mental_chat)

demo.launch()

"""