import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftConfig, PeftModel
import warnings
from threading import Thread

warnings.filterwarnings("ignore")

PEFT_MODEL = "givyboy/TinyLlama-1.1B-Chat-v1.0-mental-health-conversational"

SYSTEM_PROMPT = """Answer the following question truthfully.
  If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
  If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'."""

USER_PROMPT = lambda x: f"""<HUMAN>: {x}\n<ASSISTANT>: """
ADD_RESPONSE = lambda x, y: f"""<HUMAN>: {x}\n<ASSISTANT>: {y}"""
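# Illustrative only (made-up question): USER_PROMPT("How can I manage exam stress?")
# produces the two-line prompt
#   <HUMAN>: How can I manage exam stress?
#   <ASSISTANT>:
# and ADD_RESPONSE fills in the assistant's reply after the <ASSISTANT>: tag.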
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
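# Note: this 4-bit NF4 config only takes effect if it is passed as
# quantization_config=bnb_config to AutoModelForCausalLM.from_pretrained;
# that argument is commented out below, so the base model currently loads
# un-quantized and relies on device_map="auto" plus disk offload instead.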

config = PeftConfig.from_pretrained(PEFT_MODEL)

peft_base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    # quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    offload_folder="offload/",
    offload_state_dict=True,
)

peft_model = PeftModel.from_pretrained(
    peft_base_model,
    PEFT_MODEL,
    offload_folder="offload/",
    offload_state_dict=True,
)
peft_model = peft_model.to(DEVICE)

peft_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
peft_tokenizer.pad_token = peft_tokenizer.eos_token  # Llama-style tokenizers define no pad token; reuse EOS.

pipeline = transformers.pipeline(
    "text-generation",
    model=peft_model,
    tokenizer=peft_tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
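# The pipeline above is only referenced by the commented-out
# get_model_response() path below; the active chat() handler calls
# peft_model.generate directly via a streamer.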


# def format_message(message: str, history: list[str], memory_limit: int = 3) -> str:
#     if len(history) > memory_limit:
#         history = history[-memory_limit:]

#     if len(history) == 0:
#         return f"{SYSTEM_PROMPT}\n{USER_PROMPT(message)}"

#     formatted_message = f"{SYSTEM_PROMPT}\n{ADD_RESPONSE(history[0][0], history[0][1])}"

#     for msg, ans in history[1:]:
#         formatted_message += f"\n{ADD_RESPONSE(msg, ans)}"

#     formatted_message += f"\n{USER_PROMPT(message)}"
#     return formatted_message


# def get_model_response(message: str, history: list[str]) -> str:
#     formatted_message = format_message(message, history)
#     sequences = pipeline(
#         formatted_message,
#         do_sample=True,
#         top_k=10,
#         num_return_sequences=1,
#         eos_token_id=peft_tokenizer.eos_token_id,
#         max_length=600,
#     )[0]
#     print(sequences["generated_text"])
#     output = sequences["generated_text"].split("<ASSISTANT>:")[-1].strip()
#     # print(f"Response: {output}")
#     return output


start_message = ""  # Currently unused in this script.


def user(message, history):
    # Append the user's message to the conversation history.
    # Note: this helper is not wired into the gr.ChatInterface below, which
    # manages the user turn itself; it is currently unused.
    return "", history + [[message, ""]]


def chat(message, history):
    # Rebuild the Gradio pair-style history as role/content messages for the
    # tokenizer's chat template.
    chat_history = []
    for item in history:
        chat_history.append({"role": "user", "content": item[0]})
        if item[1] is not None:
            chat_history.append({"role": "assistant", "content": item[1]})

    # Prepend the system prompt to the latest user turn, then render the whole
    # conversation with the chat template.
    message = f"{SYSTEM_PROMPT}\n{USER_PROMPT(message)}"
    chat_history.append({"role": "user", "content": message})
    messages = peft_tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
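    # With add_generation_prompt=True the rendered string ends with the
    # assistant-turn marker, so generation continues as the assistant.
    # (TinyLlama-1.1B-Chat ships a Zephyr-style template roughly like
    # "<|user|>\n...</s>\n<|assistant|>\n" -- illustrative, not verified here.)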

    # Tokenize the messages string
    model_inputs = peft_tokenizer([messages], return_tensors="pt").to(DEVICE)
    streamer = transformers.TextIteratorStreamer(
        peft_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
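    # TextIteratorStreamer exposes generated tokens as a Python iterator;
    # timeout=10.0 makes iteration raise if no new token arrives within 10 s.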
    # Run generation in a background thread so tokens can be streamed back to
    # the UI as they are produced.
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.95,
        top_k=1000,
        temperature=0.75,
        num_beams=1,
    )
    t = Thread(target=peft_model.generate, kwargs=generate_kwargs)
    t.start()

    # Initialize an empty string to store the generated text
    partial_text = ""
    for new_text in streamer:
        # print(new_text)
        partial_text += new_text
        # Yield the accumulated text so gr.ChatInterface streams the partial response.
        yield partial_text


demo = gr.ChatInterface(fn=chat, title="Mental Health Chatbot - SHEKHAR")
demo.launch(share=True)
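
# Minimal sketch of querying the fine-tuned model without the Gradio UI,
# using the same objects loaded above (prompt text is a made-up example):
#
#   prompt = f"{SYSTEM_PROMPT}\n{USER_PROMPT('How can I sleep better?')}"
#   inputs = peft_tokenizer(prompt, return_tensors="pt").to(DEVICE)
#   output_ids = peft_model.generate(**inputs, max_new_tokens=256, do_sample=True, top_p=0.95)
#   print(peft_tokenizer.decode(output_ids[0], skip_special_tokens=True))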

# import os
# from openai import OpenAI
# from dotenv import load_dotenv
# import gradio as gr

# load_dotenv()
# API_KEY = os.getenv("OPENAI_API_KEY")
# openai = OpenAI(api_key=API_KEY)

# create_msg = lambda x, y: {"role": x, "content": y}

# SYSTEM_PROMPT = create_msg(
#     "system",
#     """You are a helpful mental health chatbot, please answer with care. If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'. If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'.""".strip(),
# )


# def predict(message, history):
#     history_openai_format = []
#     history_openai_format.append(SYSTEM_PROMPT)
#     for human, assistant in history:
#         history_openai_format.append({"role": "user", "content": human})
#         history_openai_format.append({"role": "assistant", "content": assistant})
#     history_openai_format.append({"role": "user", "content": message})

#     response = openai.chat.completions.create(
#         model="ft:gpt-3.5-turbo-0613:personal::8kBTG8eh", messages=history_openai_format, temperature=0.35, stream=True
#     )

#     partial_message = ""
#     for chunk in response:
#         if chunk.choices[0].delta.content is not None:
#             partial_message = partial_message + chunk.choices[0].delta.content
#             yield partial_message


# gr.ChatInterface(fn=predict, title="Mental Health Chatbot").launch(share=True)