# chatbot / app.py
# shekhardhangar's picture
# Update app.py
# 654c1d2 verified
import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftConfig, PeftModel
import warnings
from threading import Thread
# Suppress every Python warning for the whole process (no category or module filter is given).
warnings.filterwarnings("ignore")
# Identifier of the PEFT adapter that is passed to the `from_pretrained` loaders.
PEFT_MODEL = "givyboy/TinyLlama-1.1B-Chat-v1.0-mental-health-conversational"

# Instruction text that is prepended to the user's question when a prompt is built.
SYSTEM_PROMPT = """Answer the following question truthfully.
If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'."""


# NOTE: the two template helpers keep their original upper-case names and
# parameter names so existing call sites continue to work unchanged.
def USER_PROMPT(x):
    """Return *x* formatted as a human turn followed by an open assistant turn."""
    return f"<HUMAN>: {x}\n<ASSISTANT>: "


def ADD_RESPONSE(x, y):
    """Return a completed exchange: human turn *x* followed by assistant reply *y*."""
    return f"<HUMAN>: {x}\n<ASSISTANT>: {y}"
# Use the GPU when CUDA is available, otherwise run on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 4-bit NF4 quantization settings. They are defined here but not handed to the
# model load below, where the `quantization_config` argument is commented out.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# The adapter config names the base checkpoint that is loaded first.
config = PeftConfig.from_pretrained(PEFT_MODEL)

# Offload options shared by the base-model load and the adapter load.
_OFFLOAD_OPTIONS = {"offload_folder": "offload/", "offload_state_dict": True}

peft_base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    device_map="auto",
    trust_remote_code=True,
    return_dict=True,
    # quantization_config=bnb_config,
    **_OFFLOAD_OPTIONS,
)

# Attach the adapter to the base model, then move the result onto DEVICE.
peft_model = PeftModel.from_pretrained(
    peft_base_model,
    PEFT_MODEL,
    **_OFFLOAD_OPTIONS,
).to(DEVICE)

# The tokenizer comes from the base checkpoint; the EOS token doubles as the pad token.
peft_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
peft_tokenizer.pad_token = peft_tokenizer.eos_token

# Text-generation pipeline built around the already-loaded model and tokenizer.
pipeline = transformers.pipeline(
    "text-generation",
    tokenizer=peft_tokenizer,
    model=peft_model,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
# --- Commented-out alternative implementation that generates via `pipeline` ---
# def format_message(message: str, history: list[str], memory_limit: int = 3) -> str:
#     if len(history) > memory_limit:
#         history = history[-memory_limit:]
#     if len(history) == 0:
#         return f"{SYSTEM_PROMPT}\n{USER_PROMPT(message)}"
#     formatted_message = f"{SYSTEM_PROMPT}\n{ADD_RESPONSE(history[0][0], history[0][1])}"
#     for msg, ans in history[1:]:
#         formatted_message += f"\n{ADD_RESPONSE(msg, ans)}"
#     formatted_message += f"\n{USER_PROMPT(message)}"
#     return formatted_message
#
# def get_model_response(message: str, history: list[str]) -> str:
#     formatted_message = format_message(message, history)
#     sequences = pipeline(
#         formatted_message,
#         do_sample=True,
#         top_k=10,
#         num_return_sequences=1,
#         eos_token_id=peft_tokenizer.eos_token_id,
#         max_length=600,
#     )[0]
#     print(sequences["generated_text"])
#     output = sequences["generated_text"].split("<ASSISTANT>:")[-1].strip()
#     # print(f"Response: {output}")
#     return output
# Empty initial message; not referenced anywhere else in the visible part of this file.
start_message = ""
def user(message, history):
    """Record a new user turn and clear the input box.

    Args:
        message: Text the user just submitted.
        history: List of ``[user_text, assistant_text]`` pairs so far; ``None``
            is treated as an empty history.

    Returns:
        A 2-tuple ``("", new_history)``: the empty string clears the message
        textbox, and ``new_history`` is a new list equal to ``history`` with
        ``[message, ""]`` appended. The list passed in is not mutated.
    """
    # Concatenation builds a fresh list, so the caller's history stays untouched.
    return "", (history or []) + [[message, ""]]
def _history_to_messages(history):
    """Convert chat-UI history into a list of ``{"role", "content"}`` dicts.

    Accepts pair-style history (one ``[user_text, assistant_text]`` item per
    turn) as well as history whose items are already role/content dicts.
    ``None`` is treated as an empty history.
    """
    messages = []
    for item in history or []:
        if isinstance(item, dict):
            # Already role/content shaped: keep only the two keys used below.
            messages.append({"role": item["role"], "content": item["content"]})
            continue
        messages.append({"role": "user", "content": item[0]})
        # Skip the assistant slot when it is None.
        if item[1] is not None:
            messages.append({"role": "assistant", "content": item[1]})
    return messages


def chat(message, history):
    """Stream the model's reply to *message*.

    Args:
        message: The latest user input.
        history: Earlier turns as supplied by the chat UI, either as
            ``[user_text, assistant_text]`` pairs or as role/content dicts.

    Yields:
        The reply text accumulated so far; each yielded string extends the
        previous one by the newly streamed chunk.
    """
    chat_history = _history_to_messages(history)

    # The newest question is wrapped with the system prompt and the
    # <HUMAN>/<ASSISTANT> markers before it is added as the final user turn.
    prompt = f"{SYSTEM_PROMPT}\n{USER_PROMPT(message)}"
    chat_history.append({"role": "user", "content": prompt})

    # Render the whole conversation to a single string, then tokenize it.
    templated = peft_tokenizer.apply_chat_template(
        chat_history, tokenize=False, add_generation_prompt=True
    )
    model_inputs = peft_tokenizer([templated], return_tensors="pt").to(DEVICE)

    # The streamer hands decoded text chunks to this generator; `timeout`
    # bounds how long iteration waits for the next chunk.
    streamer = transformers.TextIteratorStreamer(
        peft_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.95,
        top_k=1000,
        temperature=0.75,
        num_beams=1,
    )

    # Generation runs in a background thread so the chunks can be consumed here.
    worker = Thread(target=peft_model.generate, kwargs=generate_kwargs)
    worker.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        # Yield the reply accumulated so far so the UI can update incrementally.
        yield partial_text

    # The stream is exhausted, so generation has ended; reap the worker thread.
    worker.join()
# Build the chat UI around the streaming handler. This rebinds the module-level
# name `chat` from the handler function to the interface object.
chat = gr.ChatInterface(
    title="Mental Health Chatbot - SHEKHAR",
    fn=chat,
)
# Start the app, passing share=True to launch().
chat.launch(share=True)
# --- Commented-out alternative implementation backed by the OpenAI API ---
# import os
# from openai import OpenAI
# from dotenv import load_dotenv
# import gradio as gr
#
# load_dotenv()
# API_KEY = os.getenv("OPENAI_API_KEY")
# openai = OpenAI(api_key=API_KEY)
# create_msg = lambda x, y: {"role": x, "content": y}
# SYSTEM_PROMPT = create_msg(
#     "system",
#     """You are a helpful mental health chatbot, please answer with care. If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'. If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'.""".strip(),
# )
#
# def predict(message, history):
#     history_openai_format = []
#     history_openai_format.append(SYSTEM_PROMPT)
#     for human, assistant in history:
#         history_openai_format.append({"role": "user", "content": human})
#         history_openai_format.append({"role": "assistant", "content": assistant})
#     history_openai_format.append({"role": "user", "content": message})
#     response = openai.chat.completions.create(
#         model="ft:gpt-3.5-turbo-0613:personal::8kBTG8eh", messages=history_openai_format, temperature=0.35, stream=True
#     )
#     partial_message = ""
#     for chunk in response:
#         if chunk.choices[0].delta.content is not None:
#             partial_message = partial_message + chunk.choices[0].delta.content
#             yield partial_message
#
# gr.ChatInterface(fn=predict, title="Mental Health Chatbot").launch(share=True)