import os

import gradio as gr
import torch
from transformers import pipeline

DEFAULT_SYSTEM_PROMPT = """\
You are Menthelp, a mental health chatbot. Please help the user with their concerns.\
"""
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024

DESCRIPTION = (
    "This Space demonstrates model "
    "[Llama-2-7b-chat](https://huggingface.co/meta-llama/Llama-2-7b-chat) by Meta, "
    "a Llama 2 model with 7B parameters fine-tuned for chat instructions. "
    "Feel free to play with it!"
)
# Surface the CPU warning in the UI description rather than in the system
# prompt, where it would only confuse the model.
if not torch.cuda.is_available():
    DESCRIPTION += "\n\nRunning on CPU 🥶 This demo does not work on CPU."
# Create a text-generation pipeline for the Llama-2-7b-chat model ("chat" is
# not a valid pipeline task, and the tokenizer argument must not be an access
# token). Llama 2 is gated: request access on the Hub, then authenticate with
# a token (e.g. `huggingface-cli login` or the HF_TOKEN environment variable);
# never hardcode the token in source. `device_map="auto"` requires `accelerate`.
pipe = pipeline(
    "text-generation",
    model="meta-llama/Llama-2-7b-chat-hf",
    torch_dtype=torch.float16,
    device_map="auto",
    token=os.environ.get("HF_TOKEN"),
)

# Interact with the pipeline: with a recent transformers release, passing the
# conversation as a list of chat messages applies the model's chat template,
# including the system prompt.
def llama_2_7b_chatbot(message):
    messages = [
        {"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
        {"role": "user", "content": message},
    ]
    output = pipe(messages, max_new_tokens=DEFAULT_MAX_NEW_TOKENS)
    # The pipeline returns the whole conversation; the reply is the last message.
    return output[0]["generated_text"][-1]["content"]

# Define the Gradio interface
iface = gr.Interface(
    fn=llama_2_7b_chatbot,
    inputs=gr.Textbox(label="Input your message:", placeholder="Type a message..."),
    outputs=gr.Textbox(label="Chatbot response:"),
    title="Llama-2 7B Chat",
    description=DESCRIPTION,
)

# Launch the Gradio interface
iface.launch()
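# A minimal sketch of how this demo might be launched, assuming the script is
# saved as app.py (a hypothetical filename) and HF_TOKEN holds a valid
# Hugging Face access token:
#   HF_TOKEN=hf_xxx python app.py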