# Source: Hugging Face Space "llama2", file app.py (3.34 kB)
# Last commit: "Update app.py" by Waseem7711 (822b0d9, verified)
# app.py
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os
from dotenv import load_dotenv
# Run python-dotenv's loader before the token is read below, so the lookup
# sees whatever it added to the environment.
load_dotenv()

# Hugging Face API token taken from the environment; None when the
# HF_API_TOKEN variable is not set.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")

# Page header: title followed by a one-line introduction.
st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
st.write("This chatbot is powered by the Llama2 model. Ask me anything!")
@st.cache_resource
def load_model():
    """Load the Llama-2 chat tokenizer and model from Hugging Face.

    The function is decorated with ``st.cache_resource`` so the tokenizer
    and model are not re-loaded on every interaction with the app.

    Returns:
        A ``(tokenizer, model)`` tuple. The model is loaded with float16
        weights and ``device_map="auto"``.
    """
    model_id = "meta-llama/Llama-2-7b-chat-hf"

    # `token` is the current keyword for passing the access token;
    # `use_auth_token` is deprecated in transformers.
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_API_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,  # Use float16 for reduced memory usage
        device_map="auto",
        token=HF_API_TOKEN,
    )
    return tokenizer, model
# Load the model and tokenizer
tokenizer, model = load_model()

# Initialize session state for conversation history: a list of
# {"role": "user" | "assistant", "content": str} dicts.
if "conversation" not in st.session_state:
    st.session_state.conversation = []

# User input
user_input = st.text_input("You:", "")

if user_input:
    st.session_state.conversation.append({"role": "user", "content": user_input})

    with st.spinner("Generating response..."):
        try:
            # Flatten the history into a plain-text transcript and end it with
            # an open "Assistant:" turn for the model to complete.
            prompt_labels = {"user": "User", "assistant": "Assistant"}
            conversation_text = "".join(
                f"{prompt_labels[message['role']]}: {message['content']}\n"
                for message in st.session_state.conversation
                if message["role"] in prompt_labels
            )
            prompt = conversation_text + "Assistant:"

            # Encode the prompt and remember how many tokens it occupies so
            # the reply can be separated from it after generation.
            inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
            prompt_length = inputs.shape[-1]

            # Generate a response. `max_new_tokens` limits only the reply;
            # `max_length` would also count the prompt tokens, so a growing
            # history would leave less and less room for an answer.
            output = model.generate(
                inputs,
                max_new_tokens=1000,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,  # To avoid warnings
            )

            # Decode only the tokens produced after the prompt. Slicing the
            # decoded text by the prompt's character length is unreliable,
            # because decoding is not guaranteed to reproduce the prompt
            # character for character.
            reply_ids = output[0][prompt_length:]
            assistant_reply = tokenizer.decode(
                reply_ids, skip_special_tokens=True
            ).strip()

            # Append the assistant's reply to the conversation history
            st.session_state.conversation.append(
                {"role": "assistant", "content": assistant_reply}
            )

            # Display the updated conversation as Markdown, one paragraph
            # per message.
            display_labels = {"user": "You", "assistant": "Bot"}
            conversation_display = "".join(
                f"**{display_labels[message['role']]}:** {message['content']}\n\n"
                for message in st.session_state.conversation
                if message["role"] in display_labels
            )
            st.markdown(conversation_display)
        except Exception as e:
            # UI boundary: show the failure in the page rather than letting
            # the exception propagate out of the script.
            st.error(f"An error occurred: {e}")