Waseem7711 committed
Commit
c30ad40
1 Parent(s): b6e344f

Update app.py

Files changed (1)
  1. app.py +23 -93
app.py CHANGED
@@ -1,104 +1,34 @@
  import streamlit as st
- from transformers import AutoModelForCausalLM, AutoTokenizer
+ from transformers import AutoTokenizer, LlamaForCausalLM
  import torch
- import os
- from dotenv import load_dotenv

- # Load environment variables
- load_dotenv()
-
- # Retrieve Hugging Face API token from environment variables
- HF_API_TOKEN = os.getenv("HF_API_TOKEN")
-
-
-
- import os
-
- # Access Hugging Face API Key from Hugging Face Secrets
- HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
-
- if not HUGGINGFACE_API_KEY:
-     raise ValueError("Hugging Face API Key not found. Please set it in the Hugging Face Secrets.")
-
- # Now you can use the API key securely in your code
-
-
- # Streamlit app setup
- st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
- st.write("This chatbot is powered by the Llama2 model. Ask me anything!")
+ # Title of the app
+ st.title("LLaMA 2 Chatbot")

+ # Load the LLaMA model and tokenizer from Hugging Face
  @st.cache_resource
- def load_model():
-     """
-     Load the tokenizer and model from Hugging Face.
-     This function is cached to prevent re-loading on every interaction.
-     """
-     tokenizer = AutoTokenizer.from_pretrained(
-         "meta-llama/Llama-2-7b-chat-hf",
-         use_auth_token=HF_API_TOKEN  # Use the secret token
-     )
-     model = AutoModelForCausalLM.from_pretrained(
-         "meta-llama/Llama-2-7b-chat-hf",
-         torch_dtype=torch.float16,  # Use float16 for reduced memory usage
-         device_map="auto",
-         use_auth_token=HF_API_TOKEN  # Use the secret token
-     )
+ def load_model_and_tokenizer():
+     # Load the model and tokenizer
+     tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
+     model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
      return tokenizer, model

- # Load the model and tokenizer
- tokenizer, model = load_model()
+ # Function to generate text based on a prompt
+ def generate_text(prompt, tokenizer, model):
+     inputs = tokenizer(prompt, return_tensors="pt")
+     # Generate text
+     with torch.no_grad():
+         generate_ids = model.generate(inputs.input_ids, max_length=50)
+     return tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

- # Initialize session state for conversation history
- if "conversation" not in st.session_state:
-     st.session_state.conversation = []
+ # Input field for user prompt
+ user_input = st.text_input("Enter your prompt:", "Hey, are you conscious? Can you talk to me?")

- # User input
- user_input = st.text_input("You:", "")
+ # Load model and tokenizer
+ tokenizer, model = load_model_and_tokenizer()

- if user_input:
-     st.session_state.conversation.append({"role": "user", "content": user_input})
+ # Generate response when user enters a prompt
+ if st.button("Generate Response"):
      with st.spinner("Generating response..."):
-         try:
-             # Prepare the conversation history for the model
-             conversation_text = ""
-             for message in st.session_state.conversation:
-                 if message["role"] == "user":
-                     conversation_text += f"User: {message['content']}\n"
-                 elif message["role"] == "assistant":
-                     conversation_text += f"Assistant: {message['content']}\n"
-
-             # Encode the input
-             inputs = tokenizer.encode(conversation_text + "Assistant:", return_tensors="pt").to(model.device)
-
-             # Generate a response
-             output = model.generate(
-                 inputs,
-                 max_length=1000,
-                 temperature=0.7,
-                 top_p=0.9,
-                 do_sample=True,
-                 eos_token_id=tokenizer.eos_token_id,
-                 pad_token_id=tokenizer.eos_token_id  # To avoid warnings
-             )
-
-             # Decode the response
-             response = tokenizer.decode(output[0], skip_special_tokens=True)
-
-             # Extract the assistant's reply
-             assistant_reply = response[len(conversation_text + "Assistant: "):].strip()
-
-             # Append the assistant's reply to the conversation history
-             st.session_state.conversation.append({"role": "assistant", "content": assistant_reply})
-
-             # Display the updated conversation
-             conversation_display = ""
-             for message in st.session_state.conversation:
-                 if message["role"] == "user":
-                     conversation_display += f"**You:** {message['content']}\n\n"
-                 elif message["role"] == "assistant":
-                     conversation_display += f"**Bot:** {message['content']}\n\n"
-
-             st.markdown(conversation_display)
-
-         except Exception as e:
-             st.error(f"An error occurred: {e}")
+         response = generate_text(user_input, tokenizer, model)
+         st.write(f"Response: {response}")
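For reference, the generation path used by the new app.py can be exercised outside Streamlit with a short script. This is only a sketch, not part of the commit, and it assumes the gated meta-llama/Llama-2-7b-hf weights are accessible to the runtime (license accepted on the model page and huggingface-cli login completed).

# Minimal standalone check of the same generation path (sketch, not in the commit).
import torch
from transformers import AutoTokenizer, LlamaForCausalLM

# Assumes access to the gated meta-llama/Llama-2-7b-hf checkpoint.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")

prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")
with torch.no_grad():
    generate_ids = model.generate(inputs.input_ids, max_length=50)
print(tokenizer.batch_decode(generate_ids, skip_special_tokens=True)[0])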