Waseem7711 committed on
Commit
822b0d9
1 Parent(s): b51e4ba

Update app.py

Files changed (1)
  1. app.py +4 -7
app.py CHANGED
@@ -9,11 +9,8 @@ from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 
-
-
-
-# Retrieve Hugging Face API token from environment variables (if accessing private models)
-HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Ensure you set this in Hugging Face Secrets
+# Retrieve Hugging Face API token from environment variables
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 
 # Streamlit app setup
 st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
@@ -27,13 +24,13 @@ def load_model():
     """
     tokenizer = AutoTokenizer.from_pretrained(
         "meta-llama/Llama-2-7b-chat-hf",
-        use_auth_token= use your api key  # Remove if the model is public
+        use_auth_token=HF_API_TOKEN  # Use the secret token
     )
     model = AutoModelForCausalLM.from_pretrained(
         "meta-llama/Llama-2-7b-chat-hf",
         torch_dtype=torch.float16,  # Use float16 for reduced memory usage
         device_map="auto",
-        use_auth_token=HF_API_TOKEN  # Remove if the model is public
+        use_auth_token=HF_API_TOKEN  # Use the secret token
     )
     return tokenizer, model
 
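For context, a minimal sketch of how the changed lines fit into an app.py like this one: the HF_API_TOKEN read from the environment (set as a Hugging Face Spaces secret or in a local .env file) is passed to both from_pretrained calls so the gated Llama-2 checkpoint can be downloaded. The st.cache_resource decorator and the prompt/generation loop below are illustrative assumptions, not part of this commit.

import os

import streamlit as st
import torch
from dotenv import load_dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load environment variables (HF_API_TOKEN comes from Spaces Secrets or .env)
load_dotenv()
HF_API_TOKEN = os.getenv("HF_API_TOKEN")

st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')


@st.cache_resource  # assumption: cache the model across Streamlit reruns
def load_model():
    """Load the gated Llama-2 chat model using the secret token."""
    tokenizer = AutoTokenizer.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        use_auth_token=HF_API_TOKEN,  # Use the secret token
    )
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        torch_dtype=torch.float16,  # Use float16 for reduced memory usage
        device_map="auto",
        use_auth_token=HF_API_TOKEN,  # Use the secret token
    )
    return tokenizer, model


tokenizer, model = load_model()

# Illustrative chat turn (not shown in the diff)
prompt = st.text_input("Your message")
if prompt:
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=256)
    st.write(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Note that recent transformers releases prefer token= over the deprecated use_auth_token= argument; the sketch keeps use_auth_token to match the code in this commit.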