Waseem7711 committed
Commit
c30ad40
1 Parent(s): b6e344f

Update app.py

Files changed (1)
  1. app.py +23 -93
app.py CHANGED
@@ -1,104 +1,34 @@
  import streamlit as st
- from transformers import AutoModelForCausalLM, AutoTokenizer
+ from transformers import AutoTokenizer, LlamaForCausalLM
  import torch
- import os
- from dotenv import load_dotenv

- # Load environment variables
- load_dotenv()
-
- # Retrieve Hugging Face API token from environment variables
- HF_API_TOKEN = os.getenv("HF_API_TOKEN")
-
-
-
- import os
-
- # Access Hugging Face API Key from Hugging Face Secrets
- HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
-
- if not HUGGINGFACE_API_KEY:
-     raise ValueError("Hugging Face API Key not found. Please set it in the Hugging Face Secrets.")
-
- # Now you can use the API key securely in your code
-
-
- # Streamlit app setup
- st.title('Llama2 Chatbot Deployment on Hugging Face Spaces')
- st.write("This chatbot is powered by the Llama2 model. Ask me anything!")
+ # Title of the app
+ st.title("LLaMA 2 Chatbot")

+ # Load the LLaMA model and tokenizer from Hugging Face
  @st.cache_resource
- def load_model():
-     """
-     Load the tokenizer and model from Hugging Face.
-     This function is cached to prevent re-loading on every interaction.
-     """
-     tokenizer = AutoTokenizer.from_pretrained(
-         "meta-llama/Llama-2-7b-chat-hf",
-         use_auth_token=HF_API_TOKEN  # Use the secret token
-     )
-     model = AutoModelForCausalLM.from_pretrained(
-         "meta-llama/Llama-2-7b-chat-hf",
-         torch_dtype=torch.float16,  # Use float16 for reduced memory usage
-         device_map="auto",
-         use_auth_token=HF_API_TOKEN  # Use the secret token
-     )
+ def load_model_and_tokenizer():
+     # Load the model and tokenizer
+     tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
+     model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
      return tokenizer, model

- # Load the model and tokenizer
- tokenizer, model = load_model()
+ # Function to generate text based on a prompt
+ def generate_text(prompt, tokenizer, model):
+     inputs = tokenizer(prompt, return_tensors="pt")
+     # Generate text
+     with torch.no_grad():
+         generate_ids = model.generate(inputs.input_ids, max_length=50)
+     return tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

- # Initialize session state for conversation history
- if "conversation" not in st.session_state:
-     st.session_state.conversation = []
+ # Input field for user prompt
+ user_input = st.text_input("Enter your prompt:", "Hey, are you conscious? Can you talk to me?")

- # User input
- user_input = st.text_input("You:", "")
+ # Load model and tokenizer
+ tokenizer, model = load_model_and_tokenizer()

- if user_input:
-     st.session_state.conversation.append({"role": "user", "content": user_input})
+ # Generate response when user enters a prompt
+ if st.button("Generate Response"):
      with st.spinner("Generating response..."):
-         try:
-             # Prepare the conversation history for the model
-             conversation_text = ""
-             for message in st.session_state.conversation:
-                 if message["role"] == "user":
-                     conversation_text += f"User: {message['content']}\n"
-                 elif message["role"] == "assistant":
-                     conversation_text += f"Assistant: {message['content']}\n"
-
-             # Encode the input
-             inputs = tokenizer.encode(conversation_text + "Assistant:", return_tensors="pt").to(model.device)
-
-             # Generate a response
-             output = model.generate(
-                 inputs,
-                 max_length=1000,
-                 temperature=0.7,
-                 top_p=0.9,
-                 do_sample=True,
-                 eos_token_id=tokenizer.eos_token_id,
-                 pad_token_id=tokenizer.eos_token_id  # To avoid warnings
-             )
-
-             # Decode the response
-             response = tokenizer.decode(output[0], skip_special_tokens=True)
-
-             # Extract the assistant's reply
-             assistant_reply = response[len(conversation_text + "Assistant: "):].strip()
-
-             # Append the assistant's reply to the conversation history
-             st.session_state.conversation.append({"role": "assistant", "content": assistant_reply})
-
-             # Display the updated conversation
-             conversation_display = ""
-             for message in st.session_state.conversation:
-                 if message["role"] == "user":
-                     conversation_display += f"**You:** {message['content']}\n\n"
-                 elif message["role"] == "assistant":
-                     conversation_display += f"**Bot:** {message['content']}\n\n"
-
-             st.markdown(conversation_display)
-
-         except Exception as e:
-             st.error(f"An error occurred: {e}")
+         response = generate_text(user_input, tokenizer, model)
+         st.write(f"Response: {response}")
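For reference, the generation path used by the new app.py can be exercised outside Streamlit with a short script. This is only a sketch, not part of the commit, and it assumes the gated meta-llama/Llama-2-7b-hf weights are accessible to the runtime (license accepted on the model page and huggingface-cli login completed).

# Minimal standalone check of the same generation path (sketch, not in the commit).
import torch
from transformers import AutoTokenizer, LlamaForCausalLM

# Assumes access to the gated meta-llama/Llama-2-7b-hf checkpoint.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")

prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")
with torch.no_grad():
    generate_ids = model.generate(inputs.input_ids, max_length=50)
print(tokenizer.batch_decode(generate_ids, skip_special_tokens=True)[0])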