Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ import torch
|
|
10 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
11 |
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
|
12 |
|
|
|
13 |
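# Set up environment variables. NOTE(review): the API key on the next line is hard-coded in a public file; rotate it and read it from the Space's secrets instead.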
|
14 |
os.environ["GROQ_API_KEY"] = "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"
|
15 |
|
@@ -23,6 +24,10 @@ whisper_model = whisper.load_model("base") # You can choose other models like "
|
|
23 |
# Updated model loading code with disk offloading
|
24 |
|
25 |
|
|
|
|
|
|
|
|
|
26 |
# Specify the folder where offloaded model parts will be stored
|
27 |
offload_folder = "./offload"
|
28 |
|
@@ -32,8 +37,9 @@ os.makedirs(offload_folder, exist_ok=True)
|
|
32 |
# Initialize the tokenizer
|
33 |
rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
|
34 |
|
35 |
-
# Initialize empty weights
|
36 |
with init_empty_weights():
|
|
|
37 |
rag_model = AutoModelForCausalLM.from_pretrained(
|
38 |
"himmeow/vi-gemma-2b-RAG",
|
39 |
torch_dtype=torch.bfloat16,
|
@@ -41,7 +47,7 @@ with init_empty_weights():
|
|
41 |
offload_folder=offload_folder
|
42 |
)
|
43 |
|
44 |
-
# Dispatch the model
|
45 |
rag_model = load_checkpoint_and_dispatch(
|
46 |
rag_model,
|
47 |
"himmeow/vi-gemma-2b-RAG",
|
@@ -50,9 +56,16 @@ rag_model = load_checkpoint_and_dispatch(
|
|
50 |
offload_state_dict=True
|
51 |
)
|
52 |
|
53 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
if torch.cuda.is_available():
|
55 |
-
rag_model.to("cuda")
|
56 |
|
57 |
# Load PDF content
|
58 |
def load_pdf(pdf_path):
|
|
|
10 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
11 |
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
|
12 |
|
13 |
+
|
14 |
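# Set up environment variables. NOTE(review): the API key on the next line is hard-coded in a public file; rotate it and read it from the Space's secrets instead.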
|
15 |
os.environ["GROQ_API_KEY"] = "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"
|
16 |
|
|
|
24 |
# Updated model loading code with disk offloading
|
25 |
|
26 |
|
27 |
+
# Specify the folder where offloaded model parts will be stored
|
28 |
+
offload_folder = "./offload"
|
29 |
+
|
30 |
+
|
31 |
# Specify the folder where offloaded model parts will be stored
|
32 |
offload_folder = "./offload"
|
33 |
|
|
|
37 |
# Initialize the tokenizer
|
38 |
rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
|
39 |
|
40 |
+
# Initialize empty weights context
|
41 |
with init_empty_weights():
|
42 |
+
# Load the model with meta tensors
|
43 |
rag_model = AutoModelForCausalLM.from_pretrained(
|
44 |
"himmeow/vi-gemma-2b-RAG",
|
45 |
torch_dtype=torch.bfloat16,
|
|
|
47 |
offload_folder=offload_folder
|
48 |
)
|
49 |
|
50 |
+
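# Dispatch the model onto the available devices and load its weights. NOTE(review): `load_checkpoint_and_dispatch` expects a local checkpoint file or folder path; a Hub repo id is passed here, so confirm the weights are actually found.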
|
51 |
rag_model = load_checkpoint_and_dispatch(
|
52 |
rag_model,
|
53 |
"himmeow/vi-gemma-2b-RAG",
|
|
|
56 |
offload_state_dict=True
|
57 |
)
|
58 |
|
59 |
+
# Ensure weights are properly tied if necessary
|
60 |
+
if hasattr(rag_model, 'tie_weights'):
|
61 |
+
rag_model.tie_weights()
|
62 |
+
|
63 |
+
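# Intended to move the model out of the meta state via `to_empty()`. NOTE(review): `to_empty()` requires a keyword-only `device=` argument and allocates uninitialized storage, so this call as written likely raises or discards the weights just loaded.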
|
64 |
+
rag_model = rag_model.to_empty()
|
65 |
+
|
66 |
+
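# Move model to GPU if available. NOTE(review): a model dispatched by Accelerate with offloaded modules may reject `.to()`; confirm this step is still needed after `load_checkpoint_and_dispatch`.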
|
67 |
if torch.cuda.is_available():
|
68 |
+
rag_model = rag_model.to("cuda")
|
69 |
|
70 |
# Load PDF content
|
71 |
def load_pdf(pdf_path):
|