Spaces:

shah1zil
/

AgrI_Assistant

Runtime error

shah1zil committed on Sep 1

Commit

c630b36

•

1 Parent(s): 4551845

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from accelerate import init_empty_weights, load_checkpoint_and_dispatch
 # Set up environment variables
 os.environ["GROQ_API_KEY"] = "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"
@@ -23,6 +24,10 @@ whisper_model = whisper.load_model("base")  # You can choose other models like "
 # Updated model loading code with disk offloading
 # Specify the folder where offloaded model parts will be stored
 offload_folder = "./offload"
@@ -32,8 +37,9 @@ os.makedirs(offload_folder, exist_ok=True)
 # Initialize the tokenizer
 rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
-# Initialize empty weights with the specified offload folder
 with init_empty_weights():
     rag_model = AutoModelForCausalLM.from_pretrained(
         "himmeow/vi-gemma-2b-RAG",
         torch_dtype=torch.bfloat16,
@@ -41,7 +47,7 @@ with init_empty_weights():
         offload_folder=offload_folder
     )
-# Dispatch the model with offloading options
 rag_model = load_checkpoint_and_dispatch(
     rag_model,
     "himmeow/vi-gemma-2b-RAG",
@@ -50,9 +56,16 @@ rag_model = load_checkpoint_and_dispatch(
     offload_state_dict=True
 )
-# Use GPU if available
 if torch.cuda.is_available():
-    rag_model.to("cuda")
 # Load PDF content
 def load_pdf(pdf_path):

 from transformers import AutoTokenizer, AutoModelForCausalLM
 from accelerate import init_empty_weights, load_checkpoint_and_dispatch
 # Set up environment variables
 os.environ["GROQ_API_KEY"] = "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"
 # Updated model loading code with disk offloading
+# Specify the folder where offloaded model parts will be stored
+offload_folder = "./offload"
 # Specify the folder where offloaded model parts will be stored
 offload_folder = "./offload"
 # Initialize the tokenizer
 rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
+# Initialize empty weights context
 with init_empty_weights():
+    # Load the model with meta tensors
     rag_model = AutoModelForCausalLM.from_pretrained(
         "himmeow/vi-gemma-2b-RAG",
         torch_dtype=torch.bfloat16,
         offload_folder=offload_folder
     )
+# Dispatch the model, ensuring correct device placement and weight loading
 rag_model = load_checkpoint_and_dispatch(
     rag_model,
     "himmeow/vi-gemma-2b-RAG",
     offload_state_dict=True
 )
+# Ensure weights are properly tied if necessary
+if hasattr(rag_model, 'tie_weights'):
+    rag_model.tie_weights()
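+# Use `to_empty()` to move the model out of the meta state (NOTE: `to_empty()` requires a `device=` keyword argument and allocates uninitialized storage; it does not load weights)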
+rag_model = rag_model.to_empty()
+# Move model to GPU if available
 if torch.cuda.is_available():
+    rag_model = rag_model.to("cuda")
 # Load PDF content
 def load_pdf(pdf_path):