shah1zil committed on
Commit
c630b36
•
1 Parent(s): 4551845

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -4
app.py CHANGED
@@ -10,6 +10,7 @@ import torch
10
  from transformers import AutoTokenizer, AutoModelForCausalLM
11
  from accelerate import init_empty_weights, load_checkpoint_and_dispatch
12
 
 
13
  # Set up environment variables
14
  os.environ["GROQ_API_KEY"] = "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"
15
 
@@ -23,6 +24,10 @@ whisper_model = whisper.load_model("base") # You can choose other models like "
23
  # Updated model loading code with disk offloading
24
 
25
 
 
 
 
 
26
  # Specify the folder where offloaded model parts will be stored
27
  offload_folder = "./offload"
28
 
@@ -32,8 +37,9 @@ os.makedirs(offload_folder, exist_ok=True)
32
  # Initialize the tokenizer
33
  rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
34
 
35
- # Initialize empty weights with the specified offload folder
36
  with init_empty_weights():
 
37
  rag_model = AutoModelForCausalLM.from_pretrained(
38
  "himmeow/vi-gemma-2b-RAG",
39
  torch_dtype=torch.bfloat16,
@@ -41,7 +47,7 @@ with init_empty_weights():
41
  offload_folder=offload_folder
42
  )
43
 
44
- # Dispatch the model with offloading options
45
  rag_model = load_checkpoint_and_dispatch(
46
  rag_model,
47
  "himmeow/vi-gemma-2b-RAG",
@@ -50,9 +56,16 @@ rag_model = load_checkpoint_and_dispatch(
50
  offload_state_dict=True
51
  )
52
 
53
- # Use GPU if available
 
 
 
 
 
 
 
54
  if torch.cuda.is_available():
55
- rag_model.to("cuda")
56
 
57
  # Load PDF content
58
  def load_pdf(pdf_path):
 
10
  from transformers import AutoTokenizer, AutoModelForCausalLM
11
  from accelerate import init_empty_weights, load_checkpoint_and_dispatch
12
 
13
+
14
  # Set up environment variables
15
  os.environ["GROQ_API_KEY"] = "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"
16
 
 
24
  # Updated model loading code with disk offloading
25
 
26
 
27
+ # Specify the folder where offloaded model parts will be stored
28
+ offload_folder = "./offload"
29
+
30
+
31
  # Specify the folder where offloaded model parts will be stored
32
  offload_folder = "./offload"
33
 
 
37
  # Initialize the tokenizer
38
  rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
39
 
40
+ # Initialize empty weights context
41
  with init_empty_weights():
42
+ # Load the model with meta tensors
43
  rag_model = AutoModelForCausalLM.from_pretrained(
44
  "himmeow/vi-gemma-2b-RAG",
45
  torch_dtype=torch.bfloat16,
 
47
  offload_folder=offload_folder
48
  )
49
 
50
+ # Dispatch the model, ensuring correct device placement and weight loading
51
  rag_model = load_checkpoint_and_dispatch(
52
  rag_model,
53
  "himmeow/vi-gemma-2b-RAG",
 
56
  offload_state_dict=True
57
  )
58
 
59
+ # Ensure weights are properly tied if necessary
60
+ if hasattr(rag_model, 'tie_weights'):
61
+ rag_model.tie_weights()
62
+
63
+ # Use `to_empty()` to move the model out of the meta state correctly
64
+ rag_model = rag_model.to_empty()
65
+
66
+ # Move model to GPU if available
67
  if torch.cuda.is_available():
68
+ rag_model = rag_model.to("cuda")
69
 
70
  # Load PDF content
71
  def load_pdf(pdf_path):