shah1zil committed on
Commit
4551845
β€’
1 Parent(s): c04653d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -3
app.py CHANGED
@@ -7,6 +7,8 @@ from groq import Groq
7
  from PyPDF2 import PdfReader
8
  from transformers import AutoTokenizer, AutoModelForCausalLM
9
  import torch
 
 
10
 
11
  # Set up environment variables
12
  os.environ["GROQ_API_KEY"] = "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"
@@ -18,14 +20,37 @@ client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
18
  whisper_model = whisper.load_model("base") # You can choose other models like "small", "medium", "large"
19
 
20
  # Initialize the tokenizer and model from the saved checkpoint for RAG
 
 
 
 
 
 
 
 
 
 
21
  rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
22
- rag_model = AutoModelForCausalLM.from_pretrained(
 
 
 
 
 
 
 
 
 
 
 
 
23
  "himmeow/vi-gemma-2b-RAG",
24
  device_map="auto",
25
- torch_dtype=torch.bfloat16
 
26
  )
27
 
28
- # Use GPU if available for RAG model
29
  if torch.cuda.is_available():
30
  rag_model.to("cuda")
31
 
 
7
  from PyPDF2 import PdfReader
8
  from transformers import AutoTokenizer, AutoModelForCausalLM
9
  import torch
10
+ from transformers import AutoTokenizer, AutoModelForCausalLM
11
+ from accelerate import init_empty_weights, load_checkpoint_and_dispatch
12
 
13
  # Set up environment variables
14
  os.environ["GROQ_API_KEY"] = "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"
 
20
  whisper_model = whisper.load_model("base") # You can choose other models like "small", "medium", "large"
21
 
22
  # Initialize the tokenizer and model from the saved checkpoint for RAG
23
+ # Updated model loading code with disk offloading
24
+
25
+
26
+ # Specify the folder where offloaded model parts will be stored
27
+ offload_folder = "./offload"
28
+
29
+ # Ensure the offload folder exists
30
+ os.makedirs(offload_folder, exist_ok=True)
31
+
32
+ # Initialize the tokenizer
33
  rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
34
+
35
+ # Initialize empty weights with the specified offload folder
36
+ with init_empty_weights():
37
+ rag_model = AutoModelForCausalLM.from_pretrained(
38
+ "himmeow/vi-gemma-2b-RAG",
39
+ torch_dtype=torch.bfloat16,
40
+ device_map="auto",
41
+ offload_folder=offload_folder
42
+ )
43
+
44
+ # Dispatch the model with offloading options
45
+ rag_model = load_checkpoint_and_dispatch(
46
+ rag_model,
47
  "himmeow/vi-gemma-2b-RAG",
48
  device_map="auto",
49
+ offload_folder=offload_folder,
50
+ offload_state_dict=True
51
  )
52
 
53
+ # Use GPU if available
54
  if torch.cuda.is_available():
55
  rag_model.to("cuda")
56