Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,8 @@ from groq import Groq
|
|
7 |
from PyPDF2 import PdfReader
|
8 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
9 |
import torch
|
|
|
|
|
10 |
|
11 |
# Set up environment variables
# The Groq API key is a secret: it must be injected by the runtime environment
# (for example as a Space secret / environment variable) and never committed to
# source control. Any key that was previously committed here should be revoked.
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set; configure it as a secret or environment "
        "variable before starting the app."
    )
|
@@ -18,14 +20,37 @@ client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
|
18 |
# Load the "base" Whisper checkpoint once at import time; other model names
# such as "small", "medium", or "large" can be passed instead.
whisper_model = whisper.load_model("base")
|
19 |
|
20 |
# Initialize the tokenizer and model from the saved checkpoint for RAG
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
"himmeow/vi-gemma-2b-RAG",
|
24 |
device_map="auto",
|
25 |
-
|
|
|
26 |
)
|
27 |
|
28 |
-
# Use GPU if available
|
29 |
if torch.cuda.is_available():
|
30 |
rag_model.to("cuda")
|
31 |
|
|
|
7 |
from PyPDF2 import PdfReader
|
8 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
9 |
import torch
|
10 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
11 |
+
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
|
12 |
|
13 |
# Set up environment variables
# The Groq API key is a secret: it must be injected by the runtime environment
# (for example as a Space secret / environment variable) and never committed to
# source control. Any key that was previously committed here should be revoked.
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set; configure it as a secret or environment "
        "variable before starting the app."
    )
|
|
|
20 |
# Load the "base" Whisper checkpoint once at import time; other model names
# such as "small", "medium", or "large" can be passed instead.
whisper_model = whisper.load_model("base")
|
21 |
|
22 |
# Initialize the tokenizer and model from the saved checkpoint for RAG.

# Folder where weights that do not fit in GPU/CPU memory are offloaded to disk.
offload_folder = "./offload"
os.makedirs(offload_folder, exist_ok=True)

rag_tokenizer = AutoTokenizer.from_pretrained("himmeow/vi-gemma-2b-RAG")

# Load the real weights in a single call. With device_map="auto",
# from_pretrained itself places the layers on the available devices and uses
# offload_folder for anything that has to spill to disk, so the model must not
# be built under init_empty_weights() and must not be re-dispatched with
# load_checkpoint_and_dispatch (which expects a local checkpoint path, not a
# Hub repo id).
rag_model = AutoModelForCausalLM.from_pretrained(
    "himmeow/vi-gemma-2b-RAG",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    offload_folder=offload_folder,
)

# NOTE: no explicit rag_model.to("cuda") here. device_map="auto" already uses
# the GPU when one is available, and calling .to() on a model that has modules
# offloaded to CPU or disk raises an error.
|
56 |
|