Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -29,17 +29,17 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 peft_model_id = "charansr/llama2-7b-chat-hf-therapist"
 
 config = PeftConfig.from_pretrained(peft_model_id,
-                                    use_auth_token="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", load_in_8bit=True)
+                                    use_auth_token="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", load_in_8bit=True).to("cpu")
 
-newmodel = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True,
-                                                use_auth_token="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")
+newmodel = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True,
+                                                use_auth_token="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").to("cpu")
 
 newtokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path,
-                                             use_auth_token="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", load_in_8bit=True)
+                                             use_auth_token="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", load_in_8bit=True).to("cpu")
 
 # Load the Lora model
 newmodel = PeftModel.from_pretrained(newmodel, peft_model_id,
-                                     use_auth_token="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", load_in_8bit=True)
+                                     use_auth_token="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", load_in_8bit=True).to("cpu")
 
 def givetext(input_text,lmodel,ltokenizer):
     try:
@@ -61,13 +61,13 @@ def givetext(input_text,lmodel,ltokenizer):
         with torch.no_grad():
             #print("BEFORE RETURNING")
 
-            print("BEFORE ATTEMPTING TO MOVE LMODEL TO CPU")
+            #print("BEFORE ATTEMPTING TO MOVE LMODEL TO CPU")
 
-            lmodel = lmodel.to("cpu")
+            #lmodel = lmodel.to("cpu")
 
-            print("BEFORE ATTEMPTING .cpu()")
+            #print("BEFORE ATTEMPTING .cpu()")
 
-            lmodel.cpu()
+            #lmodel.cpu()
 
             print("BEFORE GENERATING LMODEL")
 
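Note on this change: the new code chains .to("cpu") onto PeftConfig.from_pretrained(...) and AutoTokenizer.from_pretrained(...), but neither a PeftConfig nor a tokenizer is an nn.Module, so both calls raise AttributeError; and a model loaded with load_in_8bit=True cannot be relocated with .to() at all, since bitsandbytes 8-bit quantization generally requires a CUDA GPU. Below is a minimal loading sketch, assuming the intent of this commit is full-precision inference on CPU without 8-bit quantization; the HF_TOKEN environment variable (a Space secret standing in for the hard-coded token) is an assumption, not part of this commit:

# Minimal CPU-only loading sketch (assumption: the Space should run inference
# on CPU). load_in_8bit is dropped because bitsandbytes 8-bit quantization
# generally requires a CUDA GPU, and only the assembled model gets .to("cpu"):
# PeftConfig and tokenizers have no .to() method.
import os

import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "charansr/llama2-7b-chat-hf-therapist"
# Assumed HF_TOKEN Space secret; avoids hard-coding the access token in app.py.
hf_token = os.environ.get("HF_TOKEN")

config = PeftConfig.from_pretrained(peft_model_id, use_auth_token=hf_token)

base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    torch_dtype=torch.float32,  # full precision; no 8-bit on CPU
    use_auth_token=hf_token,
)

newtokenizer = AutoTokenizer.from_pretrained(
    config.base_model_name_or_path,
    use_auth_token=hf_token,
)

# Load the Lora adapter on top of the base weights, then move the whole
# assembled model (an nn.Module) to CPU and switch to inference mode.
newmodel = PeftModel.from_pretrained(base_model, peft_model_id, use_auth_token=hf_token)
newmodel = newmodel.to("cpu")
newmodel.eval()

With the model placed on CPU at load time like this, the lmodel.to("cpu") / lmodel.cpu() calls that the second hunk comments out inside givetext would be unnecessary.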