remove flash attn
app.py
CHANGED
@@ -32,7 +32,7 @@ MAX_INPUT_TOKEN_LENGTH = 4096
 
 if torch.cuda.is_available():
     model_id = "codys12/MergeLlama-7b"
-    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True,
+    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float16, device_map=0, cache_dir="/data")
     tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf", trust_remote_code=True)
     tokenizer.pad_token = tokenizer.eos_token
     tokenizer.padding_side = "right"
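For context, a minimal sketch of the loading path after this commit, assuming the standard transformers API: the model is loaded in fp16 onto GPU 0 with weights cached under /data and no flash-attention argument, so the default attention implementation is used. The prompt text and generation parameters below are illustrative and not taken from the Space's app.py.

# Sketch only: mirrors the post-commit from_pretrained call; the generation
# snippet at the bottom is a hypothetical usage example, not code from app.py.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MAX_INPUT_TOKEN_LENGTH = 4096

if torch.cuda.is_available():
    model_id = "codys12/MergeLlama-7b"
    # fp16 weights on GPU 0, cached on the Space's persistent /data volume.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        device_map=0,
        cache_dir="/data",
    )
    tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf", trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # Illustrative generation call (assumed, not part of the diff).
    prompt = "<<<<<<< HEAD\n..."  # placeholder merge-conflict prompt
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True,
                       max_length=MAX_INPUT_TOKEN_LENGTH).to(model.device)
    output = model.generate(**inputs, max_new_tokens=256,
                            pad_token_id=tokenizer.eos_token_id)
    print(tokenizer.decode(output[0], skip_special_tokens=True))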