codys12 committed on
Commit
c006930
1 Parent(s): 4ccf4f3

remove flash attn

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -32,7 +32,7 @@ MAX_INPUT_TOKEN_LENGTH = 4096
32
 
33
  if torch.cuda.is_available():
34
  model_id = "codys12/MergeLlama-7b"
35
- model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, use_flash_attention_2=True, torch_dtype=torch.float16, device_map=0, cache_dir="/data")
36
  tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf", trust_remote_code=True)
37
  tokenizer.pad_token = tokenizer.eos_token
38
  tokenizer.padding_side = "right"
 
32
 
33
  if torch.cuda.is_available():
34
  model_id = "codys12/MergeLlama-7b"
35
+ model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float16, device_map=0, cache_dir="/data")
36
  tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf", trust_remote_code=True)
37
  tokenizer.pad_token = tokenizer.eos_token
38
  tokenizer.padding_side = "right"