Mistral-lab

Runtime error

vilarin committed on Jul 20

Commit

254517f

•

1 Parent(s): 1d4c579

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,9 +1,3 @@
-import subprocess
-subprocess.run(
-    'pip install flash-attn --no-build-isolation',
-    env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
-    shell=True
-)
 import os
 import time
 import spaces
@@ -43,7 +37,6 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL,
     torch_dtype=torch.bfloat16,
-    attn_implementation="flash_attention_2",
     device_map="auto",
     ignore_mismatched_sizes=True)

 import os
 import time
 import spaces
 model = AutoModelForCausalLM.from_pretrained(
     MODEL,
     torch_dtype=torch.bfloat16,
     device_map="auto",
     ignore_mismatched_sizes=True)