codys12 committed on
Commit
c006930
1 Parent(s): 4ccf4f3

remove flash attn

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -32,7 +32,7 @@ MAX_INPUT_TOKEN_LENGTH = 4096
32
 
33
  if torch.cuda.is_available():
34
  model_id = "codys12/MergeLlama-7b"
35
- model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, use_flash_attention_2=True, torch_dtype=torch.float16, device_map=0, cache_dir="/data")
36
  tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf", trust_remote_code=True)
37
  tokenizer.pad_token = tokenizer.eos_token
38
  tokenizer.padding_side = "right"
 
32
 
33
  if torch.cuda.is_available():
34
  model_id = "codys12/MergeLlama-7b"
35
+ model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float16, device_map=0, cache_dir="/data")
36
  tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf", trust_remote_code=True)
37
  tokenizer.pad_token = tokenizer.eos_token
38
  tokenizer.padding_side = "right"