Spaces:
Runtime error
Runtime error
alfonsovelp
committed on
Commit
•
1539ae0
1
Parent(s):
4208f3e
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
3 |
from huggingface_hub import InferenceClient
|
4 |
import os
|
|
|
5 |
# Environment variable for HF token
|
6 |
hf_token = os.environ.get("HF_TOKEN")
|
7 |
|
@@ -12,7 +13,7 @@ model_id = "mistralai/Mistral-7B-Instruct-v0.2"
|
|
12 |
quantization_config = BitsAndBytesConfig(
|
13 |
load_in_4bit=True,
|
14 |
bnb_4bit_quant_type="nf4",
|
15 |
-
bnb_4bit_compute_dtype=
|
16 |
)
|
17 |
|
18 |
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=True, device_map="auto", token=hf_token)
|
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from huggingface_hub import InferenceClient
import os

import torch

# Environment variable for HF token (set in the Space's secrets).
hf_token = os.environ.get("HF_TOKEN")

# 4-bit NF4 quantization; fp16 compute keeps the 7B model within Space memory.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# BUG FIX: the original passed quantization_config=True (a bare bool), which
# discards the BitsAndBytesConfig built above — including the compute dtype
# this commit adds. Pass the config object itself so 4-bit loading is applied.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",
    token=hf_token,
)