Spaces:
Runtime error
Runtime error
alfonsovelp
committed on
Commit
•
4208f3e
1
Parent(s):
ba5b63f
Update app.py
Browse files
app.py
CHANGED
@@ -1,18 +1,22 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
3 |
from huggingface_hub import InferenceClient
|
4 |
import os
|
5 |
# Environment variable for HF token
|
6 |
hf_token = os.environ.get("HF_TOKEN")
|
7 |
|
8 |
# Your model ID
|
9 |
-
model_id = "mistralai/Mistral-7B-Instruct-v0.
|
10 |
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
|
15 |
-
|
16 |
|
17 |
def format_prompt(message, history):
|
18 |
prompt = "<s>"
|
|
|
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from huggingface_hub import InferenceClient
import os

# Hugging Face access token, read from the Space's environment
# (required for gated models such as Mistral).
hf_token = os.environ.get("HF_TOKEN")

# Model to load.
model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# 4-bit NF4 quantization so the 7B model fits in limited GPU memory.
# NOTE: the dtype string must be "float16" — BitsAndBytesConfig resolves it
# via getattr(torch, value), so "torch.float16" would raise AttributeError.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

# Pass the config object itself (the original passed `quantization_config=True`,
# which is invalid and crashed the Space at startup).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",
    token=hf_token,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
20 |
|
21 |
def format_prompt(message, history):
|
22 |
prompt = "<s>"
|