Update README.md
README.md (CHANGED)
@@ -16,6 +16,7 @@ The Mistral-7B-Instruct-v0.1 Large Language Model (LLM) is a instruct fine-tuned
 ## Instruction format
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
 device = "cuda" # the device to load the model onto
 
@@ -30,10 +31,12 @@ model = AutoModelForCausalLM.from_pretrained(model_name,
     torch_dtype=torch.bfloat16,
 )
 
+tokenizer = AutoTokenizer.from_pretrained(base_model_id,add_bos_token=True,trust_remote_code=True)
+
 model.config.use_cache = True
 def stream(user_prompt):
     runtimeFlag = "cuda:0"
-    system_prompt = '
+    system_prompt = 'MODULAR_MOJO'
     B_INST, E_INST = "[INST]", "[/INST]"
     prompt = f"{system_prompt}{B_INST}{user_prompt.strip()}\n{E_INST}"
     inputs = tokenizer([prompt], return_tensors="pt").to(runtimeFlag)
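For context, here is a minimal end-to-end sketch of how the updated snippet might be used. It assumes `mistralai/Mistral-7B-Instruct-v0.1` as the `base_model_id`, keeps the placeholder `'MODULAR_MOJO'` system prompt from the diff, and adds a hypothetical `TextStreamer`-based generation step that is not part of the commit itself:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch

device = "cuda"  # the device to load the model onto

# Assumption: base_model_id in the README refers to this checkpoint.
base_model_id = "mistralai/Mistral-7B-Instruct-v0.1"

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.bfloat16,
).to(device)

tokenizer = AutoTokenizer.from_pretrained(
    base_model_id, add_bos_token=True, trust_remote_code=True
)
model.config.use_cache = True

def stream(user_prompt):
    runtimeFlag = "cuda:0"
    system_prompt = 'MODULAR_MOJO'  # placeholder value taken from the diff
    B_INST, E_INST = "[INST]", "[/INST]"
    # Wrap the user turn in Mistral's [INST] ... [/INST] instruction markers.
    prompt = f"{system_prompt}{B_INST}{user_prompt.strip()}\n{E_INST}"
    inputs = tokenizer([prompt], return_tensors="pt").to(runtimeFlag)
    # Hypothetical continuation: stream decoded tokens to stdout as they are generated.
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    model.generate(**inputs, streamer=streamer, max_new_tokens=256)

stream("Write a short haiku about GPUs.")
```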