doubledsbv committed
Commit: e032123
Parent: 090e573

Update README.md
README.md CHANGED
````diff
@@ -91,29 +91,39 @@ dtype: bfloat16
 random_seed: 0
 ```
 
-## 💻 Usage
+## 💻 Usage (fast vLLM inference example)
 
 ```python
-!pip install -qU
+!pip install -qU vllm
 
-from transformers import AutoTokenizer
-import transformers
 import torch
+from vllm import LLM, SamplingParams
 
-
-
-
-
-
-pipeline = transformers.pipeline(
-    "text-generation",
-    model=model,
-    torch_dtype=torch.float16,
-    device_map="auto",
+sampling_params = SamplingParams(
+    temperature=0.7,
+    top_p=0.95,
+    top_k=50,
+    max_tokens=512,
 )
 
-
-
+llm = LLM(model="doubledsbv/KafkaLM-7B-DARE_TIES-DPO-v0.5-AWQ", quantization="awq", dtype=torch.float16)
+
+
+def generate_prompt(input, sys_prompt=None):
+    prompt = ''
+    if not sys_prompt:
+        sys_prompt = "Du bist ein freundlicher und hilfsbereiter KI-Assistent. Du beantwortest Fragen faktenorientiert, präzise und ausführlich."
+
+    prompt += f"<|system|>\n{sys_prompt.strip()}</s>\n"
+    prompt += f"<|user|>\n{input.strip()}</s>\n"
+    prompt += f"<|assistant|>\n"
+
+    return prompt
+
+outputs = llm.generate(generate_prompt("Was ist der Unterschied zwischen Ironie und Sarkasmus?"), sampling_params)
+print(outputs[0].outputs[0].text.strip())
+
+
 ```
 
 ## Disclaimer
````
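The added snippet generates one completion at a time (the default German system prompt tells the assistant to answer factually, precisely, and in detail; the sample question asks for the difference between irony and sarcasm). `LLM.generate` also accepts a list of prompts and batches them through the engine in a single call, returning one `RequestOutput` per prompt. A minimal sketch building on the objects defined in the updated README; the second question is illustrative and not part of the commit:

```python
# Batch several prompts through the same vLLM engine; generate()
# schedules them together and returns one RequestOutput per prompt.
prompts = [
    generate_prompt("Was ist der Unterschied zwischen Ironie und Sarkasmus?"),
    generate_prompt("Erkläre kurz den Unterschied zwischen Wetter und Klima."),  # illustrative prompt, not from the commit
]

outputs = llm.generate(prompts, sampling_params)

for output in outputs:
    # Each RequestOutput carries the input prompt and its generated
    # candidates; outputs[0] is the single candidate produced with
    # these SamplingParams.
    print(output.outputs[0].text.strip())
```

Batching this way amortizes engine overhead across requests, which is where vLLM's continuous batching pays off compared to calling `generate` once per prompt.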
|