doubledsbv committed on
Commit
e032123
1 Parent(s): 090e573

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +26 -16
README.md CHANGED
@@ -91,29 +91,39 @@ dtype: bfloat16
91
  random_seed: 0
92
  ```
93
 
94
- ## 💻 Usage
95
 
96
  ```python
97
- !pip install -qU transformers accelerate
98
 
99
- from transformers import AutoTokenizer
100
- import transformers
101
  import torch
 
102
 
103
- model = "seedboxai/KafkaLM-7B-DARE_TIES-LaserRMT-QLoRA-DPO-v0.5"
104
- messages = [{"role": "user", "content": "Was ist der Sinn des Lebens?"}]
105
-
106
- tokenizer = AutoTokenizer.from_pretrained(model)
107
- prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
108
- pipeline = transformers.pipeline(
109
- "text-generation",
110
- model=model,
111
- torch_dtype=torch.float16,
112
- device_map="auto",
113
  )
114
 
115
- outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
116
- print(outputs[0]["generated_text"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  ```
118
 
119
  ## Disclaimer
 
91
  random_seed: 0
92
  ```
93
 
94
+ ## 💻 Usage (fast vLLM inference example)
95
 
96
  ```python
97
+ !pip install -qU vllm
98
 
 
 
99
  import torch
100
+ from vllm import LLM, SamplingParams
101
 
102
+ sampling_params = SamplingParams(
103
+ temperature=0.7,
104
+ top_p=0.95,
105
+ top_k=50,
106
+ max_tokens=512,
 
 
 
 
 
107
  )
108
 
109
+ llm = LLM(model="doubledsbv/KafkaLM-7B-DARE_TIES-DPO-v0.5-AWQ", quantization = "awq", dtype=torch.float16)
110
+
111
+
112
+ def generate_prompt(input, sys_prompt = None):
113
+ prompt = ''
114
+ if not sys_prompt:
115
+ sys_prompt = "Du bist ein freundlicher und hilfsbereiter KI-Assistent. Du beantwortest Fragen faktenorientiert, präzise und ausführlich."
116
+
117
+ prompt += f"<|system|>\n{sys_prompt.strip()}</s>\n"
118
+ prompt += f"<|user|>\n{input.strip()}</s>\n"
119
+ prompt += f"<|assistant|>\n"
120
+
121
+ return prompt
122
+
123
+ outputs = llm.generate(generate_prompt("Was ist der Unterschied zwischen Ironie und Sarkasmus?"), sampling_params)
124
+ print(outputs[0].outputs[0].text.strip())
125
+
126
+
127
  ```
128
 
129
  ## Disclaimer