Sergidev committed
Commit 24a2788
1 Parent(s): 8d9af98
Files changed (1):
  1. modules/pmbl.py +4 -18
modules/pmbl.py CHANGED
@@ -102,18 +102,9 @@ class PMBL:
             yield chunk
 
     def generate_response_task(self, system_prompt, prompt, n_ctx):
-    def generate_response_task(self, system_prompt, prompt, n_ctx):
-        llm = Llama(
-            model_path=self.model_path,
-            n_ctx=n_ctx,
-            n_threads=8,
-            n_gpu_layers=-1,
-            use_mlock=True,
-            use_mmap=True,
-            use_flash_attn=True
-        )
-
-        response = llm.generate(
+        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, use_flash_attn=True, mlock=True)
+
+        response = llm(
             system_prompt,
             max_tokens=1500,
             temperature=0.7,
@@ -156,12 +147,7 @@ class PMBL:
         conn.close()
 
     def generate_topic(self, prompt, response):
-        llm = Llama(
-            model_path=self.model_path,
-            n_ctx=n_ctx,
-            n_threads=2,
-            n_gpu_layers=2,
-        )
+        llm = Llama(model_path=self.model_path, n_ctx=1690, n_threads=2, n_gpu_layers=-1, mlock=True)
 
         system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
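
The second hunk fixes a genuine bug on top of the restyling: the old generate_topic passed n_ctx=n_ctx, but its only parameters are prompt and response, so unless n_ctx was defined at module scope that call would raise a NameError; the commit pins the window at 1690 tokens instead, and also raises n_gpu_layers from 2 to -1 (full GPU offload). A hypothetical sketch under the same llama-cpp-python assumptions as above; max_tokens=16 is this sketch's choice, not the commit's:

from llama_cpp import Llama

def generate_topic(model_path, prompt, response):
    # Fixed-size context window: the topic prompt is short and bounded.
    llm = Llama(model_path=model_path, n_ctx=1690, n_threads=2,
                n_gpu_layers=-1, use_mlock=True)
    system_prompt = (
        "Based on the following interaction between a user and an AI "
        "assistant, generate a concise topic for the conversation in "
        f"2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
    )
    # A 2-4 word topic fits in a handful of tokens.
    out = llm(system_prompt, max_tokens=16)
    return out["choices"][0]["text"].strip()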