Update modules/pmbl.py
modules/pmbl.py (+4, -3)
@@ -102,7 +102,7 @@ class PMBL:
             yield chunk

     def generate_response_task(self, system_prompt, prompt, n_ctx):
-        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, offload_kqv=True,
+        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, offload_kqv=True, use_mlock=True)

         response = llm(
             system_prompt,
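The functional change in this hunk is the new `use_mlock=True` flag. In llama-cpp-python, `use_mlock` asks llama.cpp to mlock() the model weights so the OS cannot page them out to swap, trading pinned RAM for steadier generation latency. A minimal sketch of the updated constructor call, with a placeholder model path and context size standing in for `self.model_path` and the `n_ctx` argument:

```python
from llama_cpp import Llama

# Placeholder path and context size; the real values come from
# self.model_path and the n_ctx parameter in generate_response_task.
llm = Llama(
    model_path="models/model.gguf",
    n_ctx=4096,        # context window size
    n_threads=8,       # CPU threads for layers not offloaded to the GPU
    n_gpu_layers=-1,   # -1 offloads all layers to the GPU
    offload_kqv=True,  # keep the KV cache on the GPU as well
    use_mlock=True,    # pin model weights in RAM so they are never swapped out
)
```

If the process is not allowed to lock that much memory (RLIMIT_MEMLOCK), llama.cpp typically logs a warning and continues without locking rather than failing outright.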
@@ -132,6 +132,7 @@ class PMBL:
             return system_prompt_tokens + history_tokens + max_response_tokens
         else:
             return context_ceiling  # Return the maximum context size
+
     def sleep_mode(self):
         conn = sqlite3.connect('chat_history.db')
         c = conn.cursor()
@@ -147,7 +148,7 @@ class PMBL:
         conn.close()

     def generate_topic(self, prompt, response):
-        llm = Llama(model_path=self.model_path, n_ctx=2960, n_threads=4, n_gpu_layers=-1, offload_kqv=True,
+        llm = Llama(model_path=self.model_path, n_ctx=2960, n_threads=4, n_gpu_layers=-1, offload_kqv=True, use_mlock=True)

         system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"

@@ -155,7 +156,7 @@ class PMBL:
             system_prompt,
             max_tokens=12,
             temperature=0,
-            stop=["
+            stop=["\n"],
             echo=False
         )

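The second fix completes the previously truncated `stop` argument: with `stop=["\n"]`, sampling halts at the first newline, so the generated topic stays on a single line, matching the 2-4 word format the prompt asks for. A sketch of the full completion call as it reads after this commit, assuming the `llm` instance from the hunk above; the final result-handling line is an assumption about how the caller consumes the response, not code from this diff:

```python
# Assumes `llm` was constructed as in generate_topic above.
response = llm(
    system_prompt,
    max_tokens=12,   # a 2-4 word topic fits comfortably in 12 tokens
    temperature=0,   # greedy decoding: same chat always yields the same topic
    stop=["\n"],     # cut generation at the first newline -> single-line topic
    echo=False,      # return only the completion, not the prompt
)
topic = response["choices"][0]["text"].strip()
```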