V1.8
Update context for efficiency.
- modules/pmbl.py +4 -4
modules/pmbl.py
CHANGED
@@ -107,7 +107,7 @@ class PMBL:
         response = llm(
             system_prompt,
             max_tokens=1500,
-            temperature=0.
+            temperature=0.2,
             stop=["</s>", "\nUser:", "\nuser:", "\nSystem:", "\nsystem:"],
             echo=False,
             stream=True
@@ -122,8 +122,8 @@ class PMBL:
         self.save_chat_history(prompt, response_text)

     def calculate_context(self, system_prompt, formatted_history):
-        system_prompt_tokens = len(system_prompt) //
-        history_tokens = len(formatted_history) //
+        system_prompt_tokens = len(system_prompt) // 6
+        history_tokens = len(formatted_history) // 12
         max_response_tokens = 1500
         context_ceiling = 31690

@@ -148,7 +148,7 @@ class PMBL:
         conn.close()

     def generate_topic(self, prompt, response):
-        llm = Llama(model_path=self.model_path, n_ctx=
+        llm = Llama(model_path=self.model_path, n_ctx=1690, n_threads=8)

         system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
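The first hunk keeps stream=True and the stop markers on the llm() call, and the context line in the second hunk then persists the finished text via self.save_chat_history(prompt, response_text). The accumulation step between those two points sits outside this diff; the sketch below is only an assumption about how the streamed llama-cpp-python chunks could be collected, not code from the commit.

def collect_stream(response):
    # llama-cpp-python yields dict chunks when stream=True; each partial
    # completion lives under choices[0]["text"].
    response_text = ""
    for chunk in response:
        response_text += chunk["choices"][0]["text"]
    return response_text

The result of such a loop is what would then be handed to save_chat_history.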
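The calculate_context hunk only swaps in the two character-to-token heuristics, roughly 6 characters per system-prompt token and 12 per history token; how the method combines them with the 1500-token response budget and the 31690 ceiling is outside the diff. A minimal standalone sketch, assuming the estimates are simply summed and clamped to the ceiling (that combination is an assumption, not shown in the commit):

def calculate_context(system_prompt, formatted_history):
    # Heuristics from this commit: ~6 chars per system-prompt token,
    # ~12 chars per history token.
    system_prompt_tokens = len(system_prompt) // 6
    history_tokens = len(formatted_history) // 12
    max_response_tokens = 1500
    context_ceiling = 31690
    # Assumed combination: budget prompt + history + response,
    # capped at the model's context ceiling.
    required = system_prompt_tokens + history_tokens + max_response_tokens
    return min(required, context_ceiling)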