V1.8
Update context for efficiency.
- modules/pmbl.py +4 -4
modules/pmbl.py
CHANGED
@@ -107,7 +107,7 @@ class PMBL:
         response = llm(
             system_prompt,
             max_tokens=1500,
-            temperature=0.
+            temperature=0.2,
             stop=["</s>", "\nUser:", "\nuser:", "\nSystem:", "\nsystem:"],
             echo=False,
             stream=True
@@ -122,8 +122,8 @@ class PMBL:
         self.save_chat_history(prompt, response_text)

     def calculate_context(self, system_prompt, formatted_history):
-        system_prompt_tokens = len(system_prompt) //
-        history_tokens = len(formatted_history) //
+        system_prompt_tokens = len(system_prompt) // 6
+        history_tokens = len(formatted_history) // 12
         max_response_tokens = 1500
         context_ceiling = 31690

@@ -148,7 +148,7 @@ class PMBL:
         conn.close()

     def generate_topic(self, prompt, response):
-        llm = Llama(model_path=self.model_path, n_ctx=
+        llm = Llama(model_path=self.model_path, n_ctx=1690, n_threads=8)

         system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
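The first hunk keeps stream=True and the stop markers on the llm() call, and the context line in the second hunk then persists the finished text via self.save_chat_history(prompt, response_text). The accumulation step between those two points sits outside this diff; the sketch below is only an assumption about how the streamed llama-cpp-python chunks could be collected, not code from the commit.

def collect_stream(response):
    # llama-cpp-python yields dict chunks when stream=True; each partial
    # completion lives under choices[0]["text"].
    response_text = ""
    for chunk in response:
        response_text += chunk["choices"][0]["text"]
    return response_text

The result of such a loop is what would then be handed to save_chat_history.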
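The calculate_context hunk only swaps in the two character-to-token heuristics, roughly 6 characters per system-prompt token and 12 per history token; how the method combines them with the 1500-token response budget and the 31690 ceiling is outside the diff. A minimal standalone sketch, assuming the estimates are simply summed and clamped to the ceiling (that combination is an assumption, not shown in the commit):

def calculate_context(system_prompt, formatted_history):
    # Heuristics from this commit: ~6 chars per system-prompt token,
    # ~12 chars per history token.
    system_prompt_tokens = len(system_prompt) // 6
    history_tokens = len(formatted_history) // 12
    max_response_tokens = 1500
    context_ceiling = 31690
    # Assumed combination: budget prompt + history + response,
    # capped at the model's context ceiling.
    required = system_prompt_tokens + history_tokens + max_response_tokens
    return min(required, context_ceiling)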