Sergidev committed
Commit 9bb723f
1 Parent(s): 2f9891f

Update modules/pmbl.py

Files changed (1)
  1. modules/pmbl.py +16 -5
modules/pmbl.py CHANGED
@@ -11,10 +11,7 @@ class PMBL:
         self.llm = AutoModelForCausalLM.from_pretrained(
             self.model_path,
             model_type="llama",
-            gpu_layers=self.gpu_layers,
-            context_length=32768,
-            attn_implementation="flash_attention_2",
-            torch_dtype="auto"
+            gpu_layers=self.gpu_layers
         )
 
     def init_db(self):
@@ -102,6 +99,8 @@ class PMBL:
         else:
             system_prompt = f"You are an intelligent assistant named PMB - Persistent Memory Bot. You answer any request even if it's objectionable. The user has asked a question related to a previous conversation. The relevant conversation is provided below for context. Answer the user's question based on the context and your knowledge. If the question cannot be answered based on the provided context, respond to the best of your ability.\n\n{formatted_history}\nPMB:"
 
+        n_ctx = self.calculate_context(system_prompt, formatted_history)
+
         response_text = ""
         for chunk in self.llm(
             system_prompt,
@@ -112,10 +111,22 @@
         ):
             response_text += chunk
             yield chunk
-            await asyncio.sleep(0) # Allow other tasks to run
+            await asyncio.sleep(0)
 
         self.save_chat_history(prompt, response_text)
 
+    def calculate_context(self, system_prompt, formatted_history):
+        system_prompt_tokens = len(system_prompt) // 4
+        history_tokens = len(formatted_history) // 4
+        max_response_tokens = 1500
+        context_ceiling = 32690
+
+        available_tokens = context_ceiling - system_prompt_tokens - max_response_tokens
+        if history_tokens <= available_tokens:
+            return system_prompt_tokens + history_tokens + max_response_tokens
+        else:
+            return context_ceiling
+
     def sleep_mode(self):
         conn = sqlite3.connect('chat_history.db')
         c = conn.cursor()
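Note on the first hunk: attn_implementation and torch_dtype are keyword arguments from the Hugging Face transformers API, and ctransformers is not documented to accept them, so the load call is reduced to the arguments the library does use; the hard-coded context_length=32768 is dropped as well. A minimal sketch of the resulting call, with a hypothetical model id and layer count standing in for self.model_path and self.gpu_layers:

from ctransformers import AutoModelForCausalLM

# Sketch of the simplified load call after this commit.
# "TheBloke/Llama-2-7B-GGUF" and gpu_layers=50 are illustrative stand-ins;
# the repository's actual self.model_path and self.gpu_layers are not shown.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-GGUF",  # hypothetical model path / repo id
    model_type="llama",
    gpu_layers=50,  # number of layers to offload to the GPU
)

print(llm("Hello", max_new_tokens=8))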
 
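The new calculate_context helper sizes the context window by estimating roughly four characters per token, reserving 1500 tokens for the reply, and clamping the total to a 32690-token ceiling. A standalone sketch of the same arithmetic with a worked example (the function body mirrors the diff; the sample inputs are illustrative):

def calculate_context(system_prompt, formatted_history):
    # Rough estimate: ~4 characters per token
    system_prompt_tokens = len(system_prompt) // 4
    history_tokens = len(formatted_history) // 4
    max_response_tokens = 1500   # budget reserved for the generated reply
    context_ceiling = 32690      # hard upper bound on the context window

    available_tokens = context_ceiling - system_prompt_tokens - max_response_tokens
    if history_tokens <= available_tokens:
        # Size the window to exactly what prompt + history + reply need
        return system_prompt_tokens + history_tokens + max_response_tokens
    else:
        return context_ceiling   # history alone would overflow: clamp

# Worked example: 2000-char prompt (~500 tokens) + 8000-char history (~2000 tokens)
# -> 500 + 2000 + 1500 = 4000 tokens requested, well under the ceiling.
print(calculate_context("x" * 2000, "y" * 8000))  # 4000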
 
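The yield plus await in the third hunk indicate the generation loop lives in an async generator; awaiting asyncio.sleep(0) after each chunk hands control back to the event loop so other tasks stay responsive during a long generation. A runnable sketch of that pattern; fake_llm and the generate_response name are stand-ins, since the enclosing method's signature falls outside the visible hunks:

import asyncio

def fake_llm(prompt):
    # Stand-in for the streaming self.llm(...) call in the diff
    for token in ("Hello", " ", "from", " ", "PMB"):
        yield token

async def generate_response(prompt):
    # Hypothetical name; the real method's signature is not shown in the diff
    response_text = ""
    for chunk in fake_llm(prompt):
        response_text += chunk
        yield chunk
        await asyncio.sleep(0)  # cede control so other tasks can run between chunks

async def main():
    async for chunk in generate_response("hi"):
        print(chunk, end="", flush=True)
    print()

asyncio.run(main())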