Attempt Rollback

modules/pmbl.py (+8 -9)
@@ -1,5 +1,6 @@
 import sqlite3
 from datetime import datetime
+from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor

 class PMBL:
@@ -77,8 +78,7 @@ class PMBL:
         conn = sqlite3.connect('chat_history.db')
         c = conn.cursor()
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        c.execute("INSERT INTO chats (timestamp, prompt, response, topic) VALUES (?, ?, ?, 'Untitled')",
-                  (timestamp, prompt, response))
+        c.execute("INSERT INTO chats (timestamp, prompt, response, topic) VALUES (?, ?, ?, 'Untitled')", (timestamp, prompt, response))
         conn.commit()
         conn.close()

@@ -102,12 +102,12 @@ class PMBL:
             yield chunk

     def generate_response_task(self, system_prompt, prompt, n_ctx):
-        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8
+        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8)

         response = llm(
             system_prompt,
             max_tokens=1500,
-            temperature=0.
+            temperature=0.7,
             stop=["</s>", "\nUser:", "\nuser:", "\nSystem:", "\nsystem:"],
             echo=False,
             stream=True
@@ -122,17 +122,16 @@ class PMBL:
         self.save_chat_history(prompt, response_text)

     def calculate_context(self, system_prompt, formatted_history):
-        system_prompt_tokens = len(system_prompt) //
-        history_tokens = len(formatted_history) //
+        system_prompt_tokens = len(system_prompt) // 4
+        history_tokens = len(formatted_history) // 4
         max_response_tokens = 1500
-        context_ceiling =
+        context_ceiling = 32690

         available_tokens = context_ceiling - system_prompt_tokens - max_response_tokens
         if history_tokens <= available_tokens:
             return system_prompt_tokens + history_tokens + max_response_tokens
         else:
             return context_ceiling  # Return the maximum context size
-
     def sleep_mode(self):
         conn = sqlite3.connect('chat_history.db')
         c = conn.cursor()
@@ -148,7 +147,7 @@ class PMBL:
         conn.close()

     def generate_topic(self, prompt, response):
-        llm = Llama(model_path=self.model_path, n_ctx=
+        llm = Llama(model_path=self.model_path, n_ctx=2690, n_threads=8)

         system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"

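A note on the restored save_chat_history: the INSERT goes through sqlite3 placeholders, so prompt and response are bound as values rather than spliced into the SQL string. A minimal standalone sketch of the same pattern, assuming the chats table schema implied by the INSERT above (the id column is a guess, not code from this repo):

import sqlite3
from datetime import datetime

conn = sqlite3.connect('chat_history.db')
c = conn.cursor()
# Schema assumed from the INSERT in the diff; the id column is hypothetical.
c.execute("""CREATE TABLE IF NOT EXISTS chats
             (id INTEGER PRIMARY KEY AUTOINCREMENT,
              timestamp TEXT, prompt TEXT, response TEXT, topic TEXT)""")
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# The ? placeholders let sqlite3 bind the values safely; never
# interpolate user text into the SQL string itself.
c.execute("INSERT INTO chats (timestamp, prompt, response, topic) VALUES (?, ?, ?, 'Untitled')",
          (timestamp, "hi there", "Hello! How can I help?"))
conn.commit()
conn.close()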
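generate_response_task streams its completion: with stream=True, llama-cpp-python returns an iterator of chunk dicts instead of a single completion, and each chunk carries the next piece of text in chunk['choices'][0]['text']. A sketch of consuming such a stream, with a placeholder model path:

from llama_cpp import Llama

llm = Llama(model_path="model.gguf", n_ctx=2048, n_threads=8)  # path is a placeholder
response = llm(
    "You are a helpful assistant.\nUser: hi\nAssistant:",
    max_tokens=1500,
    temperature=0.7,
    stop=["</s>", "\nUser:", "\nuser:", "\nSystem:", "\nsystem:"],
    echo=False,
    stream=True,
)
# Each chunk is shaped like a completion dict; the newly generated
# text lives in choices[0]['text'].
response_text = ""
for chunk in response:
    piece = chunk['choices'][0]['text']
    response_text += piece
    print(piece, end="", flush=True)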
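calculate_context sizes the window with the common rough heuristic of one token per four characters, then caps the result at the 32690-token ceiling. Worked through with illustrative sizes (none of these numbers come from the repo):

# Illustrative inputs: a 2,000-char system prompt, 100,000 chars of history.
system_prompt_tokens = 2000 // 4       # 500
history_tokens = 100_000 // 4          # 25000
max_response_tokens = 1500
context_ceiling = 32690

available_tokens = context_ceiling - system_prompt_tokens - max_response_tokens  # 30690
# 25000 <= 30690, so n_ctx = 500 + 25000 + 1500 = 27000.
# Had the history been ~123,000 chars (~30750 tokens), it would exceed
# available_tokens and the ceiling of 32690 would be returned instead.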
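generate_topic loads a second Llama with a small fixed window (n_ctx=2690), which is plenty for one prompt/response pair plus a 2-4 word label. A sketch of the non-streaming call that would follow the f-string above; the max_tokens cap and the parsing are assumptions, not code from this file:

from llama_cpp import Llama

llm = Llama(model_path="model.gguf", n_ctx=2690, n_threads=8)  # path is a placeholder
prompt, response = "What's a monad?", "A monad is a structure for chaining computations..."
system_prompt = (f"Based on the following interaction between a user and an AI assistant, "
                 f"generate a concise topic for the conversation in 2-4 words:\n\n"
                 f"User: {prompt}\nAssistant: {response}\n\nTopic:")
# Without stream=True the call returns a single completion dict.
completion = llm(system_prompt, max_tokens=12)    # small cap: the label is only 2-4 words
topic = completion['choices'][0]['text'].strip()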