PMAlpha

Sleeping

Sergidev committed on Aug 5

Commit

9e5bdd2

•

1 Parent(s): 2c4ad30

Update modules/pmbl.py

Files changed (1) hide show

modules/pmbl.py CHANGED Viewed

@@ -102,9 +102,18 @@ class PMBL:
             yield chunk
     def generate_response_task(self, system_prompt, prompt, n_ctx):
-        llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, use_flash_attn=True, mlock=True)
-        response = llm(
             system_prompt,
             max_tokens=1500,
             temperature=0.7,
@@ -147,7 +156,12 @@ class PMBL:
         conn.close()
     def generate_topic(self, prompt, response):
-        llm = Llama(model_path=self.model_path, n_ctx=1690, n_threads=2, n_gpu_layers=-1, mlock=True)
         system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"

             yield chunk
     def generate_response_task(self, system_prompt, prompt, n_ctx):
+        def generate_response_task(self, system_prompt, prompt, n_ctx):
+        llm = Llama(
+            model_path=self.model_path,
+            n_ctx=n_ctx,
+            n_threads=8,
+            n_gpu_layers=-1,
+            use_mlock=True,
+            use_mmap=True,
+            use_flash_attn=True
+        )
+        response = llm.generate(
             system_prompt,
             max_tokens=1500,
             temperature=0.7,
         conn.close()
     def generate_topic(self, prompt, response):
+        llm = Llama(
+            model_path=self.model_path,
+            n_ctx=n_ctx,
+            n_threads=2,
+            n_gpu_layers=2,
+        )
         system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"