Sergidev committed on
Commit
3501e29
1 Parent(s): 19d4d38

Update modules/pmbl.py

Browse files
Files changed (1) hide show
  1. modules/pmbl.py +5 -2
modules/pmbl.py CHANGED
@@ -2,6 +2,7 @@ import sqlite3
2
  from datetime import datetime
3
  from llama_cpp import Llama
4
  from concurrent.futures import ThreadPoolExecutor
 
5
 
6
  class PMBL:
7
  def __init__(self, model_path):
@@ -101,8 +102,9 @@ class PMBL:
101
  for chunk in response.result():
102
  yield chunk
103
 
 
104
  def generate_response_task(self, system_prompt, prompt, n_ctx):
105
- llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, mlock=True)
106
 
107
  response = llm(
108
  system_prompt,
@@ -147,8 +149,9 @@ class PMBL:
147
 
148
  conn.close()
149
 
 
150
  def generate_topic(self, prompt, response):
151
- llm = Llama(model_path=self.model_path, n_ctx=1690, n_threads=8, mlock=True)
152
 
153
  system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
154
 
 
2
  from datetime import datetime
3
  from llama_cpp import Llama
4
  from concurrent.futures import ThreadPoolExecutor
5
+ import spaces
6
 
7
  class PMBL:
8
  def __init__(self, model_path):
 
102
  for chunk in response.result():
103
  yield chunk
104
 
105
+ @spaces.GPU
106
  def generate_response_task(self, system_prompt, prompt, n_ctx):
107
+ llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, mlock=True, n_gpu_layers=42)
108
 
109
  response = llm(
110
  system_prompt,
 
149
 
150
  conn.close()
151
 
152
+ @spaces.GPU
153
  def generate_topic(self, prompt, response):
154
+ llm = Llama(model_path=self.model_path, n_ctx=1690, n_threads=8, mlock=True, n_gpu_layers=42)
155
 
156
  system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
157