zeroGPU v1
modules/pmbl.py  (+8 -2)
--- a/modules/pmbl.py
+++ b/modules/pmbl.py
@@ -1,6 +1,8 @@
 import sqlite3
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor
+import torch
+from huggingface_hub import spaces
 
 class PMBL:
     def __init__(self, model_path):
@@ -100,9 +102,11 @@ class PMBL:
 
         for chunk in response.result():
             yield chunk
-
+
+    @spaces.gpu
     def generate_response_task(self, system_prompt, prompt, n_ctx):
         llm = Llama(model_path=self.model_path, n_ctx=n_ctx, n_threads=8, n_gpu_layers=-1, mlock=True)
+        llm = llm.to("cuda")  # Move the model to the GPU
 
         response = llm(
             system_prompt,
@@ -146,9 +150,11 @@ class PMBL:
         conn.commit()
 
         conn.close()
-
+
+    @spaces.gpu
     def generate_topic(self, prompt, response):
         llm = Llama(model_path=self.model_path, n_ctx=1690, n_threads=8, n_gpu_layers=-1, mlock=True)
+        llm = llm.to("cuda")  # Move the model to the GPU
 
         system_prompt = f"Based on the following interaction between a user and an AI assistant, generate a concise topic for the conversation in 2-4 words:\n\nUser: {prompt}\nAssistant: {response}\n\nTopic:"
 
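Note on the API used in this commit: on current ZeroGPU Spaces the decorator comes from the standalone `spaces` package (`import spaces`) and is spelled `@spaces.GPU`; `huggingface_hub` does not export a `spaces` module, so the import added above would fail. Likewise, llama-cpp-python's `Llama` object has no `.to()` method; GPU offload is already requested by `n_gpu_layers=-1` in the constructor. A minimal sketch of the same change under those assumptions (class and method names taken from the diff; the streaming/ThreadPoolExecutor plumbing is unchanged and omitted here):

import spaces
from llama_cpp import Llama

class PMBL:
    def __init__(self, model_path):
        self.model_path = model_path

    @spaces.GPU  # ZeroGPU attaches a GPU only for the duration of this call
    def generate_response_task(self, system_prompt, prompt, n_ctx):
        # n_gpu_layers=-1 offloads every layer to the GPU; no .to("cuda") needed
        llm = Llama(model_path=self.model_path, n_ctx=n_ctx,
                    n_threads=8, n_gpu_layers=-1, mlock=True)
        return llm(system_prompt)

The same `@spaces.GPU` decoration would apply to generate_topic. Whether `import torch` is still needed depends on the rest of the module; it is unused in the hunks shown.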