Update app.py
app.py CHANGED
@@ -2,19 +2,18 @@ from fastapi import FastAPI, Request
 from fastapi.responses import HTMLResponse, StreamingResponse
 from fastapi.staticfiles import StaticFiles
 from modules.pmbl import PMBL
-import
+from llama_cpp import Llama
 
-
-
-
-print(f"CUDA device name: {torch.cuda.get_device_name(0)}")
+# Check CUDA availability
+llm = Llama(model_path="./PMB-7b.Q6_K.gguf", n_gpu_layers=-1)
+print(f"CUDA available: {llm.model.context_params.n_gpu_layers > 0}")
 
 app = FastAPI(docs_url=None, redoc_url=None)
 
 app.mount("/static", StaticFiles(directory="static"), name="static")
 app.mount("/templates", StaticFiles(directory="templates"), name="templates")
 
-pmbl = PMBL("./PMB-7b.Q6_K.gguf") #
+pmbl = PMBL("./PMB-7b.Q6_K.gguf") # Path to your .gguf model
 
 @app.head("/")
 @app.get("/")
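
The commit drops the torch-based device-name print (along with a dangling `import` line) in favor of probing GPU offload through llama-cpp-python. One hedged aside: the attribute chain `llm.model.context_params` in the added code is not part of llama-cpp-python's documented API, so a more conservative startup check might simply report the offload that was requested. A minimal sketch, assuming the llama-cpp-python package and the same model path as the commit:

from llama_cpp import Llama

N_GPU_LAYERS = -1  # -1 asks llama.cpp to offload every layer to the GPU

# Load the model once at startup (same .gguf path the commit uses).
llm = Llama(model_path="./PMB-7b.Q6_K.gguf", n_gpu_layers=N_GPU_LAYERS)

# Report the requested offload; the number of layers that actually landed
# on the GPU appears in llama.cpp's own load-time log output.
print(f"GPU offload requested: {N_GPU_LAYERS != 0}")

Whether offload happens at all depends on the installed wheel being built with CUDA support; a CPU-only build simply ignores n_gpu_layers.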