PMAlpha

Sleeping

PMAlpha / app.py

use gpu (delete if incorrect)

20644c4 verified 3 months ago

1.55 kB

	import torch
	from fastapi import FastAPI, Request
	from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
	from fastapi.staticfiles import StaticFiles

	from modules.pmbl import PMBL

	def _report_cuda_status():
	    """Print CUDA availability, the device count and, if usable, device 0's name."""
	    print(f"CUDA available: {torch.cuda.is_available()}")
	    print(f"CUDA device count: {torch.cuda.device_count()}")
	    if not torch.cuda.is_available():
	        # No usable device, so there is no name to look up.
	        return
	    print(f"CUDA device name: {torch.cuda.get_device_name(0)}")


	# Emit the diagnostics once, at import time, before the app is built.
	_report_cuda_status()

	# The ASGI application; both interactive documentation URLs are set to None.
	app = FastAPI(
	    docs_url=None,
	    redoc_url=None,
	)

	# Serve the two asset directories as static file trees under matching URL prefixes.
	app.mount(
	    path="/static",
	    app=StaticFiles(directory="static"),
	    name="static",
	)
	app.mount(
	    path="/templates",
	    app=StaticFiles(directory="templates"),
	    name="templates",
	)

	# Shared PMBL instance used by the request handlers.
	# Replace the first argument with the path to your model file.
	pmbl = PMBL(
	    "./PMB-7b.Q6_K.gguf",
	    use_gpu=True,
	)

	@app.head("/")
	@app.get("/")
	def index() -> HTMLResponse:
	    """Serve the landing page for GET and HEAD requests to ``/``.

	    The template is read from ``templates/index.html`` on every call.

	    Returns:
	        An ``HTMLResponse`` whose body is the file's contents.

	    Raises:
	        OSError: if the template file cannot be opened.
	    """
	    # Decode explicitly as UTF-8: without ``encoding`` open() falls back to
	    # the locale's preferred encoding, which can differ between hosts.
	    with open("templates/index.html", encoding="utf-8") as f:
	        return HTMLResponse(content=f.read())

	@app.post("/chat")
	async def chat(request: Request):
	    """Stream a model reply for one chat turn.

	    Expects a JSON object body with the fields ``user_input`` and ``mode``.
	    Both are passed to ``pmbl.get_chat_history`` and then, together with the
	    returned history, to ``pmbl.generate_response``.

	    Returns:
	        A ``text/plain`` ``StreamingResponse`` wrapping the value returned by
	        ``pmbl.generate_response`` on success. On failure, a JSON body of the
	        form ``{"error": "<message>"}`` with status 400 (malformed or
	        incomplete request) or 500 (any other exception).

	    NOTE(review): only exceptions raised before the response object is
	    returned are handled here; presumably the generator is consumed later
	    while streaming, so errors raised at that point are not covered.
	    """
	    try:
	        try:
	            data = await request.json()
	        except ValueError as e:
	            # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
	            print(f"[SYSTEM] Error: {e}")
	            return JSONResponse(
	                {"error": f"Invalid JSON body: {e}"}, status_code=400
	            )

	        if not isinstance(data, dict):
	            message = "Request body must be a JSON object"
	            print(f"[SYSTEM] Error: {message}")
	            return JSONResponse({"error": message}, status_code=400)

	        # Report every absent field at once instead of a bare KeyError text.
	        missing = [field for field in ("user_input", "mode") if field not in data]
	        if missing:
	            message = f"Missing required field(s): {', '.join(missing)}"
	            print(f"[SYSTEM] Error: {message}")
	            return JSONResponse({"error": message}, status_code=400)

	        user_input = data["user_input"]
	        mode = data["mode"]
	        history = pmbl.get_chat_history(mode, user_input)
	        response_generator = pmbl.generate_response(user_input, history, mode)
	        return StreamingResponse(response_generator, media_type="text/plain")
	    except Exception as e:
	        # Top-level boundary: log, then report the failure with a 5xx status
	        # instead of a 200 so clients can detect it from the status line.
	        print(f"[SYSTEM] Error: {e}")
	        return JSONResponse({"error": str(e)}, status_code=500)

	@app.post("/sleep")
	async def sleep():
	    """Run ``pmbl.sleep_mode()`` and report the outcome.

	    Returns:
	        ``{"message": "Sleep mode completed successfully"}`` when the call
	        returns normally. If it raises, a JSON body ``{"error": "<message>"}``
	        with HTTP status 500.
	    """
	    try:
	        pmbl.sleep_mode()
	    except Exception as e:
	        # Top-level boundary: log, then signal the failure with a 5xx status
	        # rather than a 200 carrying an error payload.
	        print(f"[SYSTEM] Error: {e}")
	        return JSONResponse({"error": str(e)}, status_code=500)
	    return {"message": "Sleep mode completed successfully"}