from fastapi import FastAPI, Request, BackgroundTasks
from fastapi.responses import HTMLResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from modules.pmbl import PMBL
import torch
from queue import Queue
import asyncio
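
# Report CUDA availability at startup so it is obvious whether inference will run on the GPU or fall back to the CPU.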
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    print(f"CUDA device name: {torch.cuda.get_device_name(0)}")
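
# Disable the interactive API docs (Swagger/ReDoc); the app only serves the chat UI and its endpoints.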
app = FastAPI(docs_url=None, redoc_url=None)
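
# Serve front-end assets and HTML templates as static files.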
app.mount("/static", StaticFiles(directory="static"), name="static") |
|
app.mount("/templates", StaticFiles(directory="templates"), name="templates") |
|
|
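
# Load the local PMB-7b GGUF model (Q6_K quantized); gpu_layers presumably controls how many layers are offloaded to the GPU.
# request_queue is created for queuing incoming requests but is not referenced elsewhere in this file.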
pmbl = PMBL("./PMB-7b.Q6_K.gguf", gpu_layers=50)
request_queue = Queue()
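

# Serve the chat UI page for both GET and HEAD requests.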
@app.head("/")
@app.get("/")
def index() -> HTMLResponse:
    with open("templates/index.html") as f:
        return HTMLResponse(content=f.read())
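

# Pull the chat history for the given mode, then stream the model's response chunk by chunk.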
async def process_request(user_input: str, mode: str):
    history = pmbl.get_chat_history(mode, user_input)
    async for chunk in pmbl.generate_response(user_input, history, mode):
        yield chunk
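

# Chat endpoint: reads the user's input and mode from the JSON body and streams the reply as plain text.
# Errors are logged to the console and returned as a JSON object rather than raising an HTTP error.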
@app.post("/chat")
async def chat(request: Request, background_tasks: BackgroundTasks):
    try:
        data = await request.json()
        user_input = data["user_input"]
        mode = data["mode"]

        async def stream_response():
            async for chunk in process_request(user_input, mode):
                yield chunk

        return StreamingResponse(stream_response(), media_type="text/plain")
    except Exception as e:
        print(f"[SYSTEM] Error: {str(e)}")
        return {"error": str(e)}
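

# Sleep endpoint: runs the PMBL sleep-mode routine and reports whether it completed.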
@app.post("/sleep")
async def sleep():
    try:
        pmbl.sleep_mode()
        return {"message": "Sleep mode completed successfully"}
    except Exception as e:
        print(f"[SYSTEM] Error: {str(e)}")
        return {"error": str(e)}