Sergidev committed
Commit 0ffdf21
Parent: 4ec5dc5

Update app.py

Files changed (1)
app.py +5 -29
app.py CHANGED
@@ -1,9 +1,8 @@
-from fastapi import FastAPI, Request, BackgroundTasks
+from fastapi import FastAPI, Request
 from fastapi.responses import HTMLResponse, StreamingResponse
 from fastapi.staticfiles import StaticFiles
 from modules.pmbl import PMBL
 import torch
-import asyncio
 
 print(f"CUDA available: {torch.cuda.is_available()}")
 print(f"CUDA device count: {torch.cuda.device_count()}")
@@ -16,8 +15,6 @@ app.mount("/static", StaticFiles(directory="static"), name="static")
 app.mount("/templates", StaticFiles(directory="templates"), name="templates")
 
 pmbl = PMBL("./PMB-7b.Q6_K.gguf", gpu_layers=50)
-request_queue = asyncio.Queue()
-processing_lock = asyncio.Lock()
 
 @app.head("/")
 @app.get("/")
@@ -25,40 +22,19 @@ def index() -> HTMLResponse:
     with open("templates/index.html") as f:
         return HTMLResponse(content=f.read())
 
-async def process_request(user_input: str, mode: str):
-    async with processing_lock:
-        history = pmbl.get_chat_history(mode, user_input)
-        async for chunk in pmbl.generate_response(user_input, history, mode):
-            yield chunk
-
 @app.post("/chat")
-async def chat(request: Request, background_tasks: BackgroundTasks):
+async def chat(request: Request):
     try:
         data = await request.json()
         user_input = data["user_input"]
         mode = data["mode"]
-
-        async def response_generator():
-            await request_queue.put((user_input, mode))
-            async for chunk in await process_request(user_input, mode):
-                yield chunk
-
-        return StreamingResponse(response_generator(), media_type="text/plain")
+        history = pmbl.get_chat_history(mode, user_input)
+        response_generator = pmbl.generate_response(user_input, history, mode)
+        return StreamingResponse(response_generator, media_type="text/plain")
     except Exception as e:
         print(f"[SYSTEM] Error: {str(e)}")
        return {"error": str(e)}
 
-async def queue_worker():
-    while True:
-        user_input, mode = await request_queue.get()
-        async for _ in process_request(user_input, mode):
-            pass
-        request_queue.task_done()
-
-@app.on_event("startup")
-async def startup_event():
-    asyncio.create_task(queue_worker())
-
 @app.post("/sleep")
 async def sleep():
     try:
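
Design note: StreamingResponse accepts an async iterable directly, and pmbl.generate_response is an async generator (the removed process_request iterated it with async for), so the handler can return it without the queue, lock, and startup worker that this commit deletes.

For reference, a minimal sketch of a client consuming the streaming /chat endpoint after this change. The JSON keys ("user_input", "mode") and the text/plain streaming come from the handler above; the httpx client, the localhost URL, and the "chat" mode value are assumptions for illustration.

import httpx  # assumed streaming-capable HTTP client; not part of this repo

# Hypothetical local address; adjust to wherever the FastAPI app is served.
URL = "http://localhost:8000/chat"

def stream_chat(user_input: str, mode: str) -> None:
    # POST the same JSON keys the /chat handler reads: "user_input" and "mode".
    payload = {"user_input": user_input, "mode": mode}
    # The endpoint returns a text/plain StreamingResponse, so print chunks as they arrive.
    with httpx.stream("POST", URL, json=payload, timeout=None) as response:
        for chunk in response.iter_text():
            print(chunk, end="", flush=True)

if __name__ == "__main__":
    stream_chat("Hello!", "chat")  # "chat" mode value is an assumption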