Spaces:

omkar334
/

agentic_rag

Runtime error

File size: 1,606 Bytes

import asyncio
import io
from string import ascii_lowercase

import aiohttp

from client import HybridClient
from preprocessing import index_pdf

# The first ten lowercase letters ("a" through "j"). get_url indexes this
# string with the grade to obtain the leading letter of the PDF file name.
grade_map = ascii_lowercase[:10]

# Subject name -> code that get_url places right after the grade letter
# when it assembles the PDF file name.
subject_map = dict(
    science="esc1",
    geography="ess1",
    economics="ess2",
    history="ess3",
    politics="ess4",
)


def get_url(grade, subject, chapter):
    """Build the ncert.nic.in textbook PDF URL for a single chapter.

    The file name is made of three parts, concatenated in this order:
    the grade letter (``grade_map[grade]``), the subject code
    (``subject_map[subject]``) and the chapter number left-padded with
    zeros to at least two characters.

    Args:
        grade: Index into ``grade_map``.
        subject: Key of ``subject_map``.
        chapter: Chapter number; converted with ``str`` before padding.

    Returns:
        The URL string ``https://ncert.nic.in/textbook/pdf/<file name>.pdf``.

    Raises:
        IndexError: If ``grade`` is outside the range of ``grade_map``.
        KeyError: If ``subject`` is not a key of ``subject_map``.
    """
    # NOTE(review): grade is used as a direct index, so 0 selects "a" --
    # confirm that callers pass a zero-based grade.
    grade_letter = grade_map[grade]
    subject_code = subject_map[subject]
    chapter_code = str(chapter).zfill(2)
    return f"https://ncert.nic.in/textbook/pdf/{grade_letter}{subject_code}{chapter_code}.pdf"


async def get_book(grade, subject):
    """Download every available chapter PDF for a grade/subject pair.

    Chapters are requested in ascending order starting at 1, all through
    one shared ``aiohttp.ClientSession``. The loop stops at the first
    chapter for which ``download`` returns ``None``.

    Args:
        grade: Grade value forwarded to ``get_url``.
        subject: Subject name forwarded to ``get_url``.

    Returns:
        A dict mapping ``"{grade}_{subject}_{chapter}"`` to the value
        returned by ``download`` for that chapter. The dict is empty when
        the first chapter cannot be downloaded.
    """
    book = {}
    chapter_num = 1
    async with aiohttp.ClientSession() as session:
        while True:
            url = get_url(grade, subject, chapter_num)

            # download is a coroutine function: it must be awaited to get
            # the PDF (or None) rather than an always-truthy coroutine.
            pdf = await download(session, url)
            if pdf is None:
                break

            # The chapter number is part of the key so that chapters do not
            # overwrite one another in the result.
            book[f"{grade}_{subject}_{chapter_num}"] = pdf

            # Advance, otherwise the same chapter is requested forever.
            chapter_num += 1
    return book


async def download(session, url):
    """Fetch ``url`` through ``session`` into an in-memory buffer.

    The response body is streamed in chunks of up to 1,000,000 bytes and
    written to an ``io.BytesIO``.

    Args:
        session: Object whose ``get(url, timeout=...)`` returns an async
            context manager yielding the response (``get_book`` passes an
            ``aiohttp.ClientSession``).
        url: Address of the file to download.

    Returns:
        The ``io.BytesIO`` holding the body, rewound to position 0, or
        ``None`` when any exception is raised while requesting or reading
        (the error is printed, not propagated).
    """
    buffer = io.BytesIO()
    try:
        async with session.get(url, timeout=10) as response:
            # Error statuses raise here and are reported by the handler below.
            response.raise_for_status()

            async for piece in response.content.iter_chunked(1_000_000):
                buffer.write(piece)
    except Exception as e:
        print(f"Error downloading or processing PDF: {e}")
        return None

    # Rewind so the consumer reads from the start of the data.
    buffer.seek(0)
    return buffer


def upload_book(grade, subject):
    """Download a book and insert its indexed contents via ``HybridClient``.

    For every ``(collection, pdf)`` pair returned by ``get_book``, the PDF
    is passed to ``index_pdf``, the collection is created on the client and
    the resulting chunks are inserted into it.

    Args:
        grade: Grade value forwarded to ``get_book``.
        subject: Subject name forwarded to ``get_book``.

    Raises:
        RuntimeError: If called while an asyncio event loop is already
            running in this thread (raised by ``asyncio.run``).
    """
    hclient = HybridClient()

    # get_book is a coroutine function; this synchronous entry point has to
    # run it on an event loop to obtain the dict instead of a coroutine.
    book = asyncio.run(get_book(grade, subject))
    for collection, pdf in book.items():
        chunks = index_pdf(pdf)

        hclient.create(collection)
        hclient.insert(collection, chunks)