omkar334 committed on
Commit
333a935
1 Parent(s): b0a33e4

upload_book

Browse files
Files changed (1) hide show
  1. scraper.py +17 -3
scraper.py CHANGED
@@ -3,6 +3,9 @@ from string import ascii_lowercase
3
 
4
  import aiohttp
5
 
 
 
 
6
  grade_map = ascii_lowercase[:10]
7
 
8
  subject_map = {
@@ -21,7 +24,7 @@ def get_url(grade, subject, chapter):
21
 
22
 
23
  async def get_book(grade, subject):
24
- book = []
25
  chapter_num = 1
26
  async with aiohttp.ClientSession() as session:
27
  while True:
@@ -30,8 +33,8 @@ async def get_book(grade, subject):
30
  pdf = download(session, url)
31
 
32
  if pdf:
33
- chapter = (pdf, grade, subject)
34
- book.append(chapter)
35
  else:
36
  break
37
  return book
@@ -52,3 +55,14 @@ async def download(session, url):
52
  except Exception as e:
53
  print(f"Error downloading or processing PDF: {e}")
54
  return None
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  import aiohttp
5
 
6
+ from client import HybridClient
7
+ from preprocessing import index_pdf
8
+
9
  grade_map = ascii_lowercase[:10]
10
 
11
  subject_map = {
 
24
 
25
 
26
  async def get_book(grade, subject):
27
+ book = {}
28
  chapter_num = 1
29
  async with aiohttp.ClientSession() as session:
30
  while True:
 
33
  pdf = download(session, url)
34
 
35
  if pdf:
36
+ collection = f"{grade}_{subject}"
37
+ book[collection] = pdf
38
  else:
39
  break
40
  return book
 
55
  except Exception as e:
56
  print(f"Error downloading or processing PDF: {e}")
57
  return None
58
+
59
+
60
+ def upload_book(grade, subject):
61
+ hclient = HybridClient()
62
+
63
+ book = get_book(grade, subject)
64
+ for collection, pdf in book.items():
65
+ chunks = index_pdf(pdf)
66
+
67
+ hclient.create(collection)
68
+ hclient.insert(collection, chunks)