Spaces:
Sleeping
:recycle: [Refactor] QueryResultsExtractor: prettify logging
Browse files
documents/query_results_extractor.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from bs4 import BeautifulSoup
|
2 |
from pathlib import Path
|
|
|
3 |
|
4 |
|
5 |
class QueryResultsExtractor:
|
@@ -24,7 +25,9 @@ class QueryResultsExtractor:
|
|
24 |
if abstract_element is None:
|
25 |
abstract_element = result.find("div", class_="ITZIwc")
|
26 |
abstract = abstract_element.text.strip()
|
27 |
-
|
|
|
|
|
28 |
self.query_results.append(
|
29 |
{
|
30 |
"title": title,
|
@@ -35,7 +38,7 @@ class QueryResultsExtractor:
|
|
35 |
"type": "web",
|
36 |
}
|
37 |
)
|
38 |
-
|
39 |
|
40 |
def extract_related_questions(self):
|
41 |
related_question_elements = self.soup.find_all(
|
@@ -45,7 +48,7 @@ class QueryResultsExtractor:
|
|
45 |
question = question_element.find("span").text.strip()
|
46 |
print(question)
|
47 |
self.related_questions.append(question)
|
48 |
-
|
49 |
|
50 |
def extract(self, html_path):
|
51 |
self.load_html(html_path)
|
|
|
1 |
from bs4 import BeautifulSoup
|
2 |
from pathlib import Path
|
3 |
+
from utils.logger import logger
|
4 |
|
5 |
|
6 |
class QueryResultsExtractor:
|
|
|
25 |
if abstract_element is None:
|
26 |
abstract_element = result.find("div", class_="ITZIwc")
|
27 |
abstract = abstract_element.text.strip()
|
28 |
+
logger.mesg(
|
29 |
+
f"{title}\n" f" - {site}\n" f" - {url}\n" f" - {abstract}\n" f"\n"
|
30 |
+
)
|
31 |
self.query_results.append(
|
32 |
{
|
33 |
"title": title,
|
|
|
38 |
"type": "web",
|
39 |
}
|
40 |
)
|
41 |
+
logger.success(f"- {len(query_result_elements)} query results")
|
42 |
|
43 |
def extract_related_questions(self):
|
44 |
related_question_elements = self.soup.find_all(
|
|
|
48 |
question = question_element.find("span").text.strip()
|
49 |
print(question)
|
50 |
self.related_questions.append(question)
|
51 |
+
logger.success(f"- {len(self.related_questions)} related questions")
|
52 |
|
53 |
def extract(self, html_path):
|
54 |
self.load_html(html_path)
|