Spaces: Sleeping
:gem: [Feature] SearchResultsExtractor: related questions
Browse files
documents/search_results_extractor.py
CHANGED
@@ -27,17 +27,21 @@ class SearchResultsExtractor:
|
|
27 |
print(
|
28 |
f"{title}\n" f" - {site}\n" f" - {link}\n" f" - {abstract}\n" f"\n"
|
29 |
)
|
|
|
30 |
|
31 |
def extract_related_questions(self):
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
34 |
print(question)
|
35 |
-
|
36 |
-
# print(question.find("a").text)
|
37 |
|
38 |
def extract(self, html_path):
|
39 |
self.load_html(html_path)
|
40 |
self.extract_search_results()
|
|
|
41 |
|
42 |
|
43 |
if __name__ == "__main__":
|
|
|
27 |
print(
|
28 |
f"{title}\n" f" - {site}\n" f" - {link}\n" f" - {abstract}\n" f"\n"
|
29 |
)
|
30 |
+
print(len(search_result_elements))
|
31 |
|
32 |
def extract_related_questions(self):
|
33 |
+
related_question_elements = self.soup.find_all(
|
34 |
+
"div", class_="related-question-pair"
|
35 |
+
)
|
36 |
+
for question_element in related_question_elements:
|
37 |
+
question = question_element.find("span").text.strip()
|
38 |
print(question)
|
39 |
+
print(len(related_question_elements))
|
|
|
40 |
|
41 |
def extract(self, html_path):
|
42 |
self.load_html(html_path)
|
43 |
self.extract_search_results()
|
44 |
+
self.extract_related_questions()
|
45 |
|
46 |
|
47 |
if __name__ == "__main__":
|