Kajise committed on
Commit
7f01d00
1 Parent(s): 04cc455

Delete SearchResult.py

Browse files
Files changed (1) hide show
  1. SearchResult.py +0 -45
SearchResult.py DELETED
@@ -1,45 +0,0 @@
1
- import re
2
- import requests
3
- from bs4 import BeautifulSoup
4
-
5
class SearchResult:
    """Hold search-result links and extract readable text from the first usable one.

    The constructor stores its arguments as-is; ``parse_results`` downloads
    and strips a page from the stored links.
    """

    # Links containing "twitter" (any letter case) are never fetched.
    # Compiled once at class creation instead of on every loop iteration.
    _TWITTER_PATTERN = re.compile(r"twitter", re.IGNORECASE)

    # Tags whose text is collected, in the order they are scanned.
    _RELEVANT_TAGS = ("p", "li", "h1", "h2", "h3", "h4", "h5", "h6")

    # CSS class values rejected by the ``class_`` filter.
    _EXCLUDED_CLASSES = ("ads", "header", "footer")

    _DEFAULT_TITLE = "No title provided"

    def __init__(
        self,
        results: list[str],
        user_agent: str,
        did_you_mean: str = "",
        tailored_query: str = "",
    ):
        """Store the result links and the query metadata.

        Args:
            results: Result links; ``parse_results`` ignores the first entry.
            user_agent: Value sent as the ``User-Agent`` request header.
            did_you_mean: Stored as ``suggestion_query``.
            tailored_query: Stored as ``tailored_query``.
        """
        self.results = results
        self.user_agent = user_agent
        self.suggestion_query = did_you_mean
        self.tailored_query = tailored_query

    def parse_results(self, timeout: float = 10.0) -> list[dict[str, str]]:
        """Fetch the first non-Twitter link and return its title and text.

        The first element of ``self.results`` is always skipped. Remaining
        links are tried in order; the first one that does not contain
        "twitter" is downloaded and parsed, and no further links are fetched.

        Args:
            timeout: Seconds to wait for the HTTP request before
                ``requests`` gives up, so an unresponsive host cannot block
                the caller indefinitely.

        Returns:
            A list with at most one dict of the form
            ``{"page_title": str, "text_content": str}``; empty when no
            eligible link exists.

        Exceptions raised by ``requests.get`` are not caught here.
        """
        headers = {"User-Agent": self.user_agent}
        stripped_pages: list[dict[str, str]] = []

        for link_entry in self.results[1:]:
            if self._TWITTER_PATTERN.search(link_entry):
                continue

            response = requests.get(link_entry, headers=headers, timeout=timeout)
            soup = BeautifulSoup(response.text, "html.parser")
            stripped_pages.append(
                {
                    "page_title": self._page_title(soup),
                    "text_content": self._page_text(soup),
                }
            )
            # Only a single page is ever collected, so stop after the first hit.
            break

        return stripped_pages

    @classmethod
    def _page_title(cls, soup) -> str:
        """Return the document title, or a placeholder when it is missing or empty."""
        # Pages without a <title> element give ``soup.title is None``; guard
        # against that instead of raising AttributeError on ``.string``.
        title_tag = soup.title
        title = title_tag.string if title_tag is not None else None
        return title or cls._DEFAULT_TITLE

    @classmethod
    def _page_text(cls, soup) -> str:
        """Return the non-empty text of the relevant tags, one entry per line."""
        excluded = cls._EXCLUDED_CLASSES

        def class_allowed(css_class):
            # NOTE(review): Beautiful Soup appears to test each class value of
            # a multi-class element separately, so an element such as
            # class="ads wide" may still pass this filter -- confirm intent.
            return css_class not in excluded

        lines = []
        # Scanned tag by tag, so the output is grouped by tag name rather
        # than following document order.
        for tag in cls._RELEVANT_TAGS:
            for element in soup.find_all(tag, class_=class_allowed):
                text = element.text.strip()
                if text:
                    lines.append(text + "\n")
        return "".join(lines)