# Generated by CodiumAI

import requests

from autogpt.commands.web_requests import scrape_text

"""
Code Analysis

Objective:
The objective of the "scrape_text" function is to scrape the text content from
a given URL and return it as a string, after removing any unwanted HTML tags
and scripts.

Inputs:
- url: a string representing the URL of the webpage to be scraped.

Flow:
1. Send a GET request to the given URL using the requests library and the user
   agent header from the config file.
2. Check if the response contains an HTTP error. If it does, return an error
   message.
3. Use BeautifulSoup to parse the HTML content of the response and extract all
   script and style tags.
4. Get the text content of the remaining HTML using the get_text() method of
   BeautifulSoup.
5. Split the text into lines and then into chunks, removing any extra
   whitespace.
6. Join the chunks into a single string with newline characters between them.
7. Return the cleaned text.

Outputs:
- A string representing the cleaned text content of the webpage.

Additional aspects:
- The function uses the requests library and BeautifulSoup to handle the HTTP
  request and HTML parsing, respectively.
- The function removes script and style tags from the HTML to avoid including
  unwanted content in the text output.
- The function uses a generator expression to split the text into lines and
  chunks, which can improve performance for large amounts of text.
"""


class TestScrapeText:
    # Tests that scrape_text() returns the expected text when given a valid URL.
    def test_scrape_text_with_valid_url(self, mocker):
        # Patch requests.Session.get so no network request is made; the mock
        # response carries the expected text wrapped in HTML.
        expected_text = "This is some sample text"
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        # NOTE(review): the markup around {expected_text} was stripped from the
        # file; these tags are a reconstruction - confirm against history.
        mock_response.text = f"<html><body><p>{expected_text}</p></body></html>"
        mocker.patch("requests.Session.get", return_value=mock_response)

        # NOTE(review): the call and assertion were lost with the markup; they
        # are restored from the test's stated purpose - confirm.
        result = scrape_text("https://www.example.com")

        # Only the text content should remain once the tags are removed.
        assert result == expected_text

    # Tests that scrape_text() properly handles HTML tags.
    # NOTE(review): the original `def` line was lost; the method name is a
    # reconstruction - confirm against history.
    def test_scrape_text_with_html_tags(self, mocker):
        # Mock response whose body contains nested inline tags.
        # NOTE(review): the tags were stripped from the file; the <b> element
        # is inferred from the expected string "This is bold text." - confirm.
        html = "<html><body><p>This is <b>bold</b> text.</p></body></html>"
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        mock_response.text = html
        mocker.patch("requests.Session.get", return_value=mock_response)

        # Call the function with a URL
        result = scrape_text("https://www.example.com")

        # Check that the function properly handles HTML tags
        assert result == "This is bold text."