aiflows
/

ControllerExecutorFlowModule

Model card Files Files and versions Community

martinjosifoski committed on Nov 2, 2023

Commit

cbb225c

•

1 Parent(s): 9042d2b

First commit.

Browse files

Files changed (11) hide show

.gitignore +440 -0
ControllerAtomicFlow.py +55 -0
ControllerAtomicFlow.yaml +91 -0
ControllerExecutorFlow.py +29 -0
ControllerExecutorFlow.yaml +65 -0
README.md +2 -0
WikiSearchAtomicFlow.py +44 -0
WikiSearchAtomicFlow.yaml +17 -0
__init__.py +3 -0
pip_requirements.txt +1 -0
wikipediaAPI.py +117 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,440 @@

+# Created by https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
+# Edit at https://www.toptal.com/developers/gitignore?templates=python,java,c++,pycharm,visualstudiocode,macos,linux,windows
+### C++ ###
+# Prerequisites
+*.d
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+# Precompiled Headers
+*.gch
+*.pch
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+# Fortran module files
+*.mod
+*.smod
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+# Executables
+*.exe
+*.out
+*.app
+### Java ###
+# Compiled class file
+*.class
+# Log file
+*.log
+# BlueJ files
+*.ctxt
+# Mobile Tools for Java (J2ME)
+.mtj.tmp/
+# Package Files #
+*.jar
+*.war
+*.nar
+*.ear
+*.zip
+*.tar.gz
+*.rar
+# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
+hs_err_pid*
+replay_pid*
+### Linux ###
+*~
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+# KDE directory preferences
+.directory
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+# Icon must end with two \r
+Icon
+# Thumbnails
+._*
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+### macOS Patch ###
+# iCloud generated files
+*.icloud
+### PyCharm ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+# AWS User-specific
+.idea/**/aws.xml
+# Generated files
+.idea/**/contentModel.xml
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+# CMake
+cmake-build-*/
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+# File-based project format
+*.iws
+# IntelliJ
+out/
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+# JIRA plugin
+atlassian-ide-plugin.xml
+# Cursive Clojure plugin
+.idea/replstate.xml
+# SonarLint plugin
+.idea/sonarlint/
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+# Editor-based Rest Client
+.idea/httpRequests
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+### PyCharm Patch ###
+# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
+# *.iml
+# modules.xml
+# .idea/misc.xml
+# *.ipr
+# Sonarlint plugin
+# https://plugins.jetbrains.com/plugin/7973-sonarlint
+.idea/**/sonarlint/
+# SonarQube Plugin
+# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
+.idea/**/sonarIssues.xml
+# Markdown Navigator plugin
+# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
+.idea/**/markdown-navigator.xml
+.idea/**/markdown-navigator-enh.xml
+.idea/**/markdown-navigator/
+# Cache file creation bug
+# See https://youtrack.jetbrains.com/issue/JBR-2257
+.idea/$CACHE_FILE$
+# CodeStream plugin
+# https://plugins.jetbrains.com/plugin/12206-codestream
+.idea/codestream.xml
+# Azure Toolkit for IntelliJ plugin
+# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
+.idea/**/azureSettings.xml
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+# ruff
+.ruff_cache/
+# LSP config files
+pyrightconfig.json
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+!.vscode/*.code-snippets
+# Local History for Visual Studio Code
+.history/
+# Built Visual Studio Code Extensions
+*.vsix
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+# Dump file
+*.stackdump
+# Folder config file
+[Dd]esktop.ini
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+# Windows shortcuts
+*.lnk
+# End of https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows

ControllerAtomicFlow.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import json
+from copy import deepcopy
+from typing import Any, Dict, List
+from flows.application_flows import OpenAIChatAtomicFlow
+from dataclasses import dataclass
+@dataclass
+class Command:
+    # One entry of the commands manual that ControllerAtomicFlow injects into its system prompt.
+    # `name` and `description` are printed verbatim in the manual line for the command;
+    # `input_args` lists the argument names, each rendered as a "YOUR_<ARG>" placeholder
+    # in the JSON schema shown for the command.
+    name: str
+    description: str
+    input_args: List[str]
+class ControllerAtomicFlow(OpenAIChatAtomicFlow):
+    def __init__(self, commands: List[Command], **kwargs):
+        super().__init__(**kwargs)
+        self.system_message_prompt_template = self.system_message_prompt_template.partial(
+            commands=self._build_commands_manual(commands)
+        )
+    @staticmethod
+    def _build_commands_manual(commands: List[Command]) -> str:
+        ret = ""
+        for i, command in enumerate(commands):
+            command_input_json_schema = json.dumps(
+                {input_arg: f"YOUR_{input_arg.upper()}" for input_arg in command.input_args})
+            ret += f"{i + 1}. {command.name}: {command.description} Input arguments (given in the JSON schema): {command_input_json_schema}\n"
+        return ret
+    @classmethod
+    def instantiate_from_config(cls, config):
+        flow_config = deepcopy(config)
+        kwargs = {"flow_config": flow_config}
+        # ~~~ Set up prompts ~~~
+        kwargs.update(cls._set_up_prompts(flow_config))
+        # ~~~ Set up commands ~~~
+        commands = flow_config["commands"]
+        commands = [
+            Command(name, command_conf["description"], command_conf["input_args"]) for name, command_conf in
+            commands.items()
+        ]
+        kwargs.update({"commands": commands})
+        # ~~~ Instantiate flow ~~~
+        return cls(**kwargs)
+    def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+        api_output = super().run(input_data)["api_output"].strip()
+        response = json.loads(api_output)
+        return response

ControllerAtomicFlow.yaml ADDED Viewed

	@@ -0,0 +1,91 @@

+name: "ControllerFlow"
+description: "Proposes the next action to take towards achieving the goal, and prepares the input for the executor."
+enable_cache: True
+#######################################################
+# Input keys
+#######################################################
+input_interface_non_initialized: # initial input keys
+  - "goal"
+input_interface_initialized: # input_keys
+  - "observation"
+#######################################################
+# Output keys
+#######################################################
+output_interface:
+  - 'thought'
+  - 'reasoning'
+  - 'plan'
+  - 'criticism'
+  - 'speak'
+  - 'command'
+  - 'command_args'
+#######################################################
+# ToDo: Some parts of the prompt don't make sense -- update them
+system_message_prompt_template:
+  _target_: langchain.PromptTemplate
+  template: |2-
+    You are a smart AI assistant.
+    Your decisions must always be made independently without seeking user assistance.
+    Play to your strengths as an LLM and pursue simple strategies with no legal complications.
+    If you have completed all your tasks, make sure to use the "finish" command.
+    Constraints:
+    1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files
+    2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember
+    3. No user assistance
+    4. Exclusively use the commands listed in double quotes e.g. "command name"
+    Available commands:
+    {{commands}}
+    Resources:
+    1. Internet access for searches and information gathering.
+    2. Long Term memory management.
+    3. GPT-3.5 powered Agents for delegation of simple tasks.
+    Performance Evaluation:
+    1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.
+    2. Constructively self-criticize your big-picture behavior constantly.
+    3. Reflect on past decisions and strategies to refine your approach.
+    4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.
+    You should only respond in JSON format as described below
+    Response Format:
+    {
+    "thought": "thought",
+    "reasoning": "reasoning",
+    "plan": "- short bulleted\n- list that conveys\n- long-term plan",
+    "criticism": "constructive self-criticism",
+    "speak": "thoughts summary to say to user",
+    "command": "command name",
+    "command_args": {
+        "arg name": "value"
+        }
+    }
+    Ensure your responses can be parsed by Python json.loads
+  input_variables: ["commands"]
+  template_format: jinja2
+human_message_prompt_template:
+  _target_: langchain.PromptTemplate
+  template: |2-
+    Here is the response to your last action:
+    {{observation}}
+  input_variables:
+    - "observation"
+  template_format: jinja2
+init_human_message_prompt_template:
+  _target_: langchain.PromptTemplate
+  template: |2-
+    Here is the goal you need to achieve:
+    {{goal}}
+  input_variables:
+    - "goal"
+  template_format: jinja2

ControllerExecutorFlow.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from typing import Dict, Any
+from flows.base_flows import CircularFlow
+from flows.utils import logging
+from .ControllerAtomicFlow import ControllerAtomicFlow
+logging.set_verbosity_debug()
+log = logging.get_logger(__name__)
+class ControllerExecutorFlow(CircularFlow):
+    def _on_reach_max_round(self):
+        self._state_update_dict({
+            "answer": "The maximum amount of rounds was reached before the model found an answer.",
+            "status": "unfinished"
+        })
+    @CircularFlow.output_msg_payload_processor
+    def detect_finish_or_continue(self, output_payload: Dict[str, Any], src_flow: ControllerAtomicFlow) -> Dict[str, Any]:
+        command = output_payload["command"]
+        if command == "finish":
+            return {
+                "EARLY_EXIT": True,
+                "answer": output_payload["command_args"]["answer"],
+                "status": "finished"
+            }
+        else:
+            return output_payload

ControllerExecutorFlow.yaml ADDED Viewed

	@@ -0,0 +1,65 @@

+name: "CtrlEx"
+description: "ControllerExecutor (i.e., MRKL, ReAct) interaction implementation with Flows that approaches the problem solving in two phases: one Flow chooses the next step and another Flow executes it. This is repeated until the controller Flow concludes on an answer."
+max_rounds: 30
+### Information used by the default interface specification implementation
+input_interface:
+  - "goal"
+output_interface:
+  - "answer"
+  - "status"
+### Subflows specification
+subflows_config:
+  Controller:
+    _target_: .ControllerAtomicFlow.instantiate_from_default_config
+    finish:
+      description: "Signal that the objective has been satisfied, and returns the answer to the user."
+      input_args: ["answer"]
+# E.g.,
+#    commands:
+#      wiki_search:
+#        description: "Performs a search on Wikipedia."
+#        input_args: ["search_term"]
+#      ddg_search:
+#        description: "Query the search engine DuckDuckGo."
+#        input_args: ["query"]
+  Executor:
+    _target_: flows.base_flows.BranchingFlow.instantiate_from_default_config
+# E.g.,
+#    subflows_config:
+#      wiki_search:
+#        _target_: .WikiSearchAtomicFlow.instantiate_from_default_config
+#      ddg_search:
+#        _target_: flows.application_flows.LCToolFlowModule.LCToolFlow.instantiate_from_default_config
+#        backend:
+#          _target_: langchain.tools.DuckDuckGoSearchRun
+early_exit_key: "EARLY_EXIT"
+topology:
+  - goal: "Select the next action and prepare the input for the executor."
+    input_interface:
+      _target_: flows.interfaces.KeyInterface
+      additional_transformations:
+        - _target_: flows.data_transformations.KeyMatchInput
+    flow: Controller
+    output_interface:
+      _target_: detect_finish_or_continue
+    reset: false
+  - goal: "Execute the action specified by the Controller."
+    input_interface:
+      _target_: flows.interfaces.KeyInterface
+      keys_to_rename:
+        command: branch
+        command_args: branch_input_data
+      keys_to_select: ["branch", "branch_input_data"]
+    flow: Executor
+    output_interface:
+      _target_: flows.interfaces.KeyInterface
+      keys_to_rename:
+        branch_output_data: observation
+      keys_to_select: ["observation"]
+    reset: false

README.md CHANGED Viewed

@@ -1,3 +1,5 @@
 ---
 license: mit
 ---

 ---
 license: mit
 ---
+# ToDo

WikiSearchAtomicFlow.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from copy import deepcopy
+from typing import List, Dict, Optional, Any
+from flows.base_flows import AtomicFlow
+from flows.utils import logging
+from .wikipediaAPI import WikipediaAPIWrapper
+log = logging.get_logger(__name__)
+class WikiSearchAtomicFlow(AtomicFlow):
+    REQUIRED_KEYS_CONFIG = ["lang", "top_k_results", "doc_content_chars_max"]
+    REQUIRED_KEYS_CONSTRUCTOR = []
+    SUPPORTS_CACHING: bool = True
+    api_wrapper: WikipediaAPIWrapper
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+    def run(self,
+            input_data: Dict[str, Any]) -> Dict[str, Any]:
+        # ~~~ Process input ~~~
+        term = input_data.get("search_term", None)
+        api_wrapper = WikipediaAPIWrapper(
+            lang=self.flow_config["lang"],
+            top_k_results=self.flow_config["top_k_results"],
+            doc_content_chars_max=self.flow_config["doc_content_chars_max"]
+        )
+        # ~~~ Call ~~~
+        if page_content := api_wrapper._fetch_page(term):
+            search_response = {"wiki_content": page_content, "relevant_pages": None}
+        else:
+            page_titles = api_wrapper.search_page_titles(term)
+            search_response = {"wiki_content": None, "relevant_pages": f"Could not find [{term}]. similar: {page_titles}"}
+        # Log the update to the flow messages list
+        observation = search_response["wiki_content"] if search_response["wiki_content"] else search_response["relevant_pages"]
+        return {"wiki_content": observation}

WikiSearchAtomicFlow.yaml ADDED Viewed

	@@ -0,0 +1,17 @@

+name: WikiSearchAtomicFlow
+description: "A Flow that queries the wikipedia API for a page content."
+keep_raw_response: false
+lang: en
+top_k_results: 5
+doc_content_chars_max: 3000
+input_interface:
+  - "search_term"
+output_interface:
+  - "wiki_content"

__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+from .ControllerExecutorFlow import ControllerExecutorFlow
+from .ControllerAtomicFlow import ControllerAtomicFlow
+from .WikiSearchAtomicFlow import WikiSearchAtomicFlow

pip_requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ duckduckgo-search==3.9.2

wikipediaAPI.py ADDED Viewed

	@@ -0,0 +1,117 @@

+"""Util that calls Wikipedia. references: https://github.com/hwchase17/langchain/blob/9b615022e2b6a3591347ad77a3e21aad6cf24c49/docs/extras/modules/agents/tools/integrations/wikipedia.ipynb#L36"""
+import logging
+from typing import Any, Dict, List, Optional
+from pydantic import BaseModel, root_validator
+logger = logging.getLogger(__name__)
+WIKIPEDIA_MAX_QUERY_LENGTH = 300
+class WikipediaAPIWrapper(BaseModel):
+    """Wrapper around WikipediaAPI.
+    To use, you should have the ``wikipedia`` python package installed.
+    This wrapper will use the Wikipedia API to conduct searches and
+    fetch page summaries. By default, it will return the page summaries
+    of the top-k results.
+    It limits the Document content by doc_content_chars_max.
+    """
+    wiki_client: Any
+    top_k_results: int = 5
+    lang: str = "en"
+    doc_content_chars_max: int = 4000
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the python package exists in environment."""
+        try:
+            import wikipedia
+            wikipedia.set_lang(values["lang"])
+            values["wiki_client"] = wikipedia
+        except ImportError:
+            raise ImportError(
+                "Could not import wikipedia python package. "
+                "Please install it with `pip install wikipedia`."
+            )
+        return values
+    def run(self, query: str) -> str:
+        """Run Wikipedia search and get page summaries."""
+        page_titles = self.search_page_titles(query)
+        summaries = []
+        for page_title in page_titles:
+            if wiki_page := self._fetch_page(page_title):
+                if summary := self._formatted_page_summary(page_title, wiki_page):
+                    summaries.append(summary)
+        if not summaries:
+            return "No good Wikipedia Search Result was found"
+        return "\n\n".join(summaries)[: self.doc_content_chars_max]
+    def _fetch_page(self, page: str) -> Optional[str]:
+        try:
+            return self.wiki_client.page(title=page, auto_suggest=False).content[: self.doc_content_chars_max]
+        except (
+            self.wiki_client.exceptions.PageError,
+            self.wiki_client.exceptions.DisambiguationError,
+        ):
+            return None
+    def search_page_titles(self, query: str) -> List[str]:
+        """Run Wikipedia search and get page summaries."""
+        return self.wiki_client.search(query[:WIKIPEDIA_MAX_QUERY_LENGTH])[:self.top_k_results]
+    # def _page_to_document(self, page_title: str, wiki_page: Any) -> Document:
+    #     main_meta = {
+    #         "title": page_title,
+    #         "summary": wiki_page.summary,
+    #         "source": wiki_page.url,
+    #     }
+    #     add_meta = (
+    #         {
+    #             "categories": wiki_page.categories,
+    #             "page_url": wiki_page.url,
+    #             "image_urls": wiki_page.images,
+    #             "related_titles": wiki_page.links,
+    #             "parent_id": wiki_page.parent_id,
+    #             "references": wiki_page.references,
+    #             "revision_id": wiki_page.revision_id,
+    #             "sections": wiki_page.sections,
+    #         }
+    #         if self.load_all_available_meta
+    #         else {}
+    #     )
+    #     doc = Document(
+    #         page_content=wiki_page.content[: self.doc_content_chars_max],
+    #         metadata={
+    #             **main_meta,
+    #             **add_meta,
+    #         },
+    #     )
+    #     return doc
+    @staticmethod
+    def _formatted_page_summary(page_title: str, wiki_page: Any) -> Optional[str]:
+        return f"Page: {page_title}\nSummary: {wiki_page.summary}"
+    # def load(self, query: str) -> List[Document]:
+    #     """
+    #     Run Wikipedia search and get the article text plus the meta information.
+    #     See
+    #
+    #     Returns: a list of documents.
+    #
+    #     """
+    #     page_titles = self.wiki_client.search(query[:WIKIPEDIA_MAX_QUERY_LENGTH])
+    #     docs = []
+    #     for page_title in page_titles[: self.top_k_results]:
+    #         if wiki_page := self._fetch_page(page_title):
+    #             if doc := self._page_to_document(page_title, wiki_page):
+    #                 docs.append(doc)
+    #     return docs