martinjosifoski committed on
Commit
cbb225c
1 Parent(s): 9042d2b

First commit.

Browse files
.gitignore ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python,java,c++,pycharm,visualstudiocode,macos,linux,windows
3
+
4
+ ### C++ ###
5
+ # Prerequisites
6
+ *.d
7
+
8
+ # Compiled Object files
9
+ *.slo
10
+ *.lo
11
+ *.o
12
+ *.obj
13
+
14
+ # Precompiled Headers
15
+ *.gch
16
+ *.pch
17
+
18
+ # Compiled Dynamic libraries
19
+ *.so
20
+ *.dylib
21
+ *.dll
22
+
23
+ # Fortran module files
24
+ *.mod
25
+ *.smod
26
+
27
+ # Compiled Static libraries
28
+ *.lai
29
+ *.la
30
+ *.a
31
+ *.lib
32
+
33
+ # Executables
34
+ *.exe
35
+ *.out
36
+ *.app
37
+
38
+ ### Java ###
39
+ # Compiled class file
40
+ *.class
41
+
42
+ # Log file
43
+ *.log
44
+
45
+ # BlueJ files
46
+ *.ctxt
47
+
48
+ # Mobile Tools for Java (J2ME)
49
+ .mtj.tmp/
50
+
51
+ # Package Files #
52
+ *.jar
53
+ *.war
54
+ *.nar
55
+ *.ear
56
+ *.zip
57
+ *.tar.gz
58
+ *.rar
59
+
60
+ # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
61
+ hs_err_pid*
62
+ replay_pid*
63
+
64
+ ### Linux ###
65
+ *~
66
+
67
+ # temporary files which can be created if a process still has a handle open of a deleted file
68
+ .fuse_hidden*
69
+
70
+ # KDE directory preferences
71
+ .directory
72
+
73
+ # Linux trash folder which might appear on any partition or disk
74
+ .Trash-*
75
+
76
+ # .nfs files are created when an open file is removed but is still being accessed
77
+ .nfs*
78
+
79
+ ### macOS ###
80
+ # General
81
+ .DS_Store
82
+ .AppleDouble
83
+ .LSOverride
84
+
85
+ # Icon must end with two \r
86
+ Icon
87
+
88
+
89
+ # Thumbnails
90
+ ._*
91
+
92
+ # Files that might appear in the root of a volume
93
+ .DocumentRevisions-V100
94
+ .fseventsd
95
+ .Spotlight-V100
96
+ .TemporaryItems
97
+ .Trashes
98
+ .VolumeIcon.icns
99
+ .com.apple.timemachine.donotpresent
100
+
101
+ # Directories potentially created on remote AFP share
102
+ .AppleDB
103
+ .AppleDesktop
104
+ Network Trash Folder
105
+ Temporary Items
106
+ .apdisk
107
+
108
+ ### macOS Patch ###
109
+ # iCloud generated files
110
+ *.icloud
111
+
112
+ ### PyCharm ###
113
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
114
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
115
+
116
+ # User-specific stuff
117
+ .idea/**/workspace.xml
118
+ .idea/**/tasks.xml
119
+ .idea/**/usage.statistics.xml
120
+ .idea/**/dictionaries
121
+ .idea/**/shelf
122
+
123
+ # AWS User-specific
124
+ .idea/**/aws.xml
125
+
126
+ # Generated files
127
+ .idea/**/contentModel.xml
128
+
129
+ # Sensitive or high-churn files
130
+ .idea/**/dataSources/
131
+ .idea/**/dataSources.ids
132
+ .idea/**/dataSources.local.xml
133
+ .idea/**/sqlDataSources.xml
134
+ .idea/**/dynamic.xml
135
+ .idea/**/uiDesigner.xml
136
+ .idea/**/dbnavigator.xml
137
+
138
+ # Gradle
139
+ .idea/**/gradle.xml
140
+ .idea/**/libraries
141
+
142
+ # Gradle and Maven with auto-import
143
+ # When using Gradle or Maven with auto-import, you should exclude module files,
144
+ # since they will be recreated, and may cause churn. Uncomment if using
145
+ # auto-import.
146
+ # .idea/artifacts
147
+ # .idea/compiler.xml
148
+ # .idea/jarRepositories.xml
149
+ # .idea/modules.xml
150
+ # .idea/*.iml
151
+ # .idea/modules
152
+ # *.iml
153
+ # *.ipr
154
+
155
+ # CMake
156
+ cmake-build-*/
157
+
158
+ # Mongo Explorer plugin
159
+ .idea/**/mongoSettings.xml
160
+
161
+ # File-based project format
162
+ *.iws
163
+
164
+ # IntelliJ
165
+ out/
166
+
167
+ # mpeltonen/sbt-idea plugin
168
+ .idea_modules/
169
+
170
+ # JIRA plugin
171
+ atlassian-ide-plugin.xml
172
+
173
+ # Cursive Clojure plugin
174
+ .idea/replstate.xml
175
+
176
+ # SonarLint plugin
177
+ .idea/sonarlint/
178
+
179
+ # Crashlytics plugin (for Android Studio and IntelliJ)
180
+ com_crashlytics_export_strings.xml
181
+ crashlytics.properties
182
+ crashlytics-build.properties
183
+ fabric.properties
184
+
185
+ # Editor-based Rest Client
186
+ .idea/httpRequests
187
+
188
+ # Android studio 3.1+ serialized cache file
189
+ .idea/caches/build_file_checksums.ser
190
+
191
+ ### PyCharm Patch ###
192
+ # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
193
+
194
+ # *.iml
195
+ # modules.xml
196
+ # .idea/misc.xml
197
+ # *.ipr
198
+
199
+ # Sonarlint plugin
200
+ # https://plugins.jetbrains.com/plugin/7973-sonarlint
201
+ .idea/**/sonarlint/
202
+
203
+ # SonarQube Plugin
204
+ # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
205
+ .idea/**/sonarIssues.xml
206
+
207
+ # Markdown Navigator plugin
208
+ # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
209
+ .idea/**/markdown-navigator.xml
210
+ .idea/**/markdown-navigator-enh.xml
211
+ .idea/**/markdown-navigator/
212
+
213
+ # Cache file creation bug
214
+ # See https://youtrack.jetbrains.com/issue/JBR-2257
215
+ .idea/$CACHE_FILE$
216
+
217
+ # CodeStream plugin
218
+ # https://plugins.jetbrains.com/plugin/12206-codestream
219
+ .idea/codestream.xml
220
+
221
+ # Azure Toolkit for IntelliJ plugin
222
+ # https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
223
+ .idea/**/azureSettings.xml
224
+
225
+ ### Python ###
226
+ # Byte-compiled / optimized / DLL files
227
+ __pycache__/
228
+ *.py[cod]
229
+ *$py.class
230
+
231
+ # C extensions
232
+
233
+ # Distribution / packaging
234
+ .Python
235
+ build/
236
+ develop-eggs/
237
+ dist/
238
+ downloads/
239
+ eggs/
240
+ .eggs/
241
+ lib/
242
+ lib64/
243
+ parts/
244
+ sdist/
245
+ var/
246
+ wheels/
247
+ share/python-wheels/
248
+ *.egg-info/
249
+ .installed.cfg
250
+ *.egg
251
+ MANIFEST
252
+
253
+ # PyInstaller
254
+ # Usually these files are written by a python script from a template
255
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
256
+ *.manifest
257
+ *.spec
258
+
259
+ # Installer logs
260
+ pip-log.txt
261
+ pip-delete-this-directory.txt
262
+
263
+ # Unit test / coverage reports
264
+ htmlcov/
265
+ .tox/
266
+ .nox/
267
+ .coverage
268
+ .coverage.*
269
+ .cache
270
+ nosetests.xml
271
+ coverage.xml
272
+ *.cover
273
+ *.py,cover
274
+ .hypothesis/
275
+ .pytest_cache/
276
+ cover/
277
+
278
+ # Translations
279
+ *.mo
280
+ *.pot
281
+
282
+ # Django stuff:
283
+ local_settings.py
284
+ db.sqlite3
285
+ db.sqlite3-journal
286
+
287
+ # Flask stuff:
288
+ instance/
289
+ .webassets-cache
290
+
291
+ # Scrapy stuff:
292
+ .scrapy
293
+
294
+ # Sphinx documentation
295
+ docs/_build/
296
+
297
+ # PyBuilder
298
+ .pybuilder/
299
+ target/
300
+
301
+ # Jupyter Notebook
302
+ .ipynb_checkpoints
303
+
304
+ # IPython
305
+ profile_default/
306
+ ipython_config.py
307
+
308
+ # pyenv
309
+ # For a library or package, you might want to ignore these files since the code is
310
+ # intended to run in multiple environments; otherwise, check them in:
311
+ # .python-version
312
+
313
+ # pipenv
314
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
315
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
316
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
317
+ # install all needed dependencies.
318
+ #Pipfile.lock
319
+
320
+ # poetry
321
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
322
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
323
+ # commonly ignored for libraries.
324
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
325
+ #poetry.lock
326
+
327
+ # pdm
328
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
329
+ #pdm.lock
330
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
331
+ # in version control.
332
+ # https://pdm.fming.dev/#use-with-ide
333
+ .pdm.toml
334
+
335
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
336
+ __pypackages__/
337
+
338
+ # Celery stuff
339
+ celerybeat-schedule
340
+ celerybeat.pid
341
+
342
+ # SageMath parsed files
343
+ *.sage.py
344
+
345
+ # Environments
346
+ .env
347
+ .venv
348
+ env/
349
+ venv/
350
+ ENV/
351
+ env.bak/
352
+ venv.bak/
353
+
354
+ # Spyder project settings
355
+ .spyderproject
356
+ .spyproject
357
+
358
+ # Rope project settings
359
+ .ropeproject
360
+
361
+ # mkdocs documentation
362
+ /site
363
+
364
+ # mypy
365
+ .mypy_cache/
366
+ .dmypy.json
367
+ dmypy.json
368
+
369
+ # Pyre type checker
370
+ .pyre/
371
+
372
+ # pytype static type analyzer
373
+ .pytype/
374
+
375
+ # Cython debug symbols
376
+ cython_debug/
377
+
378
+ # PyCharm
379
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
380
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
381
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
382
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
383
+ #.idea/
384
+
385
+ ### Python Patch ###
386
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
387
+ poetry.toml
388
+
389
+ # ruff
390
+ .ruff_cache/
391
+
392
+ # LSP config files
393
+ pyrightconfig.json
394
+
395
+ ### VisualStudioCode ###
396
+ .vscode/*
397
+ !.vscode/settings.json
398
+ !.vscode/tasks.json
399
+ !.vscode/launch.json
400
+ !.vscode/extensions.json
401
+ !.vscode/*.code-snippets
402
+
403
+ # Local History for Visual Studio Code
404
+ .history/
405
+
406
+ # Built Visual Studio Code Extensions
407
+ *.vsix
408
+
409
+ ### VisualStudioCode Patch ###
410
+ # Ignore all local history of files
411
+ .history
412
+ .ionide
413
+
414
+ ### Windows ###
415
+ # Windows thumbnail cache files
416
+ Thumbs.db
417
+ Thumbs.db:encryptable
418
+ ehthumbs.db
419
+ ehthumbs_vista.db
420
+
421
+ # Dump file
422
+ *.stackdump
423
+
424
+ # Folder config file
425
+ [Dd]esktop.ini
426
+
427
+ # Recycle Bin used on file shares
428
+ $RECYCLE.BIN/
429
+
430
+ # Windows Installer files
431
+ *.cab
432
+ *.msi
433
+ *.msix
434
+ *.msm
435
+ *.msp
436
+
437
+ # Windows shortcuts
438
+ *.lnk
439
+
440
+ # End of https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
ControllerAtomicFlow.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from copy import deepcopy
3
+ from typing import Any, Dict, List
4
+ from flows.application_flows import OpenAIChatAtomicFlow
5
+
6
+ from dataclasses import dataclass
7
+
8
+
9
@dataclass
class Command:
    """Description of a single command the controller may choose.

    Instances are built in ``ControllerAtomicFlow.instantiate_from_config``
    from the ``commands`` section of the flow config and rendered into the
    system prompt by ``ControllerAtomicFlow._build_commands_manual``.
    """

    # Command identifier; taken from the key of the ``commands`` mapping.
    name: str
    # Free-text explanation of the command, shown verbatim in the prompt.
    description: str
    # Names of the arguments the command expects; each becomes a key of the
    # JSON schema shown in the prompt.
    input_args: List[str]
14
+
15
+
16
class ControllerAtomicFlow(OpenAIChatAtomicFlow):
    """Chat flow that advertises a set of commands and returns the reply parsed as JSON.

    The list of available commands is rendered to text once, at construction
    time, and bound to the ``commands`` variable of the system prompt template.
    """

    def __init__(self, commands: List[Command], **kwargs):
        """Initialise the parent flow and bind the command manual into the system prompt.

        Args:
            commands: Commands to list in the system prompt.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)
        commands_manual = self._build_commands_manual(commands)
        self.system_message_prompt_template = self.system_message_prompt_template.partial(
            commands=commands_manual
        )

    @staticmethod
    def _build_commands_manual(commands: List[Command]) -> str:
        """Render ``commands`` as a numbered, newline-terminated list.

        Each entry contains the command name, its description, and a JSON
        object mapping every input argument to a ``YOUR_<ARG>`` placeholder.
        Returns an empty string when ``commands`` is empty.
        """
        entries = []
        for position, command in enumerate(commands, start=1):
            args_schema = json.dumps(
                {arg_name: f"YOUR_{arg_name.upper()}" for arg_name in command.input_args}
            )
            entries.append(
                f"{position}. {command.name}: {command.description} "
                f"Input arguments (given in the JSON schema): {args_schema}\n"
            )
        return "".join(entries)

    @classmethod
    def instantiate_from_config(cls, config):
        """Build a flow instance from a config mapping.

        The config is deep-copied, so the caller's object is not mutated here.
        It must contain a ``commands`` mapping of
        ``name -> {"description": ..., "input_args": [...]}``; a missing key
        raises ``KeyError``.
        """
        flow_config = deepcopy(config)
        kwargs = {"flow_config": flow_config}

        # ~~~ Set up prompts ~~~
        # `_set_up_prompts` is not defined in this class; presumably inherited
        # from the parent flow.
        kwargs.update(cls._set_up_prompts(flow_config))

        # ~~~ Set up commands ~~~
        kwargs["commands"] = [
            Command(name, command_conf["description"], command_conf["input_args"])
            for name, command_conf in flow_config["commands"].items()
        ]

        # ~~~ Instantiate flow ~~~
        return cls(**kwargs)

    def run(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Run the parent flow and decode its ``api_output`` as JSON.

        Raises:
            json.JSONDecodeError: If the stripped ``api_output`` is not valid JSON.
        """
        parent_output = super().run(input_data)
        raw_reply = parent_output["api_output"].strip()
        return json.loads(raw_reply)
ControllerAtomicFlow.yaml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "ControllerFlow"
2
+ description: "Proposes the next action to take towards achieving the goal, and prepares the input for the executor."
3
+ enable_cache: True
4
+
5
+ #######################################################
6
+ # Input keys
7
+ #######################################################
8
+
9
+ input_interface_non_initialized: # initial input keys
10
+ - "goal"
11
+
12
+ input_interface_initialized: # input_keys
13
+ - "observation"
14
+
15
+ #######################################################
16
+ # Output keys
17
+ #######################################################
18
+
19
+ output_interface:
20
+ - 'thought'
21
+ - 'reasoning'
22
+ - 'plan'
23
+ - 'criticism'
24
+ - 'speak'
25
+ - 'command'
26
+ - 'command_args'
27
+
28
+ #######################################################
29
+ # ToDo: Some parts of the prompt don't make sense -- update them
30
+ system_message_prompt_template:
31
+ _target_: langchain.PromptTemplate
32
+ template: |2-
33
+ You are a smart AI assistant.
34
+
35
+ Your decisions must always be made independently without seeking user assistance.
36
+ Play to your strengths as an LLM and pursue simple strategies with no legal complications.
37
+ If you have completed all your tasks, make sure to use the "finish" command.
38
+
39
+ Constraints:
40
+ 1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files
41
+ 2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember
42
+ 3. No user assistance
43
+ 4. Exclusively use the commands listed in double quotes e.g. "command name"
44
+
45
+ Available commands:
46
+ {{commands}}
47
+
48
+ Resources:
49
+ 1. Internet access for searches and information gathering.
50
+ 2. Long Term memory management.
51
+ 3. GPT-3.5 powered Agents for delegation of simple tasks.
52
+
53
+ Performance Evaluation:
54
+ 1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.
55
+ 2. Constructively self-criticize your big-picture behavior constantly.
56
+ 3. Reflect on past decisions and strategies to refine your approach.
57
+ 4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.
58
+ You should only respond in JSON format as described below
59
+ Response Format:
60
+ {
61
+ "thought": "thought",
62
+ "reasoning": "reasoning",
63
+ "plan": "- short bulleted\n- list that conveys\n- long-term plan",
64
+ "criticism": "constructive self-criticism",
65
+ "speak": "thoughts summary to say to user",
66
+ "command": "command name",
67
+ "command_args": {
68
+ "arg name": "value"
69
+ }
70
+ }
71
+ Ensure your responses can be parsed by Python json.loads
72
+ input_variables: ["commands"]
73
+ template_format: jinja2
74
+
75
+ human_message_prompt_template:
76
+ _target_: langchain.PromptTemplate
77
+ template: |2-
78
+ Here is the response to your last action:
79
+ {{observation}}
80
+ input_variables:
81
+ - "observation"
82
+ template_format: jinja2
83
+
84
+ init_human_message_prompt_template:
85
+ _target_: langchain.PromptTemplate
86
+ template: |2-
87
+ Here is the goal you need to achieve:
88
+ {{goal}}
89
+ input_variables:
90
+ - "goal"
91
+ template_format: jinja2
ControllerExecutorFlow.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+
3
+ from flows.base_flows import CircularFlow
4
+ from flows.utils import logging
5
+
6
+ from .ControllerAtomicFlow import ControllerAtomicFlow
7
+
8
+ logging.set_verbosity_debug()
9
+ log = logging.get_logger(__name__)
10
+
11
+
12
class ControllerExecutorFlow(CircularFlow):
    """Circular flow that alternates between a controller and an executor.

    The controller's output is inspected after each step: the ``finish``
    command ends the loop with an answer, anything else is passed on.
    """

    def _on_reach_max_round(self):
        """Record an "unfinished" result in the flow state.

        NOTE(review): presumably invoked by ``CircularFlow`` once the round
        limit is hit -- confirm against the base class.
        """
        unfinished_state = {
            "answer": "The maximum amount of rounds was reached before the model found an answer.",
            "status": "unfinished",
        }
        self._state_update_dict(unfinished_state)

    @CircularFlow.output_msg_payload_processor
    def detect_finish_or_continue(self, output_payload: Dict[str, Any], src_flow: ControllerAtomicFlow) -> Dict[str, Any]:
        """Turn a ``finish`` command into an early-exit payload.

        Args:
            output_payload: Parsed controller output; must contain ``command``
                and, for ``finish``, ``command_args["answer"]``.
            src_flow: The flow that produced the payload (unused here).

        Returns:
            The payload unchanged for any command other than ``finish``;
            otherwise a dict with ``EARLY_EXIT``, ``answer`` and ``status``.
        """
        if output_payload["command"] != "finish":
            return output_payload

        return {
            "EARLY_EXIT": True,
            "answer": output_payload["command_args"]["answer"],
            "status": "finished",
        }
ControllerExecutorFlow.yaml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "CtrlEx"
2
+ description: "ControllerExecutor (i.e., MRKL, ReAct) interaction implementation with Flows that approaches the problem solving in two phases: one Flow chooses the next step and another Flow executes it. This is repeated until the controller Flow concludes on an answer."
3
+ max_rounds: 30
4
+
5
+ ### Information used by the default interface specification implementation
6
+ input_interface:
7
+ - "goal"
8
+ output_interface:
9
+ - "answer"
10
+ - "status"
11
+
12
+ ### Subflows specification
13
+ subflows_config:
14
+ Controller:
15
+ _target_: .ControllerAtomicFlow.instantiate_from_default_config
16
+ finish:
17
+ description: "Signal that the objective has been satisfied, and returns the answer to the user."
18
+ input_args: ["answer"]
19
+ # E.g.,
20
+ # commands:
21
+ # wiki_search:
22
+ # description: "Performs a search on Wikipedia."
23
+ # input_args: ["search_term"]
24
+ # ddg_search:
25
+ # description: "Query the search engine DuckDuckGo."
26
+ # input_args: ["query"]
27
+
28
+ Executor:
29
+ _target_: flows.base_flows.BranchingFlow.instantiate_from_default_config
30
+ # E.g.,
31
+ # subflows_config:
32
+ # wiki_search:
33
+ # _target_: .WikiSearchAtomicFlow.instantiate_from_default_config
34
+ # ddg_search:
35
+ # _target_: flows.application_flows.LCToolFlowModule.LCToolFlow.instantiate_from_default_config
36
+ # backend:
37
+ # _target_: langchain.tools.DuckDuckGoSearchRun
38
+
39
+ early_exit_key: "EARLY_EXIT"
40
+
41
+ topology:
42
+ - goal: "Select the next action and prepare the input for the executor."
43
+ input_interface:
44
+ _target_: flows.interfaces.KeyInterface
45
+ additional_transformations:
46
+ - _target_: flows.data_transformations.KeyMatchInput
47
+ flow: Controller
48
+ output_interface:
49
+ _target_: detect_finish_or_continue
50
+ reset: false
51
+
52
+ - goal: "Execute the action specified by the Controller."
53
+ input_interface:
54
+ _target_: flows.interfaces.KeyInterface
55
+ keys_to_rename:
56
+ command: branch
57
+ command_args: branch_input_data
58
+ keys_to_select: ["branch", "branch_input_data"]
59
+ flow: Executor
60
+ output_interface:
61
+ _target_: flows.interfaces.KeyInterface
62
+ keys_to_rename:
63
+ branch_output_data: observation
64
+ keys_to_select: ["observation"]
65
+ reset: false
README.md CHANGED
@@ -1,3 +1,5 @@
1
  ---
2
  license: mit
3
  ---
 
 
 
1
  ---
2
  license: mit
3
  ---
4
+
5
+ # ToDo
WikiSearchAtomicFlow.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from copy import deepcopy
2
+
3
+ from typing import List, Dict, Optional, Any
4
+
5
+ from flows.base_flows import AtomicFlow
6
+
7
+ from flows.utils import logging
8
+ from .wikipediaAPI import WikipediaAPIWrapper
9
+
10
+ log = logging.get_logger(__name__)
11
+
12
+
13
class WikiSearchAtomicFlow(AtomicFlow):
    """Atomic flow that looks up a term on Wikipedia.

    Reads ``search_term`` from the input and returns a single
    ``wiki_content`` entry: the page content when a page with exactly that
    title exists, otherwise a message listing similar page titles.
    """

    REQUIRED_KEYS_CONFIG = ["lang", "top_k_results", "doc_content_chars_max"]
    REQUIRED_KEYS_CONSTRUCTOR = []

    SUPPORTS_CACHING: bool = True

    api_wrapper: WikipediaAPIWrapper

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def run(self,
            input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Fetch the Wikipedia page for ``input_data["search_term"]``.

        Args:
            input_data: Mapping that must provide ``search_term``.

        Returns:
            ``{"wiki_content": <text>}`` where ``<text>`` is either the page
            content or a "Could not find [...]" message with similar titles.

        Raises:
            ValueError: If ``search_term`` is missing or ``None``.
        """
        # ~~~ Process input ~~~
        term = input_data.get("search_term")
        if term is None:
            # Fail here with a clear message rather than letting None reach
            # the API wrapper, where the resulting error is hard to trace.
            raise ValueError("WikiSearchAtomicFlow requires a 'search_term' in its input data.")

        api_wrapper = WikipediaAPIWrapper(
            lang=self.flow_config["lang"],
            top_k_results=self.flow_config["top_k_results"],
            doc_content_chars_max=self.flow_config["doc_content_chars_max"],
        )

        # ~~~ Call ~~~
        page_content = api_wrapper._fetch_page(term)
        if page_content:
            return {"wiki_content": page_content}

        # No page with that exact title (or empty content): suggest alternatives.
        page_titles = api_wrapper.search_page_titles(term)
        return {"wiki_content": f"Could not find [{term}]. similar: {page_titles}"}
WikiSearchAtomicFlow.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: WikiSearchAtomicFlow
2
+ description: "A Flow that queries the wikipedia API for a page content."
3
+
4
+ keep_raw_response: false
5
+
6
+ lang: en
7
+ top_k_results: 5
8
+ doc_content_chars_max: 3000
9
+
10
+
11
+ input_interface:
12
+ - "search_term"
13
+
14
+ output_interface:
15
+ - "wiki_content"
16
+
17
+
__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .ControllerExecutorFlow import ControllerExecutorFlow
2
+ from .ControllerAtomicFlow import ControllerAtomicFlow
3
+ from .WikiSearchAtomicFlow import WikiSearchAtomicFlow
pip_requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ duckduckgo-search==3.9.2
wikipediaAPI.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Util that calls Wikipedia. references: https://github.com/hwchase17/langchain/blob/9b615022e2b6a3591347ad77a3e21aad6cf24c49/docs/extras/modules/agents/tools/integrations/wikipedia.ipynb#L36"""
2
+ import logging
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ from pydantic import BaseModel, root_validator
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ WIKIPEDIA_MAX_QUERY_LENGTH = 300
10
+
11
+
12
class WikipediaAPIWrapper(BaseModel):
    """Wrapper around WikipediaAPI.

    To use, you should have the ``wikipedia`` python package installed.
    This wrapper will use the Wikipedia API to conduct searches and
    fetch page summaries. By default, it will return the page summaries
    of the top-k results.
    It limits the Document content by doc_content_chars_max.
    """

    # The imported ``wikipedia`` module; injected by ``validate_environment``.
    wiki_client: Any
    # Maximum number of titles returned by ``search_page_titles``.
    top_k_results: int = 5
    # Language passed to ``wikipedia.set_lang``.
    lang: str = "en"
    # Upper bound, in characters, on returned page content / joined summaries.
    doc_content_chars_max: int = 4000

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the python package exists in environment.

        Raises:
            ImportError: If the ``wikipedia`` package cannot be imported.
        """
        # Keep the try body to the import only, so that errors from the
        # configuration calls below are not caught by mistake.
        try:
            import wikipedia
        except ImportError as err:
            raise ImportError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            ) from err

        wikipedia.set_lang(values["lang"])
        values["wiki_client"] = wikipedia
        return values

    def run(self, query: str) -> str:
        """Run Wikipedia search and get page summaries.

        Returns the summaries of the matching pages joined by blank lines and
        truncated to ``doc_content_chars_max``, or a fixed "no result" message
        when no page could be loaded.
        """
        summaries = []
        for page_title in self.search_page_titles(query):
            # The page *object* is needed here: `_fetch_page` returns only the
            # truncated content string, which has no `.summary` attribute.
            wiki_page = self._load_page(page_title)
            if wiki_page is None:
                continue
            if summary := self._formatted_page_summary(page_title, wiki_page):
                summaries.append(summary)
        if not summaries:
            return "No good Wikipedia Search Result was found"
        return "\n\n".join(summaries)[: self.doc_content_chars_max]

    def _load_page(self, page: str) -> Optional[Any]:
        """Return the page object titled ``page``, or ``None`` if missing or ambiguous."""
        try:
            return self.wiki_client.page(title=page, auto_suggest=False)
        except (
                self.wiki_client.exceptions.PageError,
                self.wiki_client.exceptions.DisambiguationError,
        ):
            return None

    def _fetch_page(self, page: str) -> Optional[str]:
        """Return the content of the page titled ``page``, truncated to ``doc_content_chars_max``.

        Returns ``None`` when the page does not exist or the title is ambiguous.
        """
        try:
            return self.wiki_client.page(title=page, auto_suggest=False).content[: self.doc_content_chars_max]
        except (
                self.wiki_client.exceptions.PageError,
                self.wiki_client.exceptions.DisambiguationError,
        ):
            return None

    def search_page_titles(self, query: str) -> List[str]:
        """Search Wikipedia and return at most ``top_k_results`` page titles.

        The query is truncated to ``WIKIPEDIA_MAX_QUERY_LENGTH`` characters
        before it is sent.
        """
        return self.wiki_client.search(query[:WIKIPEDIA_MAX_QUERY_LENGTH])[:self.top_k_results]

    # def _page_to_document(self, page_title: str, wiki_page: Any) -> Document:
    #     main_meta = {
    #         "title": page_title,
    #         "summary": wiki_page.summary,
    #         "source": wiki_page.url,
    #     }
    #     add_meta = (
    #         {
    #             "categories": wiki_page.categories,
    #             "page_url": wiki_page.url,
    #             "image_urls": wiki_page.images,
    #             "related_titles": wiki_page.links,
    #             "parent_id": wiki_page.parent_id,
    #             "references": wiki_page.references,
    #             "revision_id": wiki_page.revision_id,
    #             "sections": wiki_page.sections,
    #         }
    #         if self.load_all_available_meta
    #         else {}
    #     )
    #     doc = Document(
    #         page_content=wiki_page.content[: self.doc_content_chars_max],
    #         metadata={
    #             **main_meta,
    #             **add_meta,
    #         },
    #     )
    #     return doc

    @staticmethod
    def _formatted_page_summary(page_title: str, wiki_page: Any) -> Optional[str]:
        """Format a page's title and summary as a two-line text block."""
        return f"Page: {page_title}\nSummary: {wiki_page.summary}"

    # def load(self, query: str) -> List[Document]:
    #     """
    #     Run Wikipedia search and get the article text plus the meta information.
    #     See
    #
    #     Returns: a list of documents.
    #
    #     """
    #     page_titles = self.wiki_client.search(query[:WIKIPEDIA_MAX_QUERY_LENGTH])
    #     docs = []
    #     for page_title in page_titles[: self.top_k_results]:
    #         if wiki_page := self._fetch_page(page_title):
    #             if doc := self._page_to_document(page_title, wiki_page):
    #                 docs.append(doc)
    #     return docs