Spaces:

JohnSmith9982
/

ChuanhuChatGPT

Running on CPU Upgrade

App Files Files Community

101

JohnSmith9982 committed on Jun 14, 2023

Commit

b28a1a9

•

1 Parent(s): ac74dc0

Upload 85 files

Browse files

Files changed (39) hide show

ChuanhuChatbot.py +8 -9
README.md +1 -1
assets/custom.css +80 -72
assets/custom.js +4 -4
history/2023-06-14_15-05-04.json +0 -0
modules/__pycache__/config.cpython-311.pyc +0 -0
modules/__pycache__/config.cpython-39.pyc +0 -0
modules/__pycache__/index_func.cpython-311.pyc +0 -0
modules/__pycache__/index_func.cpython-39.pyc +0 -0
modules/__pycache__/llama_func.cpython-39.pyc +0 -0
modules/__pycache__/overwrites.cpython-311.pyc +0 -0
modules/__pycache__/overwrites.cpython-39.pyc +0 -0
modules/__pycache__/pdf_func.cpython-311.pyc +0 -0
modules/__pycache__/pdf_func.cpython-39.pyc +0 -0
modules/__pycache__/presets.cpython-311.pyc +0 -0
modules/__pycache__/presets.cpython-39.pyc +0 -0
modules/__pycache__/shared.cpython-311.pyc +0 -0
modules/__pycache__/shared.cpython-39.pyc +0 -0
modules/__pycache__/utils.cpython-311.pyc +0 -0
modules/__pycache__/utils.cpython-39.pyc +0 -0
modules/config.py +16 -12
modules/index_func.py +141 -0
modules/models/ChuanhuAgent.py +216 -0
modules/models/__pycache__/ChuanhuAgent.cpython-311.pyc +0 -0
modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc +0 -0
modules/models/__pycache__/base_model.cpython-311.pyc +0 -0
modules/models/__pycache__/base_model.cpython-39.pyc +0 -0
modules/models/__pycache__/minimax.cpython-39.pyc +0 -0
modules/models/__pycache__/models.cpython-311.pyc +0 -0
modules/models/__pycache__/models.cpython-39.pyc +0 -0
modules/models/base_model.py +140 -49
modules/models/minimax.py +161 -0
modules/models/models.py +13 -6
modules/overwrites.py +20 -28
modules/pdf_func.py +7 -7
modules/presets.py +21 -14
modules/shared.py +17 -8
modules/utils.py +75 -11
requirements.txt +13 -6

ChuanhuChatbot.py CHANGED Viewed

@@ -12,10 +12,10 @@ from modules.presets import *
 from modules.overwrites import *
 from modules.models.models import get_model
 gr.Chatbot._postprocess_chat_messages = postprocess_chat_messages
 gr.Chatbot.postprocess = postprocess
-PromptHelper.compact_text_chunks = compact_text_chunks
 with open("assets/custom.css", "r", encoding="utf-8") as f:
     customCSS = f.read()
@@ -89,7 +89,6 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
                     with gr.Row():
                         single_turn_checkbox = gr.Checkbox(label=i18n("单轮对话"), value=False)
                         use_websearch_checkbox = gr.Checkbox(label=i18n("使用在线搜索"), value=False)
-                        # render_latex_checkbox = gr.Checkbox(label=i18n("渲染LaTeX公式"), value=render_latex, interactive=True, elem_id="render_latex_checkbox")
                     language_select_dropdown = gr.Dropdown(
                         label=i18n("选择回复语言（针对搜索&索引功能）"),
                         choices=REPLY_LANGUAGES,
@@ -98,6 +97,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
                     )
                     index_files = gr.Files(label=i18n("上传"), type="file")
                     two_column = gr.Checkbox(label=i18n("双栏pdf"), value=advance_docs["pdf"].get("two_column", False))
                     # TODO: 公式ocr
                     # formula_ocr = gr.Checkbox(label=i18n("识别公式"), value=advance_docs["pdf"].get("formula_ocr", False))
@@ -161,7 +161,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
                 with gr.Tab(label=i18n("高级")):
                     gr.Markdown(i18n("# ⚠️ 务必谨慎更改 ⚠️\n\n如果无法使用请恢复默认设置"))
-                    gr.HTML(APPEARANCE_SWITCHER, elem_classes="insert_block")
                     use_streaming_checkbox = gr.Checkbox(
                             label=i18n("实时传输回答"), value=True, visible=ENABLE_STREAMING_OPTION
                         )
@@ -265,7 +265,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
                         default_btn = gr.Button(i18n("🔙 恢复默认设置"))
     gr.Markdown(CHUANHU_DESCRIPTION, elem_id="description")
-    gr.HTML(FOOTER.format(versions=versions_html()), elem_id="footer")
     # https://github.com/gradio-app/gradio/pull/3296
     def create_greeting(request: gr.Request):
@@ -333,7 +333,8 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
     submitBtn.click(**transfer_input_args).then(**chatgpt_predict_args, api_name="predict").then(**end_outputing_args)
     submitBtn.click(**get_usage_args)
-    index_files.change(handle_file_upload, [current_model, index_files, chatbot], [index_files, chatbot, status_display])
     emptyBtn.click(
         reset,
@@ -467,8 +468,6 @@ demo.title = i18n("川虎Chat 🚀")
 if __name__ == "__main__":
     reload_javascript()
     demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
-        favicon_path="./assets/favicon.ico",
     )
-    # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860, share=False) # 可自定义端口
-    # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860,auth=("在这里填写用户名", "在这里填写密码")) # 可设置用户名与密码
-    # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(auth=("在这里填写用户名", "在这里填写密码")) # 适合Nginx反向代理

 from modules.overwrites import *
 from modules.models.models import get_model
+logging.getLogger("httpx").setLevel(logging.WARNING)
 gr.Chatbot._postprocess_chat_messages = postprocess_chat_messages
 gr.Chatbot.postprocess = postprocess
 with open("assets/custom.css", "r", encoding="utf-8") as f:
     customCSS = f.read()
                     with gr.Row():
                         single_turn_checkbox = gr.Checkbox(label=i18n("单轮对话"), value=False)
                         use_websearch_checkbox = gr.Checkbox(label=i18n("使用在线搜索"), value=False)
                     language_select_dropdown = gr.Dropdown(
                         label=i18n("选择回复语言（针对搜索&索引功能）"),
                         choices=REPLY_LANGUAGES,
                     )
                     index_files = gr.Files(label=i18n("上传"), type="file")
                     two_column = gr.Checkbox(label=i18n("双栏pdf"), value=advance_docs["pdf"].get("two_column", False))
+                    summarize_btn = gr.Button(i18n("总结"))
                     # TODO: 公式ocr
                     # formula_ocr = gr.Checkbox(label=i18n("识别公式"), value=advance_docs["pdf"].get("formula_ocr", False))
                 with gr.Tab(label=i18n("高级")):
                     gr.Markdown(i18n("# ⚠️ 务必谨慎更改 ⚠️\n\n如果无法使用请恢复默认设置"))
+                    gr.HTML(get_html("appearance_switcher.html").format(label=i18n("切换亮暗色主题")), elem_classes="insert_block")
                     use_streaming_checkbox = gr.Checkbox(
                             label=i18n("实时传输回答"), value=True, visible=ENABLE_STREAMING_OPTION
                         )
                         default_btn = gr.Button(i18n("🔙 恢复默认设置"))
     gr.Markdown(CHUANHU_DESCRIPTION, elem_id="description")
+    gr.HTML(get_html("footer.html").format(versions=versions_html()), elem_id="footer")
     # https://github.com/gradio-app/gradio/pull/3296
     def create_greeting(request: gr.Request):
     submitBtn.click(**transfer_input_args).then(**chatgpt_predict_args, api_name="predict").then(**end_outputing_args)
     submitBtn.click(**get_usage_args)
+    index_files.change(handle_file_upload, [current_model, index_files, chatbot, language_select_dropdown], [index_files, chatbot, status_display])
+    summarize_btn.click(handle_summarize_index, [current_model, index_files, chatbot, language_select_dropdown], [chatbot, status_display])
     emptyBtn.click(
         reset,
 if __name__ == "__main__":
     reload_javascript()
     demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
+        blocked_paths=["config.json"],
+        favicon_path="./assets/favicon.ico"
     )

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🐯
 colorFrom: green
 colorTo: red
 sdk: gradio
-sdk_version: 3.28.0
 app_file: ChuanhuChatbot.py
 pinned: false
 license: gpl-3.0

 colorFrom: green
 colorTo: red
 sdk: gradio
+sdk_version: 3.33.1
 app_file: ChuanhuChatbot.py
 pinned: false
 license: gpl-3.0

assets/custom.css CHANGED Viewed

@@ -405,7 +405,7 @@ thead th {
     padding: .5em .2em;
 }
 /* 行内代码 */
-code {
     display: inline;
     white-space: break-spaces;
     border-radius: 6px;
@@ -414,13 +414,13 @@ code {
     background-color: rgba(175,184,193,0.2);
 }
 /* 代码块 */
-pre code {
     display: block;
     overflow: auto;
     white-space: pre;
     background-color: hsla(0, 0%, 0%, 80%)!important;
     border-radius: 10px;
-    padding: 1.4em 1.2em 0em 1.4em;
     margin: 0.6em 2em 1em 0.2em;
     color: #FFF;
     box-shadow: 6px 6px 16px hsla(0, 0%, 0%, 0.2);
@@ -428,73 +428,81 @@ pre code {
 .message pre {
     padding: 0 !important;
 }
 /* 代码高亮样式 */
-.highlight .hll { background-color: #49483e }
-.highlight .c { color: #75715e } /* Comment */
-.highlight .err { color: #960050; background-color: #1e0010 } /* Error */
-.highlight .k { color: #66d9ef } /* Keyword */
-.highlight .l { color: #ae81ff } /* Literal */
-.highlight .n { color: #f8f8f2 } /* Name */
-.highlight .o { color: #f92672 } /* Operator */
-.highlight .p { color: #f8f8f2 } /* Punctuation */
-.highlight .ch { color: #75715e } /* Comment.Hashbang */
-.highlight .cm { color: #75715e } /* Comment.Multiline */
-.highlight .cp { color: #75715e } /* Comment.Preproc */
-.highlight .cpf { color: #75715e } /* Comment.PreprocFile */
-.highlight .c1 { color: #75715e } /* Comment.Single */
-.highlight .cs { color: #75715e } /* Comment.Special */
-.highlight .gd { color: #f92672 } /* Generic.Deleted */
-.highlight .ge { font-style: italic } /* Generic.Emph */
-.highlight .gi { color: #a6e22e } /* Generic.Inserted */
-.highlight .gs { font-weight: bold } /* Generic.Strong */
-.highlight .gu { color: #75715e } /* Generic.Subheading */
-.highlight .kc { color: #66d9ef } /* Keyword.Constant */
-.highlight .kd { color: #66d9ef } /* Keyword.Declaration */
-.highlight .kn { color: #f92672 } /* Keyword.Namespace */
-.highlight .kp { color: #66d9ef } /* Keyword.Pseudo */
-.highlight .kr { color: #66d9ef } /* Keyword.Reserved */
-.highlight .kt { color: #66d9ef } /* Keyword.Type */
-.highlight .ld { color: #e6db74 } /* Literal.Date */
-.highlight .m { color: #ae81ff } /* Literal.Number */
-.highlight .s { color: #e6db74 } /* Literal.String */
-.highlight .na { color: #a6e22e } /* Name.Attribute */
-.highlight .nb { color: #f8f8f2 } /* Name.Builtin */
-.highlight .nc { color: #a6e22e } /* Name.Class */
-.highlight .no { color: #66d9ef } /* Name.Constant */
-.highlight .nd { color: #a6e22e } /* Name.Decorator */
-.highlight .ni { color: #f8f8f2 } /* Name.Entity */
-.highlight .ne { color: #a6e22e } /* Name.Exception */
-.highlight .nf { color: #a6e22e } /* Name.Function */
-.highlight .nl { color: #f8f8f2 } /* Name.Label */
-.highlight .nn { color: #f8f8f2 } /* Name.Namespace */
-.highlight .nx { color: #a6e22e } /* Name.Other */
-.highlight .py { color: #f8f8f2 } /* Name.Property */
-.highlight .nt { color: #f92672 } /* Name.Tag */
-.highlight .nv { color: #f8f8f2 } /* Name.Variable */
-.highlight .ow { color: #f92672 } /* Operator.Word */
-.highlight .w { color: #f8f8f2 } /* Text.Whitespace */
-.highlight .mb { color: #ae81ff } /* Literal.Number.Bin */
-.highlight .mf { color: #ae81ff } /* Literal.Number.Float */
-.highlight .mh { color: #ae81ff } /* Literal.Number.Hex */
-.highlight .mi { color: #ae81ff } /* Literal.Number.Integer */
-.highlight .mo { color: #ae81ff } /* Literal.Number.Oct */
-.highlight .sa { color: #e6db74 } /* Literal.String.Affix */
-.highlight .sb { color: #e6db74 } /* Literal.String.Backtick */
-.highlight .sc { color: #e6db74 } /* Literal.String.Char */
-.highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */
-.highlight .sd { color: #e6db74 } /* Literal.String.Doc */
-.highlight .s2 { color: #e6db74 } /* Literal.String.Double */
-.highlight .se { color: #ae81ff } /* Literal.String.Escape */
-.highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */
-.highlight .si { color: #e6db74 } /* Literal.String.Interpol */
-.highlight .sx { color: #e6db74 } /* Literal.String.Other */
-.highlight .sr { color: #e6db74 } /* Literal.String.Regex */
-.highlight .s1 { color: #e6db74 } /* Literal.String.Single */
-.highlight .ss { color: #e6db74 } /* Literal.String.Symbol */
-.highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */
-.highlight .fm { color: #a6e22e } /* Name.Function.Magic */
-.highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */
-.highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */
-.highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */
-.highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */
-.highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */

     padding: .5em .2em;
 }
 /* 行内代码 */
+.message :not(pre) code {
     display: inline;
     white-space: break-spaces;
     border-radius: 6px;
     background-color: rgba(175,184,193,0.2);
 }
 /* 代码块 */
+.message pre code {
     display: block;
     overflow: auto;
     white-space: pre;
     background-color: hsla(0, 0%, 0%, 80%)!important;
     border-radius: 10px;
+    padding: 1.2em 1em 0em .5em;
     margin: 0.6em 2em 1em 0.2em;
     color: #FFF;
     box-shadow: 6px 6px 16px hsla(0, 0%, 0%, 0.2);
 .message pre {
     padding: 0 !important;
 }
+.message pre code div.highlight {
+    background-color: unset !important;
+}
+button.copy-button {
+    display: none;
+}
 /* 代码高亮样式 */
+.highlight .hll { background-color: #49483e !important }
+.highlight .c { color: #75715e !important } /* Comment */
+.highlight .err { color: #960050 !important; background-color: #1e0010 } /* Error */
+.highlight .k { color: #66d9ef  !important} /* Keyword */
+.highlight .l { color: #ae81ff  !important} /* Literal */
+.highlight .n { color: #f8f8f2  !important} /* Name */
+.highlight .o { color: #f92672  !important} /* Operator */
+.highlight .p { color: #f8f8f2  !important} /* Punctuation */
+.highlight .ch { color: #75715e  !important} /* Comment.Hashbang */
+.highlight .cm { color: #75715e  !important} /* Comment.Multiline */
+.highlight .cp { color: #75715e  !important} /* Comment.Preproc */
+.highlight .cpf { color: #75715e  !important} /* Comment.PreprocFile */
+.highlight .c1 { color: #75715e  !important} /* Comment.Single */
+.highlight .cs { color: #75715e  !important} /* Comment.Special */
+.highlight .gd { color: #f92672  !important} /* Generic.Deleted */
+.highlight .ge { font-style: italic  !important} /* Generic.Emph */
+.highlight .gi { color: #a6e22e  !important} /* Generic.Inserted */
+.highlight .gs { font-weight: bold  !important} /* Generic.Strong */
+.highlight .gu { color: #75715e  !important} /* Generic.Subheading */
+.highlight .kc { color: #66d9ef  !important} /* Keyword.Constant */
+.highlight .kd { color: #66d9ef  !important} /* Keyword.Declaration */
+.highlight .kn { color: #f92672  !important} /* Keyword.Namespace */
+.highlight .kp { color: #66d9ef  !important} /* Keyword.Pseudo */
+.highlight .kr { color: #66d9ef  !important} /* Keyword.Reserved */
+.highlight .kt { color: #66d9ef  !important} /* Keyword.Type */
+.highlight .ld { color: #e6db74  !important} /* Literal.Date */
+.highlight .m { color: #ae81ff  !important} /* Literal.Number */
+.highlight .s { color: #e6db74  !important} /* Literal.String */
+.highlight .na { color: #a6e22e  !important} /* Name.Attribute */
+.highlight .nb { color: #f8f8f2  !important} /* Name.Builtin */
+.highlight .nc { color: #a6e22e  !important} /* Name.Class */
+.highlight .no { color: #66d9ef  !important} /* Name.Constant */
+.highlight .nd { color: #a6e22e  !important} /* Name.Decorator */
+.highlight .ni { color: #f8f8f2  !important} /* Name.Entity */
+.highlight .ne { color: #a6e22e  !important} /* Name.Exception */
+.highlight .nf { color: #a6e22e  !important} /* Name.Function */
+.highlight .nl { color: #f8f8f2  !important} /* Name.Label */
+.highlight .nn { color: #f8f8f2  !important} /* Name.Namespace */
+.highlight .nx { color: #a6e22e  !important} /* Name.Other */
+.highlight .py { color: #f8f8f2  !important} /* Name.Property */
+.highlight .nt { color: #f92672  !important} /* Name.Tag */
+.highlight .nv { color: #f8f8f2  !important} /* Name.Variable */
+.highlight .ow { color: #f92672  !important} /* Operator.Word */
+.highlight .w { color: #f8f8f2  !important} /* Text.Whitespace */
+.highlight .mb { color: #ae81ff  !important} /* Literal.Number.Bin */
+.highlight .mf { color: #ae81ff  !important} /* Literal.Number.Float */
+.highlight .mh { color: #ae81ff  !important} /* Literal.Number.Hex */
+.highlight .mi { color: #ae81ff  !important} /* Literal.Number.Integer */
+.highlight .mo { color: #ae81ff  !important} /* Literal.Number.Oct */
+.highlight .sa { color: #e6db74  !important} /* Literal.String.Affix */
+.highlight .sb { color: #e6db74  !important} /* Literal.String.Backtick */
+.highlight .sc { color: #e6db74  !important} /* Literal.String.Char */
+.highlight .dl { color: #e6db74  !important} /* Literal.String.Delimiter */
+.highlight .sd { color: #e6db74  !important} /* Literal.String.Doc */
+.highlight .s2 { color: #e6db74  !important} /* Literal.String.Double */
+.highlight .se { color: #ae81ff  !important} /* Literal.String.Escape */
+.highlight .sh { color: #e6db74  !important} /* Literal.String.Heredoc */
+.highlight .si { color: #e6db74  !important} /* Literal.String.Interpol */
+.highlight .sx { color: #e6db74  !important} /* Literal.String.Other */
+.highlight .sr { color: #e6db74  !important} /* Literal.String.Regex */
+.highlight .s1 { color: #e6db74  !important} /* Literal.String.Single */
+.highlight .ss { color: #e6db74  !important} /* Literal.String.Symbol */
+.highlight .bp { color: #f8f8f2  !important} /* Name.Builtin.Pseudo */
+.highlight .fm { color: #a6e22e  !important} /* Name.Function.Magic */
+.highlight .vc { color: #f8f8f2  !important} /* Name.Variable.Class */
+.highlight .vg { color: #f8f8f2  !important} /* Name.Variable.Global */
+.highlight .vi { color: #f8f8f2  !important} /* Name.Variable.Instance */
+.highlight .vm { color: #f8f8f2  !important} /* Name.Variable.Magic */
+.highlight .il { color: #ae81ff  !important} /* Literal.Number.Integer.Long */

assets/custom.js CHANGED Viewed

@@ -245,11 +245,11 @@ function showOrHideUserInfo() {
 function toggleDarkMode(isEnabled) {
     if (isEnabled) {
-        gradioContainer.classList.add("dark");
-        document.body.style.setProperty("background-color", "var(--neutral-950)", "important");
     } else {
-        gradioContainer.classList.remove("dark");
-        document.body.style.backgroundColor = "";
     }
 }
 function adjustDarkMode() {

 function toggleDarkMode(isEnabled) {
     if (isEnabled) {
+        document.body.classList.add("dark");
+        // document.body.style.setProperty("background-color", "var(--neutral-950)", "important");
     } else {
+        document.body.classList.remove("dark");
+        // document.body.style.backgroundColor = "";
     }
 }
 function adjustDarkMode() {

history/2023-06-14_15-05-04.json ADDED Viewed

File without changes

modules/__pycache__/config.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/config.cpython-311.pyc and b/modules/__pycache__/config.cpython-311.pyc differ

modules/__pycache__/config.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/config.cpython-39.pyc and b/modules/__pycache__/config.cpython-39.pyc differ

modules/__pycache__/index_func.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/index_func.cpython-311.pyc and b/modules/__pycache__/index_func.cpython-311.pyc differ

modules/__pycache__/index_func.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/index_func.cpython-39.pyc and b/modules/__pycache__/index_func.cpython-39.pyc differ

modules/__pycache__/llama_func.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/llama_func.cpython-39.pyc and b/modules/__pycache__/llama_func.cpython-39.pyc differ

modules/__pycache__/overwrites.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/overwrites.cpython-311.pyc and b/modules/__pycache__/overwrites.cpython-311.pyc differ

modules/__pycache__/overwrites.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/overwrites.cpython-39.pyc and b/modules/__pycache__/overwrites.cpython-39.pyc differ

modules/__pycache__/pdf_func.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/pdf_func.cpython-311.pyc and b/modules/__pycache__/pdf_func.cpython-311.pyc differ

modules/__pycache__/pdf_func.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/pdf_func.cpython-39.pyc and b/modules/__pycache__/pdf_func.cpython-39.pyc differ

modules/__pycache__/presets.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/presets.cpython-311.pyc and b/modules/__pycache__/presets.cpython-311.pyc differ

modules/__pycache__/presets.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/presets.cpython-39.pyc and b/modules/__pycache__/presets.cpython-39.pyc differ

modules/__pycache__/shared.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/shared.cpython-311.pyc and b/modules/__pycache__/shared.cpython-311.pyc differ

modules/__pycache__/shared.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/shared.cpython-39.pyc and b/modules/__pycache__/shared.cpython-39.pyc differ

modules/__pycache__/utils.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/utils.cpython-311.pyc and b/modules/__pycache__/utils.cpython-311.pyc differ

modules/__pycache__/utils.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/utils.cpython-39.pyc and b/modules/__pycache__/utils.cpython-39.pyc differ

modules/config.py CHANGED Viewed

@@ -18,13 +18,13 @@ __all__ = [
     "log_level",
     "advance_docs",
     "update_doc_config",
-    "render_latex",
     "usage_limit",
     "multi_api_key",
     "server_name",
     "server_port",
     "share",
-    "hide_history_when_not_logged_in"
 ]
 # 添加一个统一的config文件，避免文件过多造成的疑惑（优先级最低）
@@ -42,11 +42,11 @@ hide_history_when_not_logged_in = config.get("hide_history_when_not_logged_in",
 if os.path.exists("api_key.txt"):
     logging.info("检测到api_key.txt文件，正在进行迁移...")
-    with open("api_key.txt", "r") as f:
         config["openai_api_key"] = f.read().strip()
     os.rename("api_key.txt", "api_key(deprecated).txt")
     with open("config.json", "w", encoding='utf-8') as f:
-        json.dump(config, f, indent=4)
 if os.path.exists("auth.json"):
     logging.info("检测到auth.json文件，正在进行迁移...")
@@ -62,7 +62,7 @@ if os.path.exists("auth.json"):
     config["users"] = auth_list
     os.rename("auth.json", "auth(deprecated).json")
     with open("config.json", "w", encoding='utf-8') as f:
-        json.dump(config, f, indent=4)
 ## 处理docker if we are running in Docker
 dockerflag = config.get("dockerflag", False)
@@ -76,12 +76,11 @@ my_api_key = os.environ.get("OPENAI_API_KEY", my_api_key)
 xmchat_api_key = config.get("xmchat_api_key", "")
 os.environ["XMCHAT_API_KEY"] = xmchat_api_key
-render_latex = config.get("render_latex", True)
-if render_latex:
-    os.environ["RENDER_LATEX"] = "yes"
-else:
-    os.environ["RENDER_LATEX"] = "no"
 usage_limit = os.environ.get("USAGE_LIMIT", config.get("usage_limit", 120))
@@ -98,10 +97,15 @@ auth_list = config.get("users", []) # 实际上是使用者的列表
 authflag = len(auth_list) > 0  # 是否开启认证的状态值，改为判断auth_list长度
 # 处理自定义的api_host，优先读环境变量的配置，如果存在则自动装配
-api_host = os.environ.get("api_host", config.get("api_host", ""))
-if api_host:
     shared.state.set_api_host(api_host)
 @contextmanager
 def retrieve_openai_api(api_key = None):
     old_api_key = os.environ.get("OPENAI_API_KEY", "")

     "log_level",
     "advance_docs",
     "update_doc_config",
     "usage_limit",
     "multi_api_key",
     "server_name",
     "server_port",
     "share",
+    "hide_history_when_not_logged_in",
+    "default_chuanhu_assistant_model"
 ]
 # 添加一个统一的config文件，避免文件过多造成的疑惑（优先级最低）
 if os.path.exists("api_key.txt"):
     logging.info("检测到api_key.txt文件，正在进行迁移...")
+    with open("api_key.txt", "r", encoding="utf-8") as f:
         config["openai_api_key"] = f.read().strip()
     os.rename("api_key.txt", "api_key(deprecated).txt")
     with open("config.json", "w", encoding='utf-8') as f:
+        json.dump(config, f, indent=4, ensure_ascii=False)
 if os.path.exists("auth.json"):
     logging.info("检测到auth.json文件，正在进行迁移...")
     config["users"] = auth_list
     os.rename("auth.json", "auth(deprecated).json")
     with open("config.json", "w", encoding='utf-8') as f:
+        json.dump(config, f, indent=4, ensure_ascii=False)
 ## 处理docker if we are running in Docker
 dockerflag = config.get("dockerflag", False)
 xmchat_api_key = config.get("xmchat_api_key", "")
 os.environ["XMCHAT_API_KEY"] = xmchat_api_key
+minimax_api_key = config.get("minimax_api_key", "")
+os.environ["MINIMAX_API_KEY"] = minimax_api_key
+minimax_group_id = config.get("minimax_group_id", "")
+os.environ["MINIMAX_GROUP_ID"] = minimax_group_id
 usage_limit = os.environ.get("USAGE_LIMIT", config.get("usage_limit", 120))
 authflag = len(auth_list) > 0  # 是否开启认证的状态值，改为判断auth_list长度
 # 处理自定义的api_host，优先读环境变量的配置，如果存在则自动装配
+api_host = os.environ.get("OPENAI_API_BASE", config.get("openai_api_base", None))
+if api_host is not None:
     shared.state.set_api_host(api_host)
+default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-3.5-turbo")
+for x in ["GOOGLE_CSE_ID", "GOOGLE_API_KEY", "WOLFRAM_ALPHA_APPID", "SERPAPI_API_KEY"]:
+    if config.get(x, None) is not None:
+        os.environ[x] = config[x]
 @contextmanager
 def retrieve_openai_api(api_key = None):
     old_api_key = os.environ.get("OPENAI_API_KEY", "")

modules/index_func.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import os
+import logging
+import colorama
+import PyPDF2
+from tqdm import tqdm
+from modules.presets import *
+from modules.utils import *
+from modules.config import local_embedding
+def get_index_name(file_src):
+    """Return an MD5 hex digest computed over the contents of the given files.
+
+    The files are hashed in order of their base name, and the bytes of every
+    file are fed into a single MD5 object.
+
+    Args:
+        file_src: Iterable of objects whose ``name`` attribute is the path of
+            a readable file.
+
+    Returns:
+        The hexadecimal MD5 digest of the concatenated file contents.
+    """
+    # Imported locally so the function does not depend on a star import.
+    import hashlib
+    file_paths = sorted((x.name for x in file_src), key=os.path.basename)
+    md5_hash = hashlib.md5()
+    for file_path in file_paths:
+        # Binary mode must not be given an encoding; doing so raises ValueError.
+        with open(file_path, "rb") as f:
+            while chunk := f.read(8192):
+                md5_hash.update(chunk)
+    return md5_hash.hexdigest()
+def get_documents(file_src):
+    """Load each uploaded file and split its text into token-based chunks.
+
+    The loader is picked from the file extension: ``.pdf``, ``.docx``,
+    ``.pptx``, ``.epub`` and ``.xlsx`` have dedicated branches; any other
+    extension is handed to ``TextLoader`` with the "utf8" encoding. A file
+    that fails to load is logged and skipped, so it contributes no chunks.
+
+    Args:
+        file_src: Iterable of objects whose ``name`` attribute is a file path.
+
+    Returns:
+        A list of ``Document`` chunks produced by a ``TokenTextSplitter``
+        configured with ``chunk_size=500`` and ``chunk_overlap=30``.
+    """
+    from langchain.schema import Document
+    from langchain.text_splitter import TokenTextSplitter
+    text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=30)
+    documents = []
+    logging.debug("Loading documents...")
+    logging.debug(f"file_src: {file_src}")
+    for file in file_src:
+        filepath = file.name
+        filename = os.path.basename(filepath)
+        file_type = os.path.splitext(filename)[1]
+        logging.info(f"loading file: {filename}")
+        try:
+            if file_type == ".pdf":
+                logging.debug("Loading PDF...")
+                try:
+                    from modules.pdf_func import parse_pdf
+                    from modules.config import advance_docs
+                    two_column = advance_docs["pdf"].get("two_column", False)
+                    pdftext = parse_pdf(filepath, two_column).text
+                except Exception:
+                    # parse_pdf failed: fall back to plain PyPDF2 text extraction.
+                    logging.warning(f"parse_pdf failed for {filename}; falling back to PyPDF2")
+                    # Binary mode must not be given an encoding (ValueError otherwise).
+                    with open(filepath, "rb") as pdfFileObj:
+                        pdfReader = PyPDF2.PdfReader(pdfFileObj)
+                        pdftext = "".join(
+                            page.extract_text() for page in tqdm(pdfReader.pages)
+                        )
+                texts = [Document(page_content=pdftext, metadata={"source": filepath})]
+            elif file_type == ".docx":
+                logging.debug("Loading Word...")
+                from langchain.document_loaders import UnstructuredWordDocumentLoader
+                loader = UnstructuredWordDocumentLoader(filepath)
+                texts = loader.load()
+            elif file_type == ".pptx":
+                logging.debug("Loading PowerPoint...")
+                from langchain.document_loaders import UnstructuredPowerPointLoader
+                loader = UnstructuredPowerPointLoader(filepath)
+                texts = loader.load()
+            elif file_type == ".epub":
+                logging.debug("Loading EPUB...")
+                from langchain.document_loaders import UnstructuredEPubLoader
+                loader = UnstructuredEPubLoader(filepath)
+                texts = loader.load()
+            elif file_type == ".xlsx":
+                logging.debug("Loading Excel...")
+                text_list = excel_to_string(filepath)
+                texts = [
+                    Document(page_content=elem, metadata={"source": filepath})
+                    for elem in text_list
+                ]
+            else:
+                logging.debug("Loading text file...")
+                from langchain.document_loaders import TextLoader
+                loader = TextLoader(filepath, "utf8")
+                texts = loader.load()
+        except Exception:
+            import traceback
+            logging.error(f"Error loading file: {filename}")
+            traceback.print_exc()
+            # Skip this file: `texts` is either unbound or still holds the
+            # previous file's documents, so splitting it would crash or
+            # add duplicates.
+            continue
+        documents.extend(text_splitter.split_documents(texts))
+    logging.debug("Documents loaded.")
+    return documents
+def construct_index(
+    api_key,
+    file_src,
+    max_input_size=4096,
+    num_outputs=5,
+    max_chunk_overlap=20,
+    chunk_size_limit=600,
+    embedding_limit=None,
+    separator=" ",
+):
+    """Build a FAISS index for the given files, or load it from the local cache.
+
+    The index lives at ``./index/<name>``, where ``<name>`` is the value
+    returned by ``get_index_name(file_src)``. If that path already exists the
+    index is loaded from it; otherwise the documents returned by
+    ``get_documents(file_src)`` are embedded, indexed and saved there.
+
+    Side effect: the ``OPENAI_API_KEY`` environment variable is overwritten,
+    with ``api_key`` when it is truthy and with a placeholder otherwise.
+
+    Args:
+        api_key: OpenAI API key; also the fallback embedding key when
+            ``OPENAI_EMBEDDING_API_KEY`` is not set in the environment.
+        file_src: Uploaded file objects, passed to ``get_index_name`` and
+            ``get_documents``.
+        max_input_size: Not used in this function body.
+        num_outputs: Not used in this function body.
+        max_chunk_overlap: Not used in this function body.
+        chunk_size_limit: Normalized (0 becomes None) but not used afterwards.
+        embedding_limit: Normalized (0 becomes None) but not used afterwards.
+        separator: Normalized ("" becomes " ") but not used afterwards.
+
+    Returns:
+        The FAISS index, or None when building a new index raises. Errors
+        raised while loading a cached index are not caught here.
+    """
+    from langchain.chat_models import ChatOpenAI
+    from langchain.vectorstores import FAISS
+    if api_key:
+        os.environ["OPENAI_API_KEY"] = api_key
+    else:
+        # Because of a poor design choice in a dependency, an API key must always be set here
+        os.environ["OPENAI_API_KEY"] = "sk-xxxxxxx"
+    chunk_size_limit = None if chunk_size_limit == 0 else chunk_size_limit
+    embedding_limit = None if embedding_limit == 0 else embedding_limit
+    separator = " " if separator == "" else separator
+    index_name = get_index_name(file_src)
+    index_path = f"./index/{index_name}"
+    # Choose the embedding backend: a local HuggingFace model when
+    # `local_embedding` is truthy, otherwise the OpenAI embeddings API.
+    if local_embedding:
+        from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+        embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/distiluse-base-multilingual-cased-v2")
+    else:
+        from langchain.embeddings import OpenAIEmbeddings
+        embeddings = OpenAIEmbeddings(openai_api_base=os.environ.get("OPENAI_API_BASE", None), openai_api_key=os.environ.get("OPENAI_EMBEDDING_API_KEY", api_key))
+    if os.path.exists(index_path):
+        # Cache hit: reuse the index previously saved for these files.
+        logging.info("找到了缓存的索引文件，加载中……")
+        return FAISS.load_local(index_path, embeddings)
+    else:
+        try:
+            documents = get_documents(file_src)
+            logging.info("构建索引中……")
+            # NOTE(review): retrieve_proxy is not defined in the visible code;
+            # presumably it comes from one of the star imports — confirm.
+            with retrieve_proxy():
+                index = FAISS.from_documents(documents, embeddings)
+            logging.debug("索引构建完成！")
+            os.makedirs("./index", exist_ok=True)
+            index.save_local(index_path)
+            logging.debug("索引已保存至本地!")
+            return index
+        except Exception as e:
+            import traceback
+            logging.error("索引构建失败！%s", e)
+            traceback.print_exc()
+            return None

modules/models/ChuanhuAgent.py ADDED Viewed

	@@ -0,0 +1,216 @@

+from langchain.chains.summarize import load_summarize_chain
+from langchain import PromptTemplate, LLMChain
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import PromptTemplate
+from langchain.text_splitter import TokenTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chains import RetrievalQA
+from langchain.agents import load_tools
+from langchain.agents import initialize_agent
+from langchain.agents import AgentType
+from langchain.docstore.document import Document
+from langchain.tools import BaseTool, StructuredTool, Tool, tool
+from langchain.callbacks.stdout import StdOutCallbackHandler
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.callbacks.manager import BaseCallbackManager
+from duckduckgo_search import DDGS
+from itertools import islice
+from typing import Any, Dict, List, Optional, Union
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.input import print_text
+from langchain.schema import AgentAction, AgentFinish, LLMResult
+from pydantic import BaseModel, Field
+import requests
+from bs4 import BeautifulSoup
+from threading import Thread, Condition
+from collections import deque
+from .base_model import BaseLLMModel, CallbackToIterator, ChuanhuCallbackHandler
+from ..config import default_chuanhu_assistant_model
+from ..presets import SUMMARIZE_PROMPT, i18n
+from ..index_func import construct_index
+from langchain.callbacks import get_openai_callback
+import os
+import gradio as gr
+import logging
+class GoogleSearchInput(BaseModel):
+    """Argument schema for the web-search tool: one string of search keywords."""
+    keywords: str = Field(description="keywords to search")
+class WebBrowsingInput(BaseModel):
+    """Argument schema for tools that take a single webpage URL."""
+    url: str = Field(description="URL of a webpage")
+class WebAskingInput(BaseModel):
+    """Argument schema for the "Ask Webpage" tool: a webpage URL plus a question about it."""
+    url: str = Field(description="URL of a webpage")
+    question: str = Field(description="Question that you want to know the answer to, based on the webpage's content.")
+class ChuanhuAgent_Client(BaseLLMModel):
+    def __init__(self, model_name, openai_api_key, user_name="") -> None:
+        super().__init__(model_name=model_name, user=user_name)
+        self.text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=30)
+        self.api_key = openai_api_key
+        self.llm = ChatOpenAI(openai_api_key=openai_api_key, temperature=0, model_name=default_chuanhu_assistant_model, openai_api_base=os.environ.get("OPENAI_API_BASE", None))
+        self.cheap_llm = ChatOpenAI(openai_api_key=openai_api_key, temperature=0, model_name="gpt-3.5-turbo", openai_api_base=os.environ.get("OPENAI_API_BASE", None))
+        PROMPT = PromptTemplate(template=SUMMARIZE_PROMPT, input_variables=["text"])
+        self.summarize_chain = load_summarize_chain(self.cheap_llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
+        self.index_summary = None
+        self.index = None
+        if "Pro" in self.model_name:
+            self.tools = load_tools(["google-search-results-json", "llm-math", "arxiv", "wikipedia", "wolfram-alpha"], llm=self.llm)
+        else:
+            self.tools = load_tools(["ddg-search", "llm-math", "arxiv", "wikipedia"], llm=self.llm)
+            self.tools.append(
+                Tool.from_function(
+                    func=self.google_search_simple,
+                    name="Google Search JSON",
+                    description="useful when you need to search the web.",
+                    args_schema=GoogleSearchInput
+                )
+            )
+        self.tools.append(
+            Tool.from_function(
+                func=self.summary_url,
+                name="Summary Webpage",
+                description="useful when you need to know the overall content of a webpage.",
+                args_schema=WebBrowsingInput
+            )
+        )
+        self.tools.append(
+            StructuredTool.from_function(
+                func=self.ask_url,
+                name="Ask Webpage",
+                description="useful when you need to ask detailed questions about a webpage.",
+                args_schema=WebAskingInput
+            )
+        )
+    def google_search_simple(self, query):
+        results = []
+        with DDGS() as ddgs:
+            ddgs_gen = ddgs.text("notes from a dead house", backend="lite")
+            for r in islice(ddgs_gen, 10):
+                results.append({
+                    "title": r["title"],
+                    "link": r["href"],
+                    "snippet": r["body"]
+                })
+        return str(results)
+    def handle_file_upload(self, files, chatbot, language):
+        """if the model accepts multi modal input, implement this function"""
+        status = gr.Markdown.update()
+        if files:
+            index = construct_index(self.api_key, file_src=files)
+            assert index is not None, "获取索引失败"
+            self.index = index
+            status = i18n("索引构建完成")
+            # Summarize the document
+            logging.info(i18n("生成内容总结中……"))
+            with get_openai_callback() as cb:
+                os.environ["OPENAI_API_KEY"] = self.api_key
+                from langchain.chains.summarize import load_summarize_chain
+                from langchain.prompts import PromptTemplate
+                from langchain.chat_models import ChatOpenAI
+                prompt_template = "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY IN " + language + ":"
+                PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
+                llm = ChatOpenAI()
+                chain = load_summarize_chain(llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
+                summary = chain({"input_documents": list(index.docstore.__dict__["_dict"].values())}, return_only_outputs=True)["output_text"]
+                logging.info(f"Summary: {summary}")
+                self.index_summary = summary
+                chatbot.append((f"Uploaded {len(files)} files", summary))
+            logging.info(cb)
+        return gr.Files.update(), chatbot, status
+    def query_index(self, query):
+        if self.index is not None:
+            retriever = self.index.as_retriever()
+            qa = RetrievalQA.from_chain_type(llm=self.llm, chain_type="stuff", retriever=retriever)
+            return qa.run(query)
+        else:
+            "Error during query."
+    def summary(self, text):
+        texts = Document(page_content=text)
+        texts = self.text_splitter.split_documents([texts])
+        return self.summarize_chain({"input_documents": texts}, return_only_outputs=True)["output_text"]
+    def fetch_url_content(self, url):
+        response = requests.get(url)
+        soup = BeautifulSoup(response.text, 'html.parser')
+        # 提取所有的文本
+        text = ''.join(s.getText() for s in soup.find_all('p'))
+        logging.info(f"Extracted text from {url}")
+        return text
+    def summary_url(self, url):
+        text = self.fetch_url_content(url)
+        if text == "":
+            return "URL unavailable."
+        text_summary = self.summary(text)
+        url_content = "webpage content summary:\n" + text_summary
+        return url_content
+    def ask_url(self, url, question):
+        text = self.fetch_url_content(url)
+        if text == "":
+            return "URL unavailable."
+        texts = Document(page_content=text)
+        texts = self.text_splitter.split_documents([texts])
+        # use embedding
+        embeddings = OpenAIEmbeddings(openai_api_key=self.api_key, openai_api_base=os.environ.get("OPENAI_API_BASE", None))
+        # create vectorstore
+        db = FAISS.from_documents(texts, embeddings)
+        retriever = db.as_retriever()
+        qa = RetrievalQA.from_chain_type(llm=self.cheap_llm, chain_type="stuff", retriever=retriever)
+        return qa.run(f"{question} Reply in 中文")
+    def get_answer_at_once(self):
+        question = self.history[-1]["content"]
+        # llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
+        agent = initialize_agent(self.tools, self.llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
+        reply = agent.run(input=f"{question} Reply in 简体中文")
+        return reply, -1
+    def get_answer_stream_iter(self):
+        question = self.history[-1]["content"]
+        it = CallbackToIterator()
+        manager = BaseCallbackManager(handlers=[ChuanhuCallbackHandler(it.callback)])
+        def thread_func():
+            tools = self.tools
+            if self.index is not None:
+                    tools.append(
+                        Tool.from_function(
+                        func=self.query_index,
+                        name="Query Knowledge Base",
+                        description=f"useful when you need to know about: {self.index_summary}",
+                        args_schema=WebBrowsingInput
+                    )
+                )
+            agent = initialize_agent(self.tools, self.llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True, callback_manager=manager)
+            try:
+                reply = agent.run(input=f"{question} Reply in 简体中文")
+            except Exception as e:
+                import traceback
+                traceback.print_exc()
+                reply = str(e)
+            it.callback(reply)
+            it.finish()
+        t = Thread(target=thread_func)
+        t.start()
+        partial_text = ""
+        for value in it:
+            partial_text += value
+            yield partial_text

modules/models/__pycache__/ChuanhuAgent.cpython-311.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/ChuanhuAgent.cpython-311.pyc and b/modules/models/__pycache__/ChuanhuAgent.cpython-311.pyc differ

modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc and b/modules/models/__pycache__/ChuanhuAgent.cpython-39.pyc differ

modules/models/__pycache__/base_model.cpython-311.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/base_model.cpython-311.pyc and b/modules/models/__pycache__/base_model.cpython-311.pyc differ

modules/models/__pycache__/base_model.cpython-39.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/base_model.cpython-39.pyc and b/modules/models/__pycache__/base_model.cpython-39.pyc differ

modules/models/__pycache__/minimax.cpython-39.pyc ADDED Viewed

Binary file (4.35 kB). View file

modules/models/__pycache__/models.cpython-311.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/models.cpython-311.pyc and b/modules/models/__pycache__/models.cpython-311.pyc differ

modules/models/__pycache__/models.cpython-39.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/models.cpython-39.pyc and b/modules/models/__pycache__/models.cpython-39.pyc differ

modules/models/base_model.py CHANGED Viewed

@@ -13,17 +13,110 @@ import pathlib
 from tqdm import tqdm
 import colorama
-from duckduckgo_search import ddg
 import asyncio
 import aiohttp
 from enum import Enum
 from ..presets import *
-from ..llama_func import *
 from ..utils import *
 from .. import shared
 from ..config import retrieve_proxy
 class ModelType(Enum):
     Unknown = -1
@@ -34,6 +127,8 @@ class ModelType(Enum):
     StableLM = 4
     MOSS = 5
     YuanAI = 6
     @classmethod
     def get_type(cls, model_name: str):
@@ -53,6 +148,10 @@ class ModelType(Enum):
             model_type = ModelType.MOSS
         elif "yuanai" in model_name_lower:
             model_type = ModelType.YuanAI
         else:
             model_type = ModelType.Unknown
         return model_type
@@ -146,6 +245,8 @@ class BaseLLMModel:
         stream_iter = self.get_answer_stream_iter()
         for partial_text in stream_iter:
             chatbot[-1] = (chatbot[-1][0], partial_text + display_append)
             self.all_token_counts[-1] += 1
@@ -178,67 +279,54 @@ class BaseLLMModel:
         status_text = self.token_message()
         return chatbot, status_text
-    def handle_file_upload(self, files, chatbot):
         """if the model accepts multi modal input, implement this function"""
         status = gr.Markdown.update()
         if files:
-            construct_index(self.api_key, file_src=files)
-            status = "索引构建完成"
         return gr.Files.update(), chatbot, status
     def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot):
         fake_inputs = None
         display_append = []
         limited_context = False
         fake_inputs = real_inputs
         if files:
-            from llama_index.indices.vector_store.base_query import GPTVectorStoreIndexQuery
-            from llama_index.indices.query.schema import QueryBundle
             from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-            from langchain.chat_models import ChatOpenAI
-            from llama_index import (
-                GPTSimpleVectorIndex,
-                ServiceContext,
-                LangchainEmbedding,
-                OpenAIEmbedding,
-            )
             limited_context = True
             msg = "加载索引中……"
             logging.info(msg)
-            # yield chatbot + [(inputs, "")], msg
             index = construct_index(self.api_key, file_src=files)
             assert index is not None, "获取索引失败"
             msg = "索引获取成功，生成回答中……"
             logging.info(msg)
-            if local_embedding or self.model_type != ModelType.OpenAI:
-                embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name = "sentence-transformers/distiluse-base-multilingual-cased-v2"))
-            else:
-                embed_model = OpenAIEmbedding()
-            # yield chatbot + [(inputs, "")], msg
             with retrieve_proxy():
-                prompt_helper = PromptHelper(
-                    max_input_size=4096,
-                    num_output=5,
-                    max_chunk_overlap=20,
-                    chunk_size_limit=600,
-                )
-                from llama_index import ServiceContext
-                service_context = ServiceContext.from_defaults(
-                    prompt_helper=prompt_helper, embed_model=embed_model
-                )
-                query_object = GPTVectorStoreIndexQuery(
-                    index.index_struct,
-                    service_context=service_context,
-                    similarity_top_k=5,
-                    vector_store=index._vector_store,
-                    docstore=index._docstore,
-                    response_synthesizer=None
-                )
-                query_bundle = QueryBundle(real_inputs)
-                nodes = query_object.retrieve(query_bundle)
-            reference_results = [n.node.text for n in nodes]
-            reference_results = add_source_numbers(reference_results, use_source=False)
             display_append = add_details(reference_results)
             display_append = "\n\n" + "".join(display_append)
             real_inputs = (
@@ -248,16 +336,19 @@ class BaseLLMModel:
                 .replace("{reply_language}", reply_language)
             )
         elif use_websearch:
-            limited_context = True
-            search_results = ddg(real_inputs, max_results=5)
             reference_results = []
             for idx, result in enumerate(search_results):
                 logging.debug(f"搜索结果{idx + 1}：{result}")
-                domain_name = urllib3.util.parse_url(result["href"]).host
-                reference_results.append([result["body"], result["href"]])
                 display_append.append(
                     # f"{idx+1}. [{domain_name}]({result['href']})\n"
-                    f"<li><a href=\"{result['href']}\" target=\"_blank\">{domain_name}</a></li>\n"
                 )
             reference_results = add_source_numbers(reference_results)
             display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
@@ -550,7 +641,7 @@ class BaseLLMModel:
                 history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
             else:
                 history_file_path = filename
-            with open(history_file_path, "r") as f:
                 json_s = json.load(f)
             try:
                 if type(json_s["history"][0]) == str:

 from tqdm import tqdm
 import colorama
+from duckduckgo_search import DDGS
+from itertools import islice
 import asyncio
 import aiohttp
 from enum import Enum
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.callbacks.manager import BaseCallbackManager
+from typing import Any, Dict, List, Optional, Union
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.input import print_text
+from langchain.schema import AgentAction, AgentFinish, LLMResult
+from threading import Thread, Condition
+from collections import deque
 from ..presets import *
+from ..index_func import *
 from ..utils import *
 from .. import shared
 from ..config import retrieve_proxy
+class CallbackToIterator:
+    """Adapter that turns values pushed through ``callback`` into an iterator.
+
+    Values are buffered in a deque; every access to the deque and to the
+    ``finished`` flag happens while holding ``self.cond``.
+    """
+    def __init__(self):
+        self.queue = deque()  # values appended by callback(), handed out oldest first
+        self.cond = Condition()  # guards queue/finished and wakes a waiting __next__
+        self.finished = False  # set to True by finish()
+    def callback(self, result):
+        """Append ``result`` to the queue and notify a waiting consumer."""
+        with self.cond:
+            self.queue.append(result)
+            self.cond.notify()  # Wake up the generator.
+    def __iter__(self):
+        """Return ``self``; the object is its own iterator."""
+        return self
+    def __next__(self):
+        """Block until a value is queued or ``finish`` was called.
+
+        Returns the oldest queued value. Raises ``StopIteration`` once the
+        queue is empty and ``finished`` is set.
+        """
+        with self.cond:
+            while not self.queue and not self.finished:  # Wait for a value to be added to the queue.
+                self.cond.wait()
+            if not self.queue:
+                raise StopIteration()
+            return self.queue.popleft()
+    def finish(self):
+        """Mark the stream as finished and notify a waiting consumer."""
+        with self.cond:
+            self.finished = True
+            self.cond.notify()  # Wake up the generator if it's waiting.
+def get_action_description(text):
+    match = re.search('```(.*?)```', text, re.S)
+    json_text = match.group(1)
+    # 把json转化为python字典
+    json_dict = json.loads(json_text)
+    # 提取'action'和'action_input'的值
+    action_name = json_dict['action']
+    action_input = json_dict['action_input']
+    if action_name != "Final Answer":
+        return f'<p style="font-size: smaller; color: gray;">{action_name}: {action_input}</p>'
+    else:
+        return ""
+class ChuanhuCallbackHandler(BaseCallbackHandler):
+    def __init__(self, callback) -> None:
+        """Initialize callback handler."""
+        self.callback = callback
+    def on_agent_action(
+        self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
+    ) -> Any:
+        self.callback(get_action_description(action.log))
+    def on_tool_end(
+        self,
+        output: str,
+        color: Optional[str] = None,
+        observation_prefix: Optional[str] = None,
+        llm_prefix: Optional[str] = None,
+        **kwargs: Any,
+    ) -> None:
+        """If not the final action, print out observation."""
+        # if observation_prefix is not None:
+        #     self.callback(f"\n\n{observation_prefix}")
+        # self.callback(output)
+        # if llm_prefix is not None:
+        #     self.callback(f"\n\n{llm_prefix}")
+        if observation_prefix is not None:
+            logging.info(observation_prefix)
+        self.callback(output)
+        if llm_prefix is not None:
+            logging.info(llm_prefix)
+    def on_agent_finish(
+        self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
+    ) -> None:
+        # self.callback(f"{finish.log}\n\n")
+        logging.info(finish.log)
+    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
+        """Run on new LLM token. Only available when streaming is enabled."""
+        self.callback(token)
 class ModelType(Enum):
     Unknown = -1
     StableLM = 4
     MOSS = 5
     YuanAI = 6
+    Minimax = 7
+    ChuanhuAgent = 8
     @classmethod
     def get_type(cls, model_name: str):
             model_type = ModelType.MOSS
         elif "yuanai" in model_name_lower:
             model_type = ModelType.YuanAI
+        elif "minimax" in model_name_lower:
+            model_type = ModelType.Minimax
+        elif "川虎助理" in model_name_lower:
+            model_type = ModelType.ChuanhuAgent
         else:
             model_type = ModelType.Unknown
         return model_type
         stream_iter = self.get_answer_stream_iter()
+        if display_append:
+            display_append = "<hr>" +display_append
         for partial_text in stream_iter:
             chatbot[-1] = (chatbot[-1][0], partial_text + display_append)
             self.all_token_counts[-1] += 1
         status_text = self.token_message()
         return chatbot, status_text
+    def handle_file_upload(self, files, chatbot, language):
         """if the model accepts multi modal input, implement this function"""
+        # `language` is not used by this base implementation.
         status = gr.Markdown.update()
         if files:
+            # The returned index is not stored here; presumably this call only
+            # makes sure the index exists for later use - TODO confirm.
+            index = construct_index(self.api_key, file_src=files)
+            # NOTE(review): the status is set without checking `index`; other callers
+            # in this file assert that it is not None.
+            status = i18n("索引构建完成")
         return gr.Files.update(), chatbot, status
+    def summarize_index(self, files, chatbot, language):
+        status = gr.Markdown.update()
+        if files:
+            index = construct_index(self.api_key, file_src=files)
+            status = i18n("总结完成")
+            logging.info(i18n("生成内容总结中……"))
+            os.environ["OPENAI_API_KEY"] = self.api_key
+            from langchain.chains.summarize import load_summarize_chain
+            from langchain.prompts import PromptTemplate
+            from langchain.chat_models import ChatOpenAI
+            from langchain.callbacks import StdOutCallbackHandler
+            prompt_template = "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY IN " + language + ":"
+            PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
+            llm = ChatOpenAI()
+            chain = load_summarize_chain(llm, chain_type="map_reduce", return_intermediate_steps=True, map_prompt=PROMPT, combine_prompt=PROMPT)
+            summary = chain({"input_documents": list(index.docstore.__dict__["_dict"].values())}, return_only_outputs=True)["output_text"]
+            print(i18n("总结") + f": {summary}")
+            chatbot.append([i18n("上传了")+str(len(files))+"个文件", summary])
+        return chatbot, status
     def prepare_inputs(self, real_inputs, use_websearch, files, reply_language, chatbot):
         fake_inputs = None
         display_append = []
         limited_context = False
         fake_inputs = real_inputs
         if files:
             from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+            from langchain.vectorstores.base import VectorStoreRetriever
             limited_context = True
             msg = "加载索引中……"
             logging.info(msg)
             index = construct_index(self.api_key, file_src=files)
             assert index is not None, "获取索引失败"
             msg = "索引获取成功，生成回答中……"
             logging.info(msg)
             with retrieve_proxy():
+                retriever = VectorStoreRetriever(vectorstore=index, search_type="similarity_score_threshold",search_kwargs={"k":6, "score_threshold": 0.5})
+                relevant_documents = retriever.get_relevant_documents(real_inputs)
+            reference_results = [[d.page_content.strip("�"), os.path.basename(d.metadata["source"])] for d in relevant_documents]
+            reference_results = add_source_numbers(reference_results)
             display_append = add_details(reference_results)
             display_append = "\n\n" + "".join(display_append)
             real_inputs = (
                 .replace("{reply_language}", reply_language)
             )
         elif use_websearch:
+            search_results = []
+            with DDGS() as ddgs:
+                ddgs_gen = ddgs.text(real_inputs, backend="lite")
+                for r in islice(ddgs_gen, 10):
+                    search_results.append(r)
             reference_results = []
             for idx, result in enumerate(search_results):
                 logging.debug(f"搜索结果{idx + 1}：{result}")
+                domain_name = urllib3.util.parse_url(result['href']).host
+                reference_results.append([result['body'], result['href']])
                 display_append.append(
                     # f"{idx+1}. [{domain_name}]({result['href']})\n"
+                    f"<li><a href=\"{result['href']}\" target=\"_blank\">{result['title']}</a></li>\n"
                 )
             reference_results = add_source_numbers(reference_results)
             display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
                 history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
             else:
                 history_file_path = filename
+            with open(history_file_path, "r", encoding="utf-8") as f:
                 json_s = json.load(f)
             try:
                 if type(json_s["history"][0]) == str:

modules/models/minimax.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import json
+import os
+import colorama
+import requests
+import logging
+from modules.models.base_model import BaseLLMModel
+from modules.presets import STANDARD_ERROR_MSG, GENERAL_ERROR_MSG, TIMEOUT_STREAMING, TIMEOUT_ALL, i18n
+group_id = os.environ.get("MINIMAX_GROUP_ID", "")
+class MiniMax_Client(BaseLLMModel):
+    """
+    MiniMax Client
+    接口文档见 https://api.minimax.chat/document/guides/chat
+    """
+    def __init__(self, model_name, api_key, user_name="", system_prompt=None):
+        super().__init__(model_name=model_name, user=user_name)
+        self.url = f'https://api.minimax.chat/v1/text/chatcompletion?GroupId={group_id}'
+        self.history = []
+        self.api_key = api_key
+        self.system_prompt = system_prompt
+        self.headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json"
+        }
+    def get_answer_at_once(self):
+        # minimax temperature is (0,1] and base model temperature is [0,2], and yuan 0.9 == base 1 so need to convert
+        temperature = self.temperature * 0.9 if self.temperature <= 1 else 0.9 + (self.temperature - 1) / 10
+        request_body = {
+            "model": self.model_name.replace('minimax-', ''),
+            "temperature": temperature,
+            "skip_info_mask": True,
+            'messages': [{"sender_type": "USER", "text": self.history[-1]['content']}]
+        }
+        if self.n_choices:
+            request_body['beam_width'] = self.n_choices
+        if self.system_prompt:
+            request_body['prompt'] = self.system_prompt
+        if self.max_generation_token:
+            request_body['tokens_to_generate'] = self.max_generation_token
+        if self.top_p:
+            request_body['top_p'] = self.top_p
+        response = requests.post(self.url, headers=self.headers, json=request_body)
+        res = response.json()
+        answer = res['reply']
+        total_token_count = res["usage"]["total_tokens"]
+        return answer, total_token_count
+    def get_answer_stream_iter(self):
+        response = self._get_response(stream=True)
+        if response is not None:
+            iter = self._decode_chat_response(response)
+            partial_text = ""
+            for i in iter:
+                partial_text += i
+                yield partial_text
+        else:
+            yield STANDARD_ERROR_MSG + GENERAL_ERROR_MSG
+    def _get_response(self, stream=False):
+        minimax_api_key = self.api_key
+        history = self.history
+        logging.debug(colorama.Fore.YELLOW +
+                      f"{history}" + colorama.Fore.RESET)
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {minimax_api_key}",
+        }
+        temperature = self.temperature * 0.9 if self.temperature <= 1 else 0.9 + (self.temperature - 1) / 10
+        messages = []
+        for msg in self.history:
+            if msg['role'] == 'user':
+                messages.append({"sender_type": "USER", "text": msg['content']})
+            else:
+                messages.append({"sender_type": "BOT", "text": msg['content']})
+        request_body = {
+            "model": self.model_name.replace('minimax-', ''),
+            "temperature": temperature,
+            "skip_info_mask": True,
+            'messages': messages
+        }
+        if self.n_choices:
+            request_body['beam_width'] = self.n_choices
+        if self.system_prompt:
+            lines = self.system_prompt.splitlines()
+            if lines[0].find(":") != -1 and len(lines[0]) < 20:
+                request_body["role_meta"] = {
+                    "user_name": lines[0].split(":")[0],
+                    "bot_name": lines[0].split(":")[1]
+                }
+                lines.pop()
+            request_body["prompt"] = "\n".join(lines)
+        if self.max_generation_token:
+            request_body['tokens_to_generate'] = self.max_generation_token
+        else:
+            request_body['tokens_to_generate'] = 512
+        if self.top_p:
+            request_body['top_p'] = self.top_p
+        if stream:
+            timeout = TIMEOUT_STREAMING
+            request_body['stream'] = True
+            request_body['use_standard_sse'] = True
+        else:
+            timeout = TIMEOUT_ALL
+        try:
+            response = requests.post(
+                self.url,
+                headers=headers,
+                json=request_body,
+                stream=stream,
+                timeout=timeout,
+            )
+        except:
+            return None
+        return response
+    def _decode_chat_response(self, response):
+        error_msg = ""
+        for chunk in response.iter_lines():
+            if chunk:
+                chunk = chunk.decode()
+                chunk_length = len(chunk)
+                print(chunk)
+                try:
+                    chunk = json.loads(chunk[6:])
+                except json.JSONDecodeError:
+                    print(i18n("JSON解析错误,��到的内容: ") + f"{chunk}")
+                    error_msg += chunk
+                    continue
+                if chunk_length > 6 and "delta" in chunk["choices"][0]:
+                    if "finish_reason" in chunk["choices"][0] and chunk["choices"][0]["finish_reason"] == "stop":
+                        self.all_token_counts.append(chunk["usage"]["total_tokens"] - sum(self.all_token_counts))
+                        break
+                    try:
+                        yield chunk["choices"][0]["delta"]
+                    except Exception as e:
+                        logging.error(f"Error: {e}")
+                        continue
+        if error_msg:
+            try:
+                error_msg = json.loads(error_msg)
+                if 'base_resp' in error_msg:
+                    status_code = error_msg['base_resp']['status_code']
+                    status_msg = error_msg['base_resp']['status_msg']
+                    raise Exception(f"{status_code} - {status_msg}")
+            except json.JSONDecodeError:
+                pass
+            raise Exception(error_msg)

modules/models/models.py CHANGED Viewed

@@ -15,14 +15,13 @@ from PIL import Image
 from tqdm import tqdm
 import colorama
-from duckduckgo_search import ddg
 import asyncio
 import aiohttp
 from enum import Enum
 import uuid
 from ..presets import *
-from ..llama_func import *
 from ..utils import *
 from .. import shared
 from ..config import retrieve_proxy, usage_limit
@@ -339,7 +338,7 @@ class LLaMA_Client(BaseLLMModel):
             pipeline_args = InferencerArguments(
                 local_rank=0, random_seed=1, deepspeed='configs/ds_config_chatbot.json', mixed_precision='bf16')
-            with open(pipeline_args.deepspeed, "r") as f:
                 ds_config = json.load(f)
             LLAMA_MODEL = AutoModel.get_model(
                 model_args,
@@ -494,7 +493,7 @@ class XMChat(BaseLLMModel):
         limited_context = False
         return limited_context, fake_inputs, display_append, real_inputs, chatbot
-    def handle_file_upload(self, files, chatbot):
         """if the model accepts multi modal input, implement this function"""
         if files:
             for file in files:
@@ -557,6 +556,7 @@ def get_model(
         config.local_embedding = True
     # del current_model.model
     model = None
     try:
         if model_type == ModelType.OpenAI:
             logging.info(f"正在加载OpenAI模型: {model_name}")
@@ -602,10 +602,17 @@ def get_model(
         elif model_type == ModelType.YuanAI:
             from .inspurai import Yuan_Client
             model = Yuan_Client(model_name, api_key=access_key, user_name=user_name, system_prompt=system_prompt)
         elif model_type == ModelType.Unknown:
             raise ValueError(f"未知模型: {model_name}")
         logging.info(msg)
-        chatbot = gr.Chatbot.update(label=model_name)
     except Exception as e:
         logging.error(e)
         msg = f"{STANDARD_ERROR_MSG}: {e}"
@@ -616,7 +623,7 @@ def get_model(
 if __name__ == "__main__":
-    with open("config.json", "r") as f:
         openai_api_key = cjson.load(f)["openai_api_key"]
     # set logging level to debug
     logging.basicConfig(level=logging.DEBUG)

 from tqdm import tqdm
 import colorama
 import asyncio
 import aiohttp
 from enum import Enum
 import uuid
 from ..presets import *
+from ..index_func import *
 from ..utils import *
 from .. import shared
 from ..config import retrieve_proxy, usage_limit
             pipeline_args = InferencerArguments(
                 local_rank=0, random_seed=1, deepspeed='configs/ds_config_chatbot.json', mixed_precision='bf16')
+            with open(pipeline_args.deepspeed, "r", encoding="utf-8") as f:
                 ds_config = json.load(f)
             LLAMA_MODEL = AutoModel.get_model(
                 model_args,
         limited_context = False
         return limited_context, fake_inputs, display_append, real_inputs, chatbot
+    def handle_file_upload(self, files, chatbot, language):
         """if the model accepts multi modal input, implement this function"""
         if files:
             for file in files:
         config.local_embedding = True
     # del current_model.model
     model = None
+    chatbot = gr.Chatbot.update(label=model_name)
     try:
         if model_type == ModelType.OpenAI:
             logging.info(f"正在加载OpenAI模型: {model_name}")
         elif model_type == ModelType.YuanAI:
             from .inspurai import Yuan_Client
             model = Yuan_Client(model_name, api_key=access_key, user_name=user_name, system_prompt=system_prompt)
+        elif model_type == ModelType.Minimax:
+            from .minimax import MiniMax_Client
+            if os.environ.get("MINIMAX_API_KEY") != "":
+                access_key = os.environ.get("MINIMAX_API_KEY")
+            model = MiniMax_Client(model_name, api_key=access_key, user_name=user_name, system_prompt=system_prompt)
+        elif model_type == ModelType.ChuanhuAgent:
+            from .ChuanhuAgent import ChuanhuAgent_Client
+            model = ChuanhuAgent_Client(model_name, access_key, user_name=user_name)
         elif model_type == ModelType.Unknown:
             raise ValueError(f"未知模型: {model_name}")
         logging.info(msg)
     except Exception as e:
         logging.error(e)
         msg = f"{STANDARD_ERROR_MSG}: {e}"
 if __name__ == "__main__":
+    with open("config.json", "r", encoding="utf-8") as f:
         openai_api_key = cjson.load(f)["openai_api_key"]
     # set logging level to debug
     logging.basicConfig(level=logging.DEBUG)

modules/overwrites.py CHANGED Viewed

@@ -1,23 +1,13 @@
 from __future__ import annotations
 import logging
-from llama_index import Prompt
 from typing import List, Tuple
-import mdtex2html
 from gradio_client import utils as client_utils
 from modules.presets import *
-from modules.llama_func import *
-from modules.config import render_latex
-def compact_text_chunks(self, prompt: Prompt, text_chunks: List[str]) -> List[str]:
-    logging.debug("Compacting text chunks...🚀🚀🚀")
-    combined_str = [c.strip() for c in text_chunks if c.strip()]
-    combined_str = [f"[{index+1}] {c}" for index, c in enumerate(combined_str)]
-    combined_str = "\n\n".join(combined_str)
-    # resplit based on self.max_chunk_overlap
-    text_splitter = self.get_text_splitter_given_prompt(prompt, 1, padding=1)
-    return text_splitter.split_text(combined_str)
 def postprocess(
@@ -50,14 +40,18 @@ def postprocess(
         return processed_messages
 def postprocess_chat_messages(
-        self, chat_message: str | Tuple | List | None, message_type: str
-    ) -> str | Dict | None:
         if chat_message is None:
             return None
         elif isinstance(chat_message, (tuple, list)):
-            filepath = chat_message[0]
             mime_type = client_utils.get_mimetype(filepath)
-            filepath = self.make_temp_copy_if_needed(filepath)
             return {
                 "name": filepath,
                 "mime_type": mime_type,
@@ -66,12 +60,13 @@ def postprocess_chat_messages(
                 "is_file": True,
             }
         elif isinstance(chat_message, str):
-            if message_type == "bot":
-                if not detect_converted_mark(chat_message):
-                    chat_message = convert_mdtext(chat_message)
-            elif message_type == "user":
-                if not detect_converted_mark(chat_message):
-                    chat_message = convert_asis(chat_message)
             return chat_message
         else:
             raise ValueError(f"Invalid message for Chatbot component: {chat_message}")
@@ -85,11 +80,8 @@ with open("./assets/custom.js", "r", encoding="utf-8") as f, \
 def reload_javascript():
     print("Reloading javascript...")
     js = f'<script>{customJS}</script><script async>{externalScripts}</script>'
-    if render_latex:
-        js += """\
-            <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-MML-AM_CHTML"></script>
-            <script type="text/x-mathjax-config">MathJax.Hub.Config({skipStartupTypeset: false, tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']],displayMath: [['$$','$$'], ['\\[','\\]']]}});</script>
-        """
     def template_response(*args, **kwargs):
         res = GradioTemplateResponseOriginal(*args, **kwargs)
         res.body = res.body.replace(b'</html>', f'{js}</html>'.encode("utf8"))

 from __future__ import annotations
 import logging
 from typing import List, Tuple
 from gradio_client import utils as client_utils
+from gradio import utils
+import inspect
 from modules.presets import *
+from modules.index_func import *
 def postprocess(
         return processed_messages
 def postprocess_chat_messages(
+        self, chat_message: str | tuple | list | None, role: str
+    ) -> str | dict | None:
         if chat_message is None:
             return None
         elif isinstance(chat_message, (tuple, list)):
+            file_uri = chat_message[0]
+            if utils.validate_url(file_uri):
+                filepath = file_uri
+            else:
+                filepath = self.make_temp_copy_if_needed(file_uri)
             mime_type = client_utils.get_mimetype(filepath)
             return {
                 "name": filepath,
                 "mime_type": mime_type,
                 "is_file": True,
             }
         elif isinstance(chat_message, str):
+            # chat_message = inspect.cleandoc(chat_message)
+            # escape html spaces
+            # chat_message = chat_message.replace(" ", "&nbsp;")
+            if role == "bot":
+                chat_message = convert_bot_before_marked(chat_message)
+            elif role == "user":
+                chat_message = convert_user_before_marked(chat_message)
             return chat_message
         else:
             raise ValueError(f"Invalid message for Chatbot component: {chat_message}")
 def reload_javascript():
     print("Reloading javascript...")
     js = f'<script>{customJS}</script><script async>{externalScripts}</script>'
+    # if render_latex:
+    #     js += """\"""
     def template_response(*args, **kwargs):
         res = GradioTemplateResponseOriginal(*args, **kwargs)
         res.body = res.body.replace(b'</html>', f'{js}</html>'.encode("utf8"))

modules/pdf_func.py CHANGED Viewed

@@ -1,11 +1,11 @@
 from types import SimpleNamespace
 import pdfplumber
 import logging
-from llama_index import Document
 def prepare_table_config(crop_page):
     """Prepare table查找边界, 要求page为原始page
     From https://github.com/jsvine/pdfplumber/issues/242
     """
     page = crop_page.root_page # root/parent
@@ -60,7 +60,7 @@ def get_title_with_cropped_page(first_page):
             title_bottom = word.bottom
         elif word.text == "Abstract": # 获取页面abstract
             top = word.top
     user_info = [i["text"] for i in extract_words(first_page.within_bbox((x0,title_bottom,x1,top)))]
     # 裁剪掉上半部分, within_bbox: full_included; crop: partial_included
     return title, user_info, first_page.within_bbox((x0,top,x1,bottom))
@@ -75,7 +75,7 @@ def get_column_cropped_pages(pages, two_column=True):
             new_pages.append(right)
         else:
             new_pages.append(page)
     return new_pages
 def parse_pdf(filename, two_column = True):
@@ -94,7 +94,7 @@ def parse_pdf(filename, two_column = True):
             name_top=name_top,
             name_bottom=name_bottom,
             record_chapter_name = True,
             page_start=page_start,
             page_stop=None,
@@ -114,7 +114,7 @@ def parse_pdf(filename, two_column = True):
                 if word.size >= 11: # 出现chapter name
                     if cur_chapter is None:
                         cur_chapter = create_chapter(page.page_number, word.top, word.bottom)
-                    elif not cur_chapter.record_chapter_name or (cur_chapter.name_bottom != cur_chapter.name_bottom and cur_chapter.name_top != cur_chapter.name_top):
                         # 不再继续写chapter name
                         cur_chapter.page_stop = page.page_number # stop id
                         chapters.append(cur_chapter)
@@ -143,7 +143,7 @@ def parse_pdf(filename, two_column = True):
         text += f"The {idx}th Chapter {chapter.name}: " + " ".join(chapter.text) + "\n"
     logging.getLogger().setLevel(level)
-    return Document(text=text, extra_info={"title": title})
 BASE_POINTS = """
 1. Who are the authors?

 from types import SimpleNamespace
 import pdfplumber
 import logging
+from langchain.docstore.document import Document
 def prepare_table_config(crop_page):
     """Prepare table查找边界, 要求page为原始page
     From https://github.com/jsvine/pdfplumber/issues/242
     """
     page = crop_page.root_page # root/parent
             title_bottom = word.bottom
         elif word.text == "Abstract": # 获取页面abstract
             top = word.top
     user_info = [i["text"] for i in extract_words(first_page.within_bbox((x0,title_bottom,x1,top)))]
     # 裁剪掉上半部分, within_bbox: full_included; crop: partial_included
     return title, user_info, first_page.within_bbox((x0,top,x1,bottom))
             new_pages.append(right)
         else:
             new_pages.append(page)
     return new_pages
 def parse_pdf(filename, two_column = True):
             name_top=name_top,
             name_bottom=name_bottom,
             record_chapter_name = True,
             page_start=page_start,
             page_stop=None,
                 if word.size >= 11: # 出现chapter name
                     if cur_chapter is None:
                         cur_chapter = create_chapter(page.page_number, word.top, word.bottom)
+                    elif not cur_chapter.record_chapter_name or (cur_chapter.name_bottom != cur_chapter.name_bottom and cur_chapter.name_top != cur_chapter.name_top):
                         # 不再继续写chapter name
                         cur_chapter.page_stop = page.page_number # stop id
                         chapters.append(cur_chapter)
         text += f"The {idx}th Chapter {chapter.name}: " + " ".join(chapter.text) + "\n"
     logging.getLogger().setLevel(level)
+    return Document(page_content=text, metadata={"title": title})
 BASE_POINTS = """
 1. Who are the authors?

modules/presets.py CHANGED Viewed

@@ -46,32 +46,27 @@ CHUANHU_TITLE = i18n("川虎Chat 🚀")
 CHUANHU_DESCRIPTION = i18n("由Bilibili [土川虎虎虎](https://space.bilibili.com/29125536)、[明昭MZhao](https://space.bilibili.com/24807452) 和 [Keldos](https://github.com/Keldos-Li) 开发<br />访问川虎Chat的 [GitHub项目](https://github.com/GaiZhenbiao/ChuanhuChatGPT) 下载最新版脚本")
-FOOTER = """<div class="versions">{versions}</div>"""
-APPEARANCE_SWITCHER = """
-<div style="display: flex; justify-content: space-between;">
-<span style="margin-top: 4px !important;">"""+ i18n("切换亮暗色主题")  + """</span>
-<span><label class="apSwitch" for="checkbox">
-    <input type="checkbox" id="checkbox">
-    <div class="apSlider"></div>
-</label></span>
-</div>
-"""
-SUMMARIZE_PROMPT = "你是谁？我们刚才聊了什么？"  # 总结对话时的 prompt
 ONLINE_MODELS = [
     "gpt-3.5-turbo",
     "gpt-3.5-turbo-0301",
     "gpt-4",
     "gpt-4-0314",
     "gpt-4-32k",
     "gpt-4-32k-0314",
     "xmchat",
     "yuanai-1.0-base_10B",
     "yuanai-1.0-translate",
     "yuanai-1.0-dialog",
     "yuanai-1.0-rhythm_poems",
 ]
 LOCAL_MODELS = [
@@ -103,11 +98,15 @@ for dir_name in os.listdir("models"):
 MODEL_TOKEN_LIMIT = {
     "gpt-3.5-turbo": 4096,
     "gpt-3.5-turbo-0301": 4096,
     "gpt-4": 8192,
     "gpt-4-0314": 8192,
     "gpt-4-32k": 32768,
-    "gpt-4-32k-0314": 32768
 }
 TOKEN_OFFSET = 1000 # 模型的token上限减去这个值，得到软上限。到达软上限之后，自动尝试减少token占用。
@@ -164,6 +163,12 @@ Reply in {reply_language}
 If the context isn't useful, return the original answer.
 """
 ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"
 small_and_beautiful_theme = gr.themes.Soft(
@@ -230,4 +235,6 @@ small_and_beautiful_theme = gr.themes.Soft(
         block_title_background_fill_dark="*primary_900",
         block_label_background_fill_dark="*primary_900",
         input_background_fill="#F6F6F6",
     )

 CHUANHU_DESCRIPTION = i18n("由Bilibili [土川虎虎虎](https://space.bilibili.com/29125536)、[明昭MZhao](https://space.bilibili.com/24807452) 和 [Keldos](https://github.com/Keldos-Li) 开发<br />访问川虎Chat的 [GitHub项目](https://github.com/GaiZhenbiao/ChuanhuChatGPT) 下载最新版脚本")
 ONLINE_MODELS = [
     "gpt-3.5-turbo",
+    "gpt-3.5-turbo-16k",
     "gpt-3.5-turbo-0301",
+    "gpt-3.5-turbo-0613",
     "gpt-4",
     "gpt-4-0314",
+    "gpt-4-0613",
     "gpt-4-32k",
     "gpt-4-32k-0314",
+    "gpt-4-32k-0613",
+    "川虎助理",
+    "川虎助理 Pro",
     "xmchat",
     "yuanai-1.0-base_10B",
     "yuanai-1.0-translate",
     "yuanai-1.0-dialog",
     "yuanai-1.0-rhythm_poems",
+    "minimax-abab4-chat",
+    "minimax-abab5-chat",
 ]
 LOCAL_MODELS = [
 MODEL_TOKEN_LIMIT = {
     "gpt-3.5-turbo": 4096,
+    "gpt-3.5-turbo-16k": 16384,
     "gpt-3.5-turbo-0301": 4096,
+    "gpt-3.5-turbo-0613": 4096,
     "gpt-4": 8192,
     "gpt-4-0314": 8192,
+    "gpt-4-0613": 8192,
     "gpt-4-32k": 32768,
+    "gpt-4-32k-0314": 32768,
+    "gpt-4-32k-0613": 32768
 }
 TOKEN_OFFSET = 1000 # 模型的token上限减去这个值，得到软上限。到达软上限之后，自动尝试减少token占用。
 If the context isn't useful, return the original answer.
 """
+SUMMARIZE_PROMPT = """Write a concise summary of the following:
+{text}
+CONCISE SUMMARY IN 中文:"""
 ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"
 small_and_beautiful_theme = gr.themes.Soft(
         block_title_background_fill_dark="*primary_900",
         block_label_background_fill_dark="*primary_900",
         input_background_fill="#F6F6F6",
+        chatbot_code_background_color="*neutral_950",
+        chatbot_code_background_color_dark="*neutral_950",
     )

modules/shared.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from modules.presets import COMPLETION_URL, BALANCE_API_URL, USAGE_API_URL, API_HOST
 import os
 import queue
 class State:
     interrupted = False
@@ -15,23 +16,28 @@ class State:
     def recover(self):
         self.interrupted = False
-    def set_api_host(self, api_host):
-        self.completion_url = f"https://{api_host}/v1/chat/completions"
-        self.balance_api_url = f"https://{api_host}/dashboard/billing/credit_grants"
-        self.usage_api_url = f"https://{api_host}/dashboard/billing/usage"
-        os.environ["OPENAI_API_BASE"] = f"https://{api_host}/v1"
     def reset_api_host(self):
         self.completion_url = COMPLETION_URL
         self.balance_api_url = BALANCE_API_URL
         self.usage_api_url = USAGE_API_URL
-        os.environ["OPENAI_API_BASE"] = f"https://{API_HOST}/v1"
         return API_HOST
     def reset_all(self):
         self.interrupted = False
         self.completion_url = COMPLETION_URL
     def set_api_key_queue(self, api_key_list):
         self.multi_api_key = True
         self.api_key_queue = queue.Queue()
@@ -50,6 +56,9 @@ class State:
             return ret
         return wrapped
 state = State()

 from modules.presets import COMPLETION_URL, BALANCE_API_URL, USAGE_API_URL, API_HOST
 import os
 import queue
+import openai
 class State:
     interrupted = False
     def recover(self):
         self.interrupted = False
+    def set_api_host(self, api_host: str):
+        api_host = api_host.rstrip("/")
+        if not api_host.startswith("http"):
+            api_host = f"https://{api_host}"
+        if api_host.endswith("/v1"):
+            api_host = api_host[:-3]
+        self.completion_url = f"{api_host}/v1/chat/completions"
+        self.balance_api_url = f"{api_host}/dashboard/billing/credit_grants"
+        self.usage_api_url = f"{api_host}/dashboard/billing/usage"
+        os.environ["OPENAI_API_BASE"] = api_host
     def reset_api_host(self):
         self.completion_url = COMPLETION_URL
         self.balance_api_url = BALANCE_API_URL
         self.usage_api_url = USAGE_API_URL
+        os.environ["OPENAI_API_BASE"] = f"https://{API_HOST}"
         return API_HOST
     def reset_all(self):
         self.interrupted = False
         self.completion_url = COMPLETION_URL
     def set_api_key_queue(self, api_key_list):
         self.multi_api_key = True
         self.api_key_queue = queue.Queue()
             return ret
         return wrapped
 state = State()
+modules_path = os.path.dirname(os.path.realpath(__file__))  # directory holding this module, with symlinks resolved
+chuanhu_path = os.path.dirname(modules_path)  # parent directory of modules_path

modules/utils.py CHANGED Viewed

@@ -16,7 +16,6 @@ import subprocess
 import gradio as gr
 from pypinyin import lazy_pinyin
 import tiktoken
-import mdtex2html
 from markdown import markdown
 from pygments import highlight
 from pygments.lexers import get_lexer_by_name
@@ -116,6 +115,9 @@ def set_single_turn(current_model, *args):
 def handle_file_upload(current_model, *args):
     return current_model.handle_file_upload(*args)
 def like(current_model, *args):
     return current_model.like(*args)
@@ -130,7 +132,7 @@ def count_token(message):
     return length
-def markdown_to_html_with_syntax_highlight(md_str):
     def replacer(match):
         lang = match.group(1) or "text"
         code = match.group(2)
@@ -152,7 +154,7 @@ def markdown_to_html_with_syntax_highlight(md_str):
     return html_str
-def normalize_markdown(md_text: str) -> str:
     lines = md_text.split("\n")
     normalized_lines = []
     inside_list = False
@@ -176,7 +178,7 @@ def normalize_markdown(md_text: str) -> str:
     return "\n".join(normalized_lines)
-def convert_mdtext(md_text):
     code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
     inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
     code_blocks = code_block_pattern.findall(md_text)
@@ -200,15 +202,70 @@ def convert_mdtext(md_text):
     output += ALREADY_CONVERTED_MARK
     return output
-def convert_asis(userinput):
     return (
         f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>'
         + ALREADY_CONVERTED_MARK
     )
-def detect_converted_mark(userinput):
     try:
         if userinput.endswith(ALREADY_CONVERTED_MARK):
             return True
@@ -218,7 +275,7 @@ def detect_converted_mark(userinput):
         return True
-def detect_language(code):
     if code.startswith("\n"):
         first_line = ""
     else:
@@ -253,8 +310,8 @@ def save_file(filename, system, history, chatbot, user_name):
             history_file_path = filename
         else:
             history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
-        with open(history_file_path, "w") as f:
-            json.dump(json_s, f)
     elif filename.endswith(".md"):
         md_s = f"system: \n- {system} \n"
         for data in history:
@@ -494,6 +551,13 @@ def versions_html():
         <a style="text-decoration:none;color:inherit" href="https://github.com/GaiZhenbiao/ChuanhuChatGPT">ChuanhuChat</a>: {commit_info}
         """
 def add_source_numbers(lst, source_name = "Source", use_source = True):
     if use_source:
         return [f'[{idx+1}]\t "{item[0]}"\n{source_name}: {item[1]}' for idx, item in enumerate(lst)]
@@ -560,7 +624,7 @@ def toggle_like_btn_visibility(selected_model_name):
 def new_auto_history_filename(dirname):
     latest_file = get_latest_filepath(dirname)
     if latest_file:
-        with open(os.path.join(dirname, latest_file), 'r') as f:
             if len(f.read()) == 0:
                 return latest_file
     now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

 import gradio as gr
 from pypinyin import lazy_pinyin
 import tiktoken
 from markdown import markdown
 from pygments import highlight
 from pygments.lexers import get_lexer_by_name
 def handle_file_upload(current_model, *args):
     return current_model.handle_file_upload(*args)
+def handle_summarize_index(current_model, *args):  # thin dispatcher: forwards *args unchanged to the given model
+    return current_model.summarize_index(*args)  # the model's return value is passed through as-is
 def like(current_model, *args):
     return current_model.like(*args)
     return length
+def markdown_to_html_with_syntax_highlight(md_str): # deprecated
     def replacer(match):
         lang = match.group(1) or "text"
         code = match.group(2)
     return html_str
+def normalize_markdown(md_text: str) -> str: # deprecated
     lines = md_text.split("\n")
     normalized_lines = []
     inside_list = False
     return "\n".join(normalized_lines)
+def convert_mdtext(md_text): # deprecated
     code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
     inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
     code_blocks = code_block_pattern.findall(md_text)
     output += ALREADY_CONVERTED_MARK
     return output
def convert_bot_before_marked(chat_message):
    """
    Wrap a bot reply in a raw-message div followed by an md-message div.

    A message that already contains the md-message wrapper is returned
    unchanged. Otherwise the result is the escaped original text inside a
    "raw-message hideM" div, followed by an "md-message" div holding the
    text with every fenced code block re-emitted on its own lines.

    Note: the output must not be indented, otherwise marked would parse it
    as a code block.
    """
    if '<div class="md-message">' in chat_message:
        return chat_message

    fence = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
    # With a single capture group, split() alternates:
    # prose, fence body, prose, fence body, ..., prose.
    segments = fence.split(chat_message)
    prose_parts = segments[0::2]
    fenced_parts = segments[1::2]
    fenced_parts.append("")  # pad so the trailing prose part has a partner

    pieces = []
    for prose, fenced in zip(prose_parts, fenced_parts):
        # Whitespace-only prose and empty fences are dropped entirely.
        if prose.strip():
            pieces.append(prose)
        if fenced.strip():
            pieces.append(f"\n```{fenced}\n```")
    body = "".join(pieces)

    raw_div = f'<div class="raw-message hideM">{escape_markdown(chat_message)}</div>'
    md_div = f'<div class="md-message">{body}\n</div>'
    return raw_div + md_div
def convert_user_before_marked(chat_message):
    """
    Wrap a user message in a "user-message" div with its Markdown escaped.

    A message that already contains the user-message wrapper is returned
    unchanged, so the text is not wrapped or escaped twice.
    """
    already_wrapped = '<div class="user-message">' in chat_message
    if already_wrapped:
        return chat_message
    escaped = escape_markdown(chat_message)
    return f'<div class="user-message">{escaped}</div>'
# Built once at import time: maps each Markdown/HTML-significant character
# to the HTML entity that makes it render literally.
_MARKDOWN_ESCAPE_TABLE = str.maketrans({
    ' ': '&nbsp;',
    '_': '&#95;',
    '*': '&#42;',
    '[': '&#91;',
    ']': '&#93;',
    '(': '&#40;',
    ')': '&#41;',
    '{': '&#123;',
    '}': '&#125;',
    '#': '&#35;',
    '+': '&#43;',
    '-': '&#45;',
    '.': '&#46;',
    '!': '&#33;',
    '`': '&#96;',
    '>': '&#62;',
    '<': '&#60;',
    '|': '&#124;',
})


def escape_markdown(text):
    """
    Escape Markdown special characters to HTML-safe equivalents.

    Every character listed in the escape table (space, ``_ * [ ] ( ) { } #
    + - . ! ` > < |``) is replaced by its HTML entity; all other characters,
    including ``&`` and newlines, are passed through unchanged.

    Args:
        text: The string to escape.

    Returns:
        A new string with the listed characters replaced by entities.
    """
    # One translate() pass replaces the per-character dict lookup and join.
    return text.translate(_MARKDOWN_ESCAPE_TABLE)
+def convert_asis(userinput): # deprecated
     return (
         f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>'
         + ALREADY_CONVERTED_MARK
     )
+def detect_converted_mark(userinput): # deprecated
     try:
         if userinput.endswith(ALREADY_CONVERTED_MARK):
             return True
         return True
+def detect_language(code): # deprecated
     if code.startswith("\n"):
         first_line = ""
     else:
             history_file_path = filename
         else:
             history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
+        with open(history_file_path, "w", encoding='utf-8') as f:
+            json.dump(json_s, f, ensure_ascii=False)
     elif filename.endswith(".md"):
         md_s = f"system: \n- {system} \n"
         for data in history:
         <a style="text-decoration:none;color:inherit" href="https://github.com/GaiZhenbiao/ChuanhuChatGPT">ChuanhuChat</a>: {commit_info}
         """
def get_html(filename):
    """
    Return the text of an HTML asset, or "" when the file is missing.

    The file is looked up as ``assets/html/<filename>`` below
    ``shared.chuanhu_path`` and read as UTF-8.

    Args:
        filename: Name of the file inside the ``assets/html`` directory.

    Returns:
        The file contents as a string, or an empty string if no such file
        exists.
    """
    path = os.path.join(shared.chuanhu_path, "assets", "html", filename)
    # Open directly instead of checking os.path.exists() first: the file can
    # disappear between the check and the open, which would raise instead of
    # returning the "" fallback.
    try:
        with open(path, encoding="utf8") as file:
            return file.read()
    except (FileNotFoundError, NotADirectoryError):
        return ""
 def add_source_numbers(lst, source_name = "Source", use_source = True):
     if use_source:
         return [f'[{idx+1}]\t "{item[0]}"\n{source_name}: {item[1]}' for idx, item in enumerate(lst)]
 def new_auto_history_filename(dirname):
     latest_file = get_latest_filepath(dirname)
     if latest_file:
+        with open(os.path.join(dirname, latest_file), 'r', encoding="utf-8") as f:
             if len(f.read()) == 0:
                 return latest_file
     now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

requirements.txt CHANGED Viewed

@@ -1,18 +1,25 @@
-gradio==3.28.0
-gradio_client==0.1.4
-mdtex2html
 pypinyin
 tiktoken
 socksio
 tqdm
 colorama
-duckduckgo_search==2.9.5
 Pygments
-llama_index==0.5.25
-langchain<0.0.150
 markdown
 PyPDF2
 pdfplumber
 pandas
 commentjson
 openpyxl

+gradio==3.33.1
+gradio_client==0.2.5
 pypinyin
 tiktoken
 socksio
 tqdm
 colorama
+googlesearch-python
 Pygments
+langchain==0.0.173
 markdown
 PyPDF2
 pdfplumber
 pandas
 commentjson
 openpyxl
+pandoc
+wolframalpha
+faiss-cpu
+duckduckgo-search
+arxiv
+wikipedia
+google.generativeai
+openai
+unstructured