qgyd2021 committed on
Commit
c64dba3
1 Parent(s): 3c85855

[update]add main

Browse files
Files changed (1) hide show
  1. main.py +18 -6
main.py CHANGED
@@ -73,6 +73,7 @@ def chat_with_llm_non_stream(question: str,
73
  history: List[Tuple[str, str]],
74
  pretrained_model_name_or_path: str,
75
  max_new_tokens: int, top_p: float, temperature: float, repetition_penalty: float,
 
76
  ):
77
  device: str = "cuda" if torch.cuda.is_available() else "cpu"
78
 
@@ -90,7 +91,8 @@ def chat_with_llm_non_stream(question: str,
90
  for input_ids_ in batch_input_ids:
91
  input_ids.extend(input_ids_)
92
  input_ids.append(tokenizer.eos_token_id)
93
- input_ids = torch.tensor([input_ids], dtype=torch.long).to(device)
 
94
 
95
  with torch.no_grad():
96
  outputs = model.generate(
@@ -114,6 +116,7 @@ def chat_with_llm_streaming(question: str,
114
  history: List[Tuple[str, str]],
115
  pretrained_model_name_or_path: str,
116
  max_new_tokens: int, top_p: float, temperature: float, repetition_penalty: float,
 
117
  ):
118
  device: str = "cuda" if torch.cuda.is_available() else "cpu"
119
 
@@ -131,7 +134,8 @@ def chat_with_llm_streaming(question: str,
131
  for input_ids_ in batch_input_ids:
132
  input_ids.extend(input_ids_)
133
  input_ids.append(tokenizer.eos_token_id)
134
- input_ids = torch.tensor([input_ids], dtype=torch.long).to(device)
 
135
 
136
  streamer = TextIteratorStreamer(tokenizer=tokenizer)
137
 
@@ -190,17 +194,25 @@ def main():
190
  temperature = gr.Slider(minimum=0, maximum=1, value=0.35, step=0.01, label="temperature")
191
  with gr.Column(scale=1):
192
  repetition_penalty = gr.Slider(minimum=0, maximum=2, value=1.2, step=0.01, label="repetition_penalty")
 
 
193
 
194
  with gr.Row():
195
- model_name = gr.Dropdown(choices=["Qwen/Qwen-7B-Chat"],
196
- value="Qwen/Qwen-7B-Chat",
197
- label="model_name",
198
- )
 
 
 
 
 
199
  gr.Examples(examples=["你好"], inputs=text_box)
200
 
201
  inputs = [
202
  text_box, chatbot, model_name,
203
  max_new_tokens, top_p, temperature, repetition_penalty,
 
204
  ]
205
  outputs = [
206
  chatbot
 
73
  history: List[Tuple[str, str]],
74
  pretrained_model_name_or_path: str,
75
  max_new_tokens: int, top_p: float, temperature: float, repetition_penalty: float,
76
+ history_max_len: int,
77
  ):
78
  device: str = "cuda" if torch.cuda.is_available() else "cpu"
79
 
 
91
  for input_ids_ in batch_input_ids:
92
  input_ids.extend(input_ids_)
93
  input_ids.append(tokenizer.eos_token_id)
94
+ input_ids = torch.tensor([input_ids], dtype=torch.long)
95
+ input_ids = input_ids[:, -history_max_len:].to(device)
96
 
97
  with torch.no_grad():
98
  outputs = model.generate(
 
116
  history: List[Tuple[str, str]],
117
  pretrained_model_name_or_path: str,
118
  max_new_tokens: int, top_p: float, temperature: float, repetition_penalty: float,
119
+ history_max_len: int,
120
  ):
121
  device: str = "cuda" if torch.cuda.is_available() else "cpu"
122
 
 
134
  for input_ids_ in batch_input_ids:
135
  input_ids.extend(input_ids_)
136
  input_ids.append(tokenizer.eos_token_id)
137
+ input_ids = torch.tensor([input_ids], dtype=torch.long)
138
+ input_ids = input_ids[:, -history_max_len:].to(device)
139
 
140
  streamer = TextIteratorStreamer(tokenizer=tokenizer)
141
 
 
194
  temperature = gr.Slider(minimum=0, maximum=1, value=0.35, step=0.01, label="temperature")
195
  with gr.Column(scale=1):
196
  repetition_penalty = gr.Slider(minimum=0, maximum=2, value=1.2, step=0.01, label="repetition_penalty")
197
+ with gr.Column(scale=1):
198
+ history_max_len = gr.Slider(minimum=0, maximum=4096, value=1024, step=1, label="history_max_len")
199
 
200
  with gr.Row():
201
+ model_name = gr.Dropdown(
202
+ choices=[
203
+ "Qwen/Qwen-7B-Chat",
204
+ "THUDM/chatglm2-6b",
205
+ "baichuan-inc/Baichuan2-7B-Chat",
206
+ ],
207
+ value="Qwen/Qwen-7B-Chat",
208
+ label="model_name",
209
+ )
210
  gr.Examples(examples=["你好"], inputs=text_box)
211
 
212
  inputs = [
213
  text_box, chatbot, model_name,
214
  max_new_tokens, top_p, temperature, repetition_penalty,
215
+ history_max_len
216
  ]
217
  outputs = [
218
  chatbot