Commit f97e6ec
qgyd2021 committed
1 Parent(s): 0cfc467

[update]add main

Files changed (2):
  1. main.py +19 -14
  2. requirements.txt +1 -0
main.py CHANGED
@@ -19,7 +19,9 @@ def greet(question: str, history: List[Tuple[str, str]]):
 model_map: dict = dict()
 
 
-def init_model(pretrained_model_name_or_path: str, device: str):
+def init_model(pretrained_model_name_or_path: str):
+    device: str = "cuda" if torch.cuda.is_available() else "cpu"
+
     global model_map
     if pretrained_model_name_or_path not in model_map.keys():
         # clear
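This hunk drops the device parameter from init_model and resolves the device inside the helper instead. A minimal sketch of what the updated helper plausibly looks like; the loading calls and the cache-eviction step are assumptions, since the diff only shows the signature, the device line, and the cache lookup:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_map: dict = dict()


def init_model(pretrained_model_name_or_path: str):
    # Device is now resolved here instead of being threaded through
    # the Gradio callback signature.
    device: str = "cuda" if torch.cuda.is_available() else "cpu"

    global model_map
    if pretrained_model_name_or_path not in model_map.keys():
        # Assumption: evict any previously loaded model so only one
        # stays resident (the "# clear" branch in the diff).
        model_map.clear()

        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path, trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            pretrained_model_name_or_path, trust_remote_code=True
        ).to(device).eval()
        model_map[pretrained_model_name_or_path] = (model, tokenizer)
    return model_map[pretrained_model_name_or_path]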
@@ -70,18 +72,24 @@ def chat_with_llm_non_stream(question: str,
                              history: List[Tuple[str, str]],
                              pretrained_model_name_or_path: str,
                              max_new_tokens: int, top_p: float, temperature: float, repetition_penalty: float,
-                             device: str
                              ):
-    model, tokenizer = init_model(pretrained_model_name_or_path, device)
+    device: str = "cuda" if torch.cuda.is_available() else "cpu"
+
+    model, tokenizer = init_model(pretrained_model_name_or_path)
 
-    input_ids = tokenizer(
-        question,
-        return_tensors="pt",
-        add_special_tokens=False,
-    ).input_ids.to(device)
-    bos_token_id = torch.tensor([[tokenizer.bos_token_id]], dtype=torch.long).to(device)
-    eos_token_id = torch.tensor([[tokenizer.eos_token_id]], dtype=torch.long).to(device)
-    input_ids = torch.concat([bos_token_id, input_ids, eos_token_id], dim=1)
+    text_list = list()
+    for pair in history:
+        text_list.extend(pair)
+    text_list.append(question)
+
+    text_encoded = tokenizer.__call__(text_list, add_special_tokens=False)
+    batch_input_ids = text_encoded["input_ids"]
+
+    input_ids = [tokenizer.bos_token_id]
+    for input_ids_ in batch_input_ids:
+        input_ids.extend(input_ids_)
+    input_ids.append(tokenizer.eos_token_id)
+    input_ids = torch.tensor([input_ids], dtype=torch.long).to(device)
 
     with torch.no_grad():
         outputs = model.generate(
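This hunk replaces single-question encoding with a flattened history: every (user, bot) pair plus the current question is tokenized as a batch, then spliced into one id sequence bracketed by BOS and EOS. The same logic in isolation, with made-up token ids standing in for a real tokenizer:

# Illustration of the flattening logic with fabricated token ids.
history = [("hi", "hello!"), ("how are you", "fine")]
question = "tell me a joke"

text_list = list()
for pair in history:
    text_list.extend(pair)     # user turn, then bot turn
text_list.append(question)     # current question goes last
# text_list == ["hi", "hello!", "how are you", "fine", "tell me a joke"]

# Pretend each text maps to these ids; a real tokenizer called on a
# list of strings returns one id list per input string.
batch_input_ids = [[11], [12, 13], [14], [15], [16, 17]]
bos_token_id, eos_token_id = 1, 2

input_ids = [bos_token_id]
for input_ids_ in batch_input_ids:
    input_ids.extend(input_ids_)
input_ids.append(eos_token_id)
# input_ids == [1, 11, 12, 13, 14, 15, 16, 17, 2]

Note the flattened sequence carries no role markers between turns; whether that matters depends on the prompt format the loaded model expects.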
@@ -106,8 +114,6 @@ def main():
     chat llm
     """
 
-    device: str = "cuda" if torch.cuda.is_available() else "cpu"
-
     with gr.Blocks() as blocks:
         gr.Markdown(value=description)
 
 
@@ -143,7 +149,6 @@ def main():
         inputs = [
             text_box, chatbot, model_name,
             max_new_tokens, top_p, temperature, repetition_penalty,
-            device
         ]
         outputs = [
             chatbot
 
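The last two hunks delete the device variable from main() and drop it from the component list wired into the callback. A self-contained sketch of that wiring under gradio 3.38, with a stub standing in for the real chat function and hypothetical widget ranges and defaults:

import gradio as gr


def chat_stub(question, history, model_name,
              max_new_tokens, top_p, temperature, repetition_penalty):
    # Stand-in for chat_with_llm_non_stream; note no device argument.
    return history + [(question, "stub reply")]


with gr.Blocks() as blocks:
    chatbot = gr.Chatbot()
    text_box = gr.Textbox()
    model_name = gr.Textbox(value="hypothetical/model-id")
    max_new_tokens = gr.Slider(1, 1024, value=512, label="max_new_tokens")
    top_p = gr.Slider(0.0, 1.0, value=0.85, label="top_p")
    temperature = gr.Slider(0.0, 2.0, value=0.35, label="temperature")
    repetition_penalty = gr.Slider(1.0, 2.0, value=1.2, label="repetition_penalty")
    submit = gr.Button("submit")

    submit.click(
        fn=chat_stub,
        inputs=[text_box, chatbot, model_name,
                max_new_tokens, top_p, temperature, repetition_penalty],
        outputs=[chatbot],
    )

blocks.queue().launch()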
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 gradio==3.38.0
 transformers==4.30.2
 torch==1.13.0
+tiktoken==0.5.1
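The only requirements change pins tiktoken, presumably because a trust_remote_code tokenizer imports it at load time; the commit itself does not say which model needs it. A quick check that the pinned package imports and round-trips text:

# Assumes tiktoken==0.5.1 as pinned above; the association with any
# specific model is an inference, not stated in the commit.
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")
ids = enc.encode("hello world")
assert enc.decode(ids) == "hello world"
print(ids)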