baichuan-inc/Baichuan-13B-Chat

Text Generation

text-generation-inference

Inference Endpoints

Model card | Files and versions | Community

WinterGYC committed on Aug 23, 2023

Commit

a1c6298

•

1 Parent(s): 25f60a6

Update handler.py

Files changed (1) hide show

handler.py +8 -8

handler.py CHANGED Viewed

@@ -1,11 +1,11 @@
 import torch
 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 # get dtype
 dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
@@ -23,15 +23,15 @@ class EndpointHandler:
             prediction = self.pipeline(inputs, **parameters)
         else:
             prediction = self.pipeline(inputs)
-        print("---start---")
-        print(prediction)
-        print("---end---")
         # ignoring parameters! Default to configs in generation_config.json.
         messages = [{"role": "user", "content": data}]
         response = self.model.chat(self.tokenizer, messages)
-        print("---start chat response---")
-        print(response)
-        print("---end chat response---")
-        return [{'generated_text': response}]

 import torch
 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import logging
 # get dtype
 dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
             prediction = self.pipeline(inputs, **parameters)
         else:
             prediction = self.pipeline(inputs)
+        logging.warn("---start---")
+        logging.warn(prediction)
+        logging.warn("---end---")
         # ignoring parameters! Default to configs in generation_config.json.
         messages = [{"role": "user", "content": data}]
         response = self.model.chat(self.tokenizer, messages)
+        logging.warn("---start chat response---")
+        logging.warn(response)
+        logging.warn("---end chat response---")
+        return [[{response: 1.0}]]