zzz99 committed on
Commit
87658d7
1 Parent(s): 65520f9

Upload handler.py

Browse files
Files changed (1) hide show
  1. handler.py +9 -9
handler.py CHANGED
@@ -1,9 +1,9 @@
1
  from typing import Any, Dict
2
 
3
  import torch
4
- from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
5
 
6
- # from peft import PeftConfig, PeftModel
7
 
8
 
9
  class EndpointHandler:
@@ -11,17 +11,17 @@ class EndpointHandler:
11
  # load model and processor from path
12
  self.tokenizer = AutoTokenizer.from_pretrained(path)
13
  # try:
14
- # config = AutoConfig.from_pretrained(path)
15
  model = AutoModelForCausalLM.from_pretrained(
16
- path,
17
  # return_dict=True,
18
  # load_in_8bit=True,
19
  device_map="auto",
20
  torch_dtype=torch.float16,
21
- # trust_remote_code=True,
22
  )
23
  # model.resize_token_embeddings(len(self.tokenizer))
24
- # model = PeftModel.from_pretrained(model, path)
25
  # except Exception:
26
  # model = AutoModelForCausalLM.from_pretrained(
27
  # path, device_map="auto", load_in_8bit=True, torch_dtype=torch.float16, trust_remote_code=True
@@ -35,13 +35,13 @@ class EndpointHandler:
35
  parameters = data.pop("parameters", None)
36
 
37
  # preprocess
38
- inputs = self.tokenizer(f"User: {inputs}\n\n", return_tensors="pt")
39
 
40
  # pass inputs with all kwargs in data
41
  if parameters is not None:
42
- outputs = self.model.generate(**inputs.to(self.device), max_new_tokens=880, **parameters)
43
  else:
44
- outputs = self.model.generate(**inputs.to(self.device), max_new_tokens=880)
45
 
46
  # postprocess the prediction
47
  prediction = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
1
  from typing import Any, Dict
2
 
3
  import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
 
6
+ from peft import PeftConfig, PeftModel
7
 
8
 
9
  class EndpointHandler:
 
11
  # load model and processor from path
12
  self.tokenizer = AutoTokenizer.from_pretrained(path)
13
  # try:
14
+ config = PeftConfig.from_pretrained(path)
15
  model = AutoModelForCausalLM.from_pretrained(
16
+ config.base_model_name_or_path,
17
  # return_dict=True,
18
  # load_in_8bit=True,
19
  device_map="auto",
20
  torch_dtype=torch.float16,
21
+ trust_remote_code=True,
22
  )
23
  # model.resize_token_embeddings(len(self.tokenizer))
24
+ model = PeftModel.from_pretrained(model, path)
25
  # except Exception:
26
  # model = AutoModelForCausalLM.from_pretrained(
27
  # path, device_map="auto", load_in_8bit=True, torch_dtype=torch.float16, trust_remote_code=True
 
35
  parameters = data.pop("parameters", None)
36
 
37
  # preprocess
38
+ inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
39
 
40
  # pass inputs with all kwargs in data
41
  if parameters is not None:
42
+ outputs = self.model.generate(**inputs, max_new_tokens=880, **parameters)
43
  else:
44
+ outputs = self.model.generate(**inputs, max_new_tokens=880)
45
 
46
  # postprocess the prediction
47
  prediction = self.tokenizer.decode(outputs[0], skip_special_tokens=True)