GradientGuru committed
Commit 0ef0739
Parent: a731bb0

Update modeling_baichuan.py

Files changed (1)
  1. modeling_baichuan.py +7 -6
modeling_baichuan.py CHANGED
@@ -35,6 +35,7 @@ def _fill_with_neg_inf(t):
     return t.float().fill_(float("-inf")).type_as(t)
 
 def _gen_alibi_mask(n_head, max_pos):
+    """used in inference only"""
     slopes = torch.Tensor(_get_interleave(n_head))
     alibi = slopes.unsqueeze(1).unsqueeze(1) * torch.arange(max_pos).unsqueeze(0).unsqueeze(0).expand(
         n_head, -1, -1)
@@ -46,7 +47,7 @@ def _gen_alibi_mask(n_head, max_pos):
     return alibi_mask
 
 def _buffered_future_mask(tensor, maxpos, alibi, attn_heads):
-    """for training only"""
+    """used in training only"""
     dim = tensor.size(1)
     _future_mask = torch.triu(
         _fill_with_neg_inf(torch.zeros([maxpos, maxpos])), 1
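Note: the two docstring edits above pin down the division of labor between the mask builders: _gen_alibi_mask precomputes the ALiBi attention bias for inference, while _buffered_future_mask rebuilds the causal future mask for training. A minimal, self-contained sketch of what they compute, with power_of_2_slopes as a hypothetical stand-in for this file's _get_interleave (which additionally interleaves slopes for non-power-of-two head counts):

    import math
    import torch

    def power_of_2_slopes(n_head):
        # Geometric ALiBi slopes per head, per the ALiBi paper
        # ("Train Short, Test Long"): 0.5, 0.25, ... for n_head == 8.
        start = 2 ** (-(2 ** -(math.log2(n_head) - 3)))
        return [start * start ** i for i in range(n_head)]

    def alibi_causal_mask(n_head, max_pos):
        slopes = torch.tensor(power_of_2_slopes(n_head))
        # Per-head linear bias over key positions: (n_head, 1, max_pos).
        bias = slopes[:, None, None] * torch.arange(max_pos)[None, None, :]
        # Upper-triangular -inf enforces causality, as in _buffered_future_mask.
        future = torch.triu(torch.full((max_pos, max_pos), float("-inf")), 1)
        return future[None, :, :] + bias  # (n_head, max_pos, max_pos)

    print(alibi_causal_mask(n_head=8, max_pos=16).shape)  # torch.Size([8, 16, 16])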
@@ -235,7 +236,6 @@ class BaichuanPreTrainedModel(PreTrainedModel):
         module.gradient_checkpointing = value
 
 
-
 class BaichuanModel(BaichuanPreTrainedModel):
     def __init__(self, config: BaichuanConfig):
         super().__init__(config)
@@ -288,7 +288,6 @@ class BaichuanModel(BaichuanPreTrainedModel):
         return_dict: Optional[bool] = True,
     ) -> Union[Tuple, BaseModelOutputWithPast]:
 
-
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot provide both input_ids and inputs_embeds simultaneously")
         elif input_ids is not None:
@@ -298,6 +297,8 @@ class BaichuanModel(BaichuanPreTrainedModel):
         else:
             raise ValueError("You need to provide input_ids or inputs_embeds")
 
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
         seq_length_with_past = seq_length
 
         if past_key_values is not None:
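Note: the two added lines are the substantive fix in this commit. forward() now falls back to config.use_return_dict whenever the caller leaves return_dict as None, which is the standard transformers idiom. A toy illustration of the pattern (Config and forward below are illustrative, not the model's actual code):

    from dataclasses import dataclass

    @dataclass
    class Config:
        use_return_dict: bool = True

    def forward(return_dict=None, config=Config()):
        # Explicit argument wins; otherwise the model-wide config decides.
        return_dict = return_dict if return_dict is not None else config.use_return_dict
        hidden_states = ("hidden",)       # stand-in for real outputs
        if not return_dict:
            return hidden_states          # plain tuple
        return {"last_hidden_state": hidden_states}  # a ModelOutput in real code

    print(forward())                   # dict path (config default)
    print(forward(return_dict=False))  # tuple path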
@@ -440,7 +441,8 @@ class BaichuanForCausalLM(BaichuanPreTrainedModel):
         **kwargs
     ) -> Union[Tuple, CausalLMOutputWithPast]:
 
-
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
         # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
         outputs = self.model(
             input_ids=input_ids,
@@ -514,7 +516,6 @@ class BaichuanForCausalLM(BaichuanPreTrainedModel):
             for layer_past in past_key_values
         )
 
-
     def quantize(self, bits: int):
        try:
            from .quantizer import QLinear
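Note: quantize() swaps the model's linear layers for QLinear from the repo's bundled quantizer module. A hedged usage sketch; only the method name and the bits argument come from this file, and the repo id is an assumption for illustration:

    from transformers import AutoModelForCausalLM

    # Repo id is illustrative; quantize(8) follows the signature in the diff.
    # Whether quantize() returns self is not visible here, so the result is
    # re-assigned defensively.
    model = AutoModelForCausalLM.from_pretrained(
        "baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True
    )
    model = model.quantize(8)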
@@ -603,4 +604,4 @@ class BaichuanForCausalLM(BaichuanPreTrainedModel):
         self.__class__.generate = PreTrainedModel.generate  # disable stream
         outputs = self.generate(input_ids, generation_config=generation_config)
         response = tokenizer.decode(outputs[0][len(input_ids[0]):], skip_special_tokens=True)
-        return response
+        return response
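Note: this tail belongs to the non-streaming chat path: generate is restored from PreTrainedModel to disable streaming, and only the tokens produced after the prompt are decoded into the response. A sketch of that slicing pattern with hypothetical tensors:

    import torch

    input_ids = torch.tensor([[101, 7592, 102]])             # prompt ids (hypothetical)
    outputs = torch.tensor([[101, 7592, 102, 2023, 2003]])   # prompt + generated ids

    new_tokens = outputs[0][len(input_ids[0]):]  # keep only the generated tail
    print(new_tokens)                            # tensor([2023, 2003])
    # response = tokenizer.decode(new_tokens, skip_special_tokens=True)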
 