GradientGuru committed
Commit 0ef0739 • 1 Parent(s): a731bb0
Update modeling_baichuan.py

modeling_baichuan.py CHANGED (+7 -6)
@@ -35,6 +35,7 @@ def _fill_with_neg_inf(t):
     return t.float().fill_(float("-inf")).type_as(t)
 
 def _gen_alibi_mask(n_head, max_pos):
+    """used in inference only"""
     slopes = torch.Tensor(_get_interleave(n_head))
     alibi = slopes.unsqueeze(1).unsqueeze(1) * torch.arange(max_pos).unsqueeze(0).unsqueeze(0).expand(
         n_head, -1, -1)
@@ -46,7 +47,7 @@ def _gen_alibi_mask(n_head, max_pos):
     return alibi_mask
 
 def _buffered_future_mask(tensor, maxpos, alibi, attn_heads):
-    """
+    """used in training only"""
     dim = tensor.size(1)
     _future_mask = torch.triu(
         _fill_with_neg_inf(torch.zeros([maxpos, maxpos])), 1
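Note: the two new docstrings pin down the division of labor between the mask helpers. _gen_alibi_mask builds the full ALiBi bias-plus-causal mask used at inference time, while _buffered_future_mask produces the training-time mask. A minimal, self-contained sketch of the inference-side construction follows; get_slopes is a simplified stand-in for the repo's _get_interleave and assumes a power-of-two head count:

import math
import torch

def get_slopes(n_head):
    # Standard ALiBi slope schedule for power-of-two head counts.
    start = 2 ** (-(2 ** -(math.log2(n_head) - 3)))
    return [start * (start ** i) for i in range(n_head)]

def gen_alibi_mask(n_head, max_pos):
    slopes = torch.tensor(get_slopes(n_head))          # (n_head,)
    positions = torch.arange(max_pos)                  # (max_pos,)
    # Per-head linear bias over key positions: (n_head, 1, max_pos).
    alibi = slopes[:, None, None] * positions[None, None, :].expand(n_head, 1, -1)
    # Causal part: -inf strictly above the diagonal blocks future tokens.
    future = torch.triu(torch.full((max_pos, max_pos), float("-inf")), 1)
    return future[None, :, :] + alibi                  # (n_head, max_pos, max_pos)

print(gen_alibi_mask(8, 16).shape)  # torch.Size([8, 16, 16])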
@@ -235,7 +236,6 @@ class BaichuanPreTrainedModel(PreTrainedModel):
         module.gradient_checkpointing = value
 
 
-
 class BaichuanModel(BaichuanPreTrainedModel):
     def __init__(self, config: BaichuanConfig):
         super().__init__(config)
@@ -288,7 +288,6 @@ class BaichuanModel(BaichuanPreTrainedModel):
         return_dict: Optional[bool] = True,
     ) -> Union[Tuple, BaseModelOutputWithPast]:
 
-
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot provide both input_ids and inputs_embeds simultaneously")
         elif input_ids is not None:
@@ -298,6 +297,8 @@ class BaichuanModel(BaichuanPreTrainedModel):
         else:
             raise ValueError("You need to provide input_ids or inputs_embeds")
 
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
         seq_length_with_past = seq_length
 
         if past_key_values is not None:
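Note: this is the substantive fix in the commit. Before it, forward never consulted config.use_return_dict, so a caller passing return_dict=None would presumably fall onto the legacy tuple path via the usual "if not return_dict" branch, as in other transformers forwards. A minimal sketch of the pattern, with finish_forward as a hypothetical helper and the config default inlined as True (the real code reads self.config.use_return_dict):

import torch
from transformers.modeling_outputs import BaseModelOutputWithPast

def finish_forward(hidden_states, past_key_values, return_dict):
    # Same guard the diff adds: an explicit argument wins, else the config default.
    return_dict = return_dict if return_dict is not None else True
    if not return_dict:
        return (hidden_states, past_key_values)  # legacy tuple output
    return BaseModelOutputWithPast(
        last_hidden_state=hidden_states,
        past_key_values=past_key_values,
    )

out = finish_forward(torch.zeros(1, 4, 8), None, return_dict=None)
print(type(out).__name__)  # BaseModelOutputWithPast, not a bare tuple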
@@ -440,7 +441,8 @@ class BaichuanForCausalLM(BaichuanPreTrainedModel):
         **kwargs
     ) -> Union[Tuple, CausalLMOutputWithPast]:
 
-
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
         # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
         outputs = self.model(
             input_ids=input_ids,
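Note: the same guard lands in BaichuanForCausalLM.forward, keeping both levels of the model consistent. Illustrative effect, with the call shapes assumed rather than taken from the diff:

outputs = model(input_ids)                      # respects config.use_return_dict
outputs = model(input_ids, return_dict=False)   # explicitly forces the tuple form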
@@ -514,7 +516,6 @@ class BaichuanForCausalLM(BaichuanPreTrainedModel):
             for layer_past in past_key_values
         )
 
-
     def quantize(self, bits: int):
         try:
             from .quantizer import QLinear
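Note on the context around this whitespace-only hunk: quantize(bits) swaps the model's Linear layers for QLinear from the repo's quantizer module. A hedged usage sketch, following the pattern in the repo's README (the model id and the exact chaining are assumptions, not taken from this diff):

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True
)
model = model.quantize(8).cuda()  # 8-bit weight quantization via QLinear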
@@ -603,4 +604,4 @@ class BaichuanForCausalLM(BaichuanPreTrainedModel):
         self.__class__.generate = PreTrainedModel.generate  # disable stream
         outputs = self.generate(input_ids, generation_config=generation_config)
         response = tokenizer.decode(outputs[0][len(input_ids[0]):], skip_special_tokens=True)
-        return response
+        return response
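Note: the two sides of this last hunk render identically, so the change to the return response line is a whitespace or indentation fix that the diff text does not make visible. For orientation, the non-stream branch of chat shown in context amounts to the following usage (the message content is illustrative and chat's exact signature is an assumption from the surrounding code):

messages = [{"role": "user", "content": "Hi"}]
response = model.chat(tokenizer, messages)  # generate, then decode only the new tokens
print(response)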