fix left padding batch infer #9
opened by kuaizhirui

modeling_baichuan.py  CHANGED  +7 -4
@@ -358,10 +358,13 @@ class BaichuanModel(BaichuanPreTrainedModel):
             expanded_attn_mask = _expand_mask(attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]).to(
                 inputs_embeds.device
             )
-            combined_attention_mask = (
-                expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
-            )
-
+            if combined_attention_mask is None:
+                combined_attention_mask = expanded_attn_mask
+            else:
+                expanded_attn_mask = torch.where(expanded_attn_mask == torch.finfo(inputs_embeds.dtype).min, torch.finfo(inputs_embeds.dtype).min / 2, expanded_attn_mask)
+                combined_attention_mask = torch.where(combined_attention_mask == torch.finfo(inputs_embeds.dtype).min, torch.finfo(inputs_embeds.dtype).min / 2, combined_attention_mask)
+                combined_attention_mask = expanded_attn_mask + combined_attention_mask
+
         return combined_attention_mask

     def forward(
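For context (not part of the PR): with left padding, a padded position is masked by both the causal mask already in combined_attention_mask and the expanded padding mask, and each uses torch.finfo(dtype).min as its "masked" value. In float16 that is -65504, so summing the two masks overflows to -inf, and a score row that is entirely -inf becomes NaN after softmax. Clamping each mask to finfo.min / 2 before the sum keeps the result finite. A minimal sketch of the failure and the fix, assuming float16 inference (the values and shapes below are illustrative, not taken from modeling_baichuan.py):

import torch

dtype = torch.float16
min_val = torch.finfo(dtype).min  # -65504.0 in float16

# Naive sum: a position masked by both the padding mask and the causal
# mask receives min + min, which overflows float16's range to -inf.
both_masked = torch.tensor([min_val], dtype=dtype) + torch.tensor([min_val], dtype=dtype)
print(both_masked)  # tensor([-inf], dtype=torch.float16)

# A score row that is entirely -inf turns into NaN after softmax
# (softmax subtracts the row max, and -inf - -inf is NaN).
scores = torch.full((1, 4), float("-inf"), dtype=dtype)
print(torch.softmax(scores, dim=-1))  # tensor([[nan, nan, nan, nan]], dtype=torch.float16)

# With the patch, each mask contributes at most min / 2, so the combined
# mask bottoms out at exactly min and the softmax stays finite.
clamped = torch.tensor([min_val / 2], dtype=dtype) + torch.tensor([min_val / 2], dtype=dtype)
print(clamped)  # tensor([-65504.], dtype=torch.float16)
scores = torch.tensor([[0.0, min_val]], dtype=dtype)
print(torch.softmax(scores, dim=-1))  # tensor([[1., 0.]], dtype=torch.float16)

Dividing by 2 rather than substituting a smaller sentinel keeps the masked logits as negative as float16 allows while guaranteeing that the sum of the two masks cannot overflow.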