Crystalcareai committed on
Commit
2d89033
1 Parent(s): 605c7ef

Update modeling_gemmoe.py

Browse files
Files changed (1) hide show
  1. modeling_gemmoe.py +0 -12
modeling_gemmoe.py CHANGED
@@ -743,22 +743,12 @@ class GemmoeDecoderLayer(nn.Module):
743
  output_attentions=output_attentions,
744
  use_cache=use_cache,
745
  )
746
-
747
- # Check if the tensor sizes match before adding residual
748
- if hidden_states.size() != residual.size():
749
- hidden_states = hidden_states[:, -residual.size(1):, :]
750
-
751
  hidden_states = residual + hidden_states
752
 
753
  # Fully Connected
754
  residual = hidden_states
755
  hidden_states = self.post_attention_layernorm(hidden_states)
756
  hidden_states, router_logits = self.block_sparse_moe(hidden_states)
757
-
758
- # Check if the tensor sizes match before adding residual
759
- if hidden_states.size() != residual.size():
760
- hidden_states = hidden_states[:, -residual.size(1):, :]
761
-
762
  hidden_states = residual + hidden_states
763
 
764
  outputs = (hidden_states,)
@@ -775,8 +765,6 @@ class GemmoeDecoderLayer(nn.Module):
775
  return outputs
776
 
777
 
778
-
779
-
780
  GEMMOE_START_DOCSTRING = r"""
781
  This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
782
  library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
 
743
  output_attentions=output_attentions,
744
  use_cache=use_cache,
745
  )
 
 
 
 
 
746
  hidden_states = residual + hidden_states
747
 
748
  # Fully Connected
749
  residual = hidden_states
750
  hidden_states = self.post_attention_layernorm(hidden_states)
751
  hidden_states, router_logits = self.block_sparse_moe(hidden_states)
 
 
 
 
 
752
  hidden_states = residual + hidden_states
753
 
754
  outputs = (hidden_states,)
 
765
  return outputs
766
 
767
 
 
 
768
  GEMMOE_START_DOCSTRING = r"""
769
  This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
770
  library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads