Crystalcareai
committed on
Commit
•
2d89033
1
Parent(s):
605c7ef
Update modeling_gemmoe.py
Browse files
- modeling_gemmoe.py +0 -12
modeling_gemmoe.py
CHANGED
@@ -743,22 +743,12 @@ class GemmoeDecoderLayer(nn.Module):
|
|
743 |
output_attentions=output_attentions,
|
744 |
use_cache=use_cache,
|
745 |
)
|
746 |
-
|
747 |
-
# Check if the tensor sizes match before adding residual
|
748 |
-
if hidden_states.size() != residual.size():
|
749 |
-
hidden_states = hidden_states[:, -residual.size(1):, :]
|
750 |
-
|
751 |
hidden_states = residual + hidden_states
|
752 |
|
753 |
# Fully Connected
|
754 |
residual = hidden_states
|
755 |
hidden_states = self.post_attention_layernorm(hidden_states)
|
756 |
hidden_states, router_logits = self.block_sparse_moe(hidden_states)
|
757 |
-
|
758 |
-
# Check if the tensor sizes match before adding residual
|
759 |
-
if hidden_states.size() != residual.size():
|
760 |
-
hidden_states = hidden_states[:, -residual.size(1):, :]
|
761 |
-
|
762 |
hidden_states = residual + hidden_states
|
763 |
|
764 |
outputs = (hidden_states,)
|
@@ -775,8 +765,6 @@ class GemmoeDecoderLayer(nn.Module):
|
|
775 |
return outputs
|
776 |
|
777 |
|
778 |
-
|
779 |
-
|
780 |
GEMMOE_START_DOCSTRING = r"""
|
781 |
This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
|
782 |
library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
|
|
|
743 |
output_attentions=output_attentions,
|
744 |
use_cache=use_cache,
|
745 |
)
|
|
|
|
|
|
|
|
|
|
|
746 |
hidden_states = residual + hidden_states
|
747 |
|
748 |
# Fully Connected
|
749 |
residual = hidden_states
|
750 |
hidden_states = self.post_attention_layernorm(hidden_states)
|
751 |
hidden_states, router_logits = self.block_sparse_moe(hidden_states)
|
|
|
|
|
|
|
|
|
|
|
752 |
hidden_states = residual + hidden_states
|
753 |
|
754 |
outputs = (hidden_states,)
|
|
|
765 |
return outputs
|
766 |
|
767 |
|
|
|
|
|
768 |
GEMMOE_START_DOCSTRING = r"""
|
769 |
This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
|
770 |
library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
|