Add print statements
Browse files- modeling_cogvlm.py +5 -9
modeling_cogvlm.py
CHANGED
@@ -296,8 +296,8 @@ class CogVLMDecoderLayer(nn.Module):
|
|
296 |
|
297 |
hidden_states = self.input_layernorm(hidden_states)
|
298 |
|
299 |
-
|
300 |
-
|
301 |
|
302 |
# Self Attention
|
303 |
hidden_states, self_attn_weights, present_key_value = self.self_attn(
|
@@ -310,8 +310,8 @@ class CogVLMDecoderLayer(nn.Module):
|
|
310 |
use_cache=use_cache,
|
311 |
)
|
312 |
|
313 |
-
|
314 |
-
|
315 |
|
316 |
hidden_states = residual + hidden_states
|
317 |
|
@@ -600,10 +600,6 @@ class CogVLMModel(CogVLMPreTrainedModel):
|
|
600 |
if output_hidden_states:
|
601 |
all_hidden_states += (hidden_states,)
|
602 |
|
603 |
-
# if idx in [0, 1, 2]:
|
604 |
-
# print(f"Hidden states before layer {idx}", hidden_states[0,:3,:3])
|
605 |
-
# print(f"Mean of hidden states before layer {idx}", hidden_states.mean())
|
606 |
-
|
607 |
past_key_value = past_key_values[idx] if past_key_values is not None else None
|
608 |
layer_outputs = decoder_layer(
|
609 |
hidden_states,
|
@@ -613,7 +609,7 @@ class CogVLMModel(CogVLMPreTrainedModel):
|
|
613 |
past_key_value=past_key_value,
|
614 |
output_attentions=output_attentions,
|
615 |
use_cache=use_cache,
|
616 |
-
print_values=idx
|
617 |
)
|
618 |
hidden_states = layer_outputs[0]
|
619 |
|
|
|
296 |
|
297 |
hidden_states = self.input_layernorm(hidden_states)
|
298 |
|
299 |
+
if print_values:
|
300 |
+
print("Hidden states before self attention:", hidden_states[0,:3,:3])
|
301 |
|
302 |
# Self Attention
|
303 |
hidden_states, self_attn_weights, present_key_value = self.self_attn(
|
|
|
310 |
use_cache=use_cache,
|
311 |
)
|
312 |
|
313 |
+
if print_values:
|
314 |
+
print("Hidden states after self attention:", hidden_states[0,:3,:3])
|
315 |
|
316 |
hidden_states = residual + hidden_states
|
317 |
|
|
|
600 |
if output_hidden_states:
|
601 |
all_hidden_states += (hidden_states,)
|
602 |
|
|
|
|
|
|
|
|
|
603 |
past_key_value = past_key_values[idx] if past_key_values is not None else None
|
604 |
layer_outputs = decoder_layer(
|
605 |
hidden_states,
|
|
|
609 |
past_key_value=past_key_value,
|
610 |
output_attentions=output_attentions,
|
611 |
use_cache=use_cache,
|
612 |
+
print_values=idx==0,
|
613 |
)
|
614 |
hidden_states = layer_outputs[0]
|
615 |
|