Add print statements
modeling_cogvlm.py (+41 -35)
@@ -456,6 +456,7 @@ class CogVLMModel(CogVLMPreTrainedModel):
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
+       step: int = None,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        """take care of image_encode, token_type_ids, position_ids and (attention_mask = None is fine)"""

@@ -527,6 +528,7 @@ class CogVLMModel(CogVLMPreTrainedModel):
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
+           step=step,
        )

    def llm_forward(
@@ -541,6 +543,7 @@ class CogVLMModel(CogVLMPreTrainedModel):
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
+       step: int = None,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        """largely copy from llama forward and adapt for cogvlm with `token_type_ids`"""
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -590,41 +593,42 @@ class CogVLMModel(CogVLMPreTrainedModel):

        hidden_states = inputs_embeds

-       (35 lines removed here, old lines 593-627; their content is not rendered in this view)
+       if step == 1:
+           torch.save(hidden_states, "hidden_states_step_1.pt")
+           torch.save(attention_mask, "attention_mask_step_1.pt")
+           torch.save(token_type_ids, "token_type_ids_step_1.pt")
+           torch.save(position_ids, "position_ids_step_1.pt")
+
+           from huggingface_hub import HfApi
+
+           api = HfApi()
+           api.upload_file(
+               path_or_fileobj="hidden_states_step_1.pt",
+               path_in_repo="hidden_states_step_1.pt",
+               repo_id="nielsr/test-cogvlm",
+               repo_type="dataset",
+           )
+           api = HfApi()
+           api.upload_file(
+               path_or_fileobj="attention_mask_step_1.pt",
+               path_in_repo="attention_mask_step_1.pt",
+               repo_id="nielsr/test-cogvlm",
+               repo_type="dataset",
+           )
+           api = HfApi()
+           api.upload_file(
+               path_or_fileobj="token_type_ids_step_1.pt",
+               path_in_repo="token_type_ids_step_1.pt",
+               repo_id="nielsr/test-cogvlm",
+               repo_type="dataset",
+           )
+           api = HfApi()
+           api.upload_file(
+               path_or_fileobj="position_ids_step_1.pt",
+               path_in_repo="position_ids_step_1.pt",
+               repo_id="nielsr/test-cogvlm",
+               repo_type="dataset",
+           )

        # decoder layers
        all_hidden_states = () if output_hidden_states else None
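For completeness (not part of this diff): once the instrumented forward pass has pushed the step-1 tensors to the Hub, they can be fetched back and compared against another implementation's intermediates. A minimal sketch, assuming read access to the nielsr/test-cogvlm dataset repo; "reference_hidden_states.pt" is a hypothetical dump produced elsewhere.

import torch
from huggingface_hub import hf_hub_download

# Download the tensor that the instrumented llm_forward uploaded at step 1.
path = hf_hub_download(
    repo_id="nielsr/test-cogvlm",
    filename="hidden_states_step_1.pt",
    repo_type="dataset",
)
hidden_states = torch.load(path, map_location="cpu")

# Compare against a reference dump from the original implementation
# ("reference_hidden_states.pt" is a hypothetical file name).
reference = torch.load("reference_hidden_states.pt", map_location="cpu")
print("max abs diff:", (hidden_states - reference).abs().max().item())
print("allclose:", torch.allclose(hidden_states, reference, atol=1e-4))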
@@ -774,6 +778,7 @@ class CogVLMForCausalLM(CogVLMPreTrainedModel):
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        labels: Optional[torch.LongTensor] = None,
+       step: int = None,
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
@@ -794,6 +799,7 @@ class CogVLMForCausalLM(CogVLMPreTrainedModel):
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
+           step=step,
        )

        hidden_states = outputs[0]
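A review-style aside, not something the diff itself does: the four save/upload pairs added to llm_forward repeat the same pattern and could be collapsed into a loop over the tensors being dumped. A sketch under the same assumptions as the diff (it would run inside llm_forward, where hidden_states, attention_mask, token_type_ids and position_ids are in scope):

import torch
from huggingface_hub import HfApi

if step == 1:
    api = HfApi()
    # One save + upload per debug tensor instead of four copies of the same block.
    for name, tensor in {
        "hidden_states": hidden_states,
        "attention_mask": attention_mask,
        "token_type_ids": token_type_ids,
        "position_ids": position_ids,
    }.items():
        filename = f"{name}_step_1.pt"
        torch.save(tensor, filename)
        api.upload_file(
            path_or_fileobj=filename,
            path_in_repo=filename,
            repo_id="nielsr/test-cogvlm",
            repo_type="dataset",
        )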
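The diff only adds the model-side plumbing for step; the calling side is not shown. One way the flag could be exercised is a direct forward pass on CogVLMForCausalLM, with the counter supplied by whatever loop drives the comparison. All variable names below are hypothetical, and the input dict is assumed to have been built by the usual CogVLM preprocessing (a processor or build_conversation_input_ids, depending on the checkpoint):

import torch

# Hypothetical driver: `model` and `inputs` are assumed to exist already;
# step=1 is what triggers the torch.save / upload_file block added above.
with torch.no_grad():
    outputs = model(
        input_ids=inputs["input_ids"],
        token_type_ids=inputs["token_type_ids"],
        attention_mask=inputs["attention_mask"],
        images=inputs["images"],
        step=1,
    )
print(outputs.logits.shape)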