Add print statements
modeling_cogvlm.py CHANGED (+7 -2)
@@ -434,12 +434,17 @@ class CogVLMModel(CogVLMPreTrainedModel):
             assert len(input_ids) == len(images), f"{len(input_ids)} {len(images)}"
             inputs_embeds = self.embed_tokens(input_ids)
 
-            print("First values of text embeddings:", inputs_embeds[0, :3, :3])
-
             images_features = self.encode_images(images)
             images_features = rearrange(images_features, 'b n d -> (b n) d')
             images_features = images_features.to(dtype=inputs_embeds.dtype, device=inputs_embeds.device)
+
+            print("First values of text embeddings:", inputs_embeds[0, :3, :3])
+            print("First values of images_features:", images_features[0, :3, :3])
+
             inputs_embeds = inputs_embeds.index_put([token_type_ids == VISION_TOKEN_TYPE], images_features)
+
+            print("First values of inputs_embeds after index_put:", inputs_embeds[0, :3, :3])
+
         else:  # single-modality
             if token_type_ids is None:
                 token_type_ids = torch.ones_like(input_ids, dtype=torch.long, device=input_ids.device) * LANGUAGE_TOKEN_TYPE
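For context, the prints bracket the step where projected image features are spliced into the token-embedding sequence: `index_put` with the boolean mask `token_type_ids == VISION_TOKEN_TYPE` overwrites exactly the vision-token rows of `inputs_embeds` and leaves the text rows untouched. Below is a minimal, self-contained sketch of that masking pattern; the tensor shapes and the token-type constants (0 for text, 1 for vision) are illustrative assumptions, not code taken from this repo.

import torch

# Assumed token-type constants, chosen for illustration only.
LANGUAGE_TOKEN_TYPE = 0
VISION_TOKEN_TYPE = 1

batch, seq_len, hidden = 1, 6, 4

# Stand-in text embeddings: all zeros so the splice is easy to see.
inputs_embeds = torch.zeros(batch, seq_len, hidden)

# Positions 1-4 hold vision tokens, the rest are language tokens.
token_type_ids = torch.tensor([[LANGUAGE_TOKEN_TYPE, VISION_TOKEN_TYPE,
                                VISION_TOKEN_TYPE, VISION_TOKEN_TYPE,
                                VISION_TOKEN_TYPE, LANGUAGE_TOKEN_TYPE]])

# One feature vector per vision token, flattened across the batch --
# the shape that rearrange(..., 'b n d -> (b n) d') produces above.
num_vision = int((token_type_ids == VISION_TOKEN_TYPE).sum())
images_features = torch.ones(num_vision, hidden)

# index_put with a boolean mask overwrites exactly the masked rows,
# in order; the out-of-place variant returns a new tensor.
inputs_embeds = inputs_embeds.index_put(
    [token_type_ids == VISION_TOKEN_TYPE], images_features
)

print(inputs_embeds[0, :, 0])  # tensor([0., 1., 1., 1., 1., 0.])

Note that the `[0, :3, :3]` slices in the diff print only a small corner of each tensor, which is enough to compare values across runs or implementations without flooding the log.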