leoxiaobin mber committed on
Commit
b161793
1 Parent(s): 866d169

Fix number of image tokens (#51)

Browse files

- Fix number of image tokens (4eedefefd840f457373d13c6043ef9aed8153c78)


Co-authored-by: Moshe Berchansky <[email protected]>

Files changed (1) hide show
  1. image_processing_phi3_v.py +1 -1
image_processing_phi3_v.py CHANGED
@@ -250,7 +250,7 @@ class Phi3VImageProcessor(BaseImageProcessor):
250
 
251
  # [(3, h, w)], where h, w is multiple of 336
252
  shapes = [[im.size(1), im.size(2)] for im in hd_images]
253
- num_img_tokens = [int((h//336*w//336+1)*144 + 1 + (h//336+1)*12) for h, w in shapes]
254
  # reshape to channel dimension -> (num_images, num_crops, 3, 336, 336)
255
  # (1, 3, h//336, 336, w//336, 336) -> (1, h//336, w//336, 3, 336, 336) -> (h//336*w//336, 3, 336, 336)
256
  hd_images_reshape = [im.reshape(1, 3, h//336, 336, w//336, 336).permute(0,2,4,1,3,5).reshape(-1, 3, 336, 336).contiguous() for im, (h, w) in zip(hd_images, shapes)]
 
250
 
251
  # [(3, h, w)], where h, w is multiple of 336
252
  shapes = [[im.size(1), im.size(2)] for im in hd_images]
253
+ num_img_tokens = [int(((h//336)*(w//336)+1)*144 + 1 + (h//336+1)*12) for h, w in shapes]
254
  # reshape to channel dimension -> (num_images, num_crops, 3, 336, 336)
255
  # (1, 3, h//336, 336, w//336, 336) -> (1, h//336, w//336, 3, 336, 336) -> (h//336*w//336, 3, 336, 336)
256
  hd_images_reshape = [im.reshape(1, 3, h//336, 336, w//336, 336).permute(0,2,4,1,3,5).reshape(-1, 3, 336, 336).contiguous() for im, (h, w) in zip(hd_images, shapes)]