Getting a bounding-box-related error while using the LayoutLMv2 model

#4
by Kstar24 - opened

# Reproduction script: document question answering with a LayoutLMv2 checkpoint,
# feeding the model hand-assembled words/boxes plus the image tensor produced by
# the processor.

# Resize transform targeting a 224x224 image; seed fixed at 0.
resize = transforms.Resize((224, 224))
set_seed(0)

# Processor (with OCR enabled), tokenizer and QA model all come from the same checkpoint.
processor = AutoProcessor.from_pretrained("tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa", apply_ocr=True)
tokenizer = AutoTokenizer.from_pretrained("tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa")
model = AutoModelForDocumentQuestionAnswering.from_pretrained("tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa")

# Load the page as RGB and shrink it to 224x224 before handing it to the processor.
filePath = "file/path/to/image.jpeg"
image = Image.open(filePath).convert("RGB")
image = resize(image)
print("image : ", image.size)

# The processor is called with the image only (no question, words or boxes).
# Below, only its `image` field and the length of its `input_ids` are reused.
inputs = processor(images=image, return_tensors="pt")

# Position ids 0..seq_len-1, expanded to the shape of the *processor's* input_ids.
# NOTE(review): the model is later given the tokenizer's input_ids (question_tokenized),
# not these; nothing in this snippet guarantees the two sequence lengths match — confirm.
position_ids = torch.arange(inputs.input_ids.size(1), dtype=torch.long, device=inputs.input_ids.device)
position_ids = position_ids.unsqueeze(0).expand(inputs.input_ids.size())

# Hand-supplied words and their boxes. `.....` marks entries elided by the poster,
# so these two lists are not valid Python as written.
# NOTE(review): the boxes are passed to the tokenizer unchanged. The error quoted after
# this snippet requires every coordinate to lie in 0-1000, and the post says some exceed
# 1000 — presumably these are pixel coordinates of the original (un-resized) image and
# need scaling to 0-1000 by that image's width/height first; verify against the
# LayoutLMv2 documentation.
words = ['RELIANCE', 'RETAIL', 'LIMITED', 'Phoenix', 'Market', 'City', 'Velachery', 'Main',.....]
bounding_boxes = [[100, 232, 206, 261], [220, 233, 299, 261], [313, 234, 404, 262], [99, 273, 192, 299],.....]
question = ["what", "is", "the", "tax", "invoice", "no?"]

# The question words are appended after the document words, each with a dummy
# [0, 0, 0, 0] box, so the tokenizer sees one flat word list.
# NOTE(review): presumably the question should be passed to the tokenizer as its own
# argument rather than as extra document words — confirm against the tokenizer docs.
totalWords = words + question
totalBoxes = bounding_boxes + [[0, 0, 0, 0] for _ in range(len(question))]

question_tokenized = tokenizer(totalWords, boxes=totalBoxes, return_tensors="pt")
question_input_ids = question_tokenized['input_ids']

# `inputs` is rebound here; the dict is built before the assignment happens, so
# `inputs.image` still refers to the processor output from above.
inputs = {
"input_ids": question_input_ids,
"bbox": question_tokenized['bbox'],
"image": inputs.image,
"attention_mask": question_tokenized['attention_mask'],
"token_type_ids": question_tokenized['token_type_ids'],
"position_ids": position_ids,
}
outputs = model(**inputs)
# Highest-scoring start and end token indices.
predicted_start_idx = outputs.start_logits.argmax(-1).item()
predicted_end_idx = outputs.end_logits.argmax(-1).item()

# NOTE(review): `encoding` is not defined anywhere in this snippet; presumably
# `question_tokenized` (the input_ids actually given to the model) was intended — confirm.
predicted_answer_tokens = encoding.input_ids.squeeze()[predicted_start_idx : predicted_end_idx + 1]
predicted_answer = processor.tokenizer.decode(predicted_answer_tokens)

When I run this script, I get the following error:
raise IndexError("The bbox coordinate values should be within 0-1000 range.") from e
IndexError: The bbox coordinate values should be within 0-1000 range.

My bounding boxes have coordinate values above 1000. How am I supposed to solve this error? Please let me know if you need any more info. Thank you.

Sign up or log in to comment