xj commited on
Commit
6f5c44e
1 Parent(s): 5cd9ad6

[feat] add benchmark results and huge ckpt/logs

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +1 -0
  2. benchmark/guesswhat_grounding/huge-alt-1/20230516-132049.json +0 -0
  3. benchmark/guesswhat_grounding/huge-main-0.7630/20230512-185344.json +0 -0
  4. benchmark/guesswhat_grounding/huge-main-0.7740/20230512-185556.json +0 -0
  5. benchmark/guesswhat_grounding/invig-large/20230514-044035.json +0 -0
  6. benchmark/guesswhat_grounding/invig-large/20230516-125105.json +0 -0
  7. benchmark/guesswhat_grounding/invig-large/20230516-183341.json +0 -0
  8. benchmark/guesswhat_grounding/invig-m-dialog/20230513-033458.json +0 -0
  9. benchmark/guesswhat_grounding/invig-m-grounding/20230514-111541.json +0 -0
  10. benchmark/guesswhat_grounding/large-invig+guesswhat/20230516-163011.json +0 -0
  11. benchmark/guesswhat_grounding/large-o-grounding+guesswhat/20230512-175038.json +0 -0
  12. benchmark/guesswhat_grounding/large-o-guesswhat/20230512-173248.json +0 -0
  13. benchmark/guesswhat_grounding/large-o-invig/20230515-212817.json +0 -0
  14. benchmark/guesswhat_grounding_end2end/huge-alt-1/20230516-130735.json +0 -0
  15. benchmark/guesswhat_grounding_end2end/huge-main-0.7630/20230513-011551.json +0 -0
  16. benchmark/guesswhat_grounding_end2end/huge-main-0.7740/20230513-012209.json +0 -0
  17. benchmark/guesswhat_grounding_end2end/invig-large/20230514-044110.json +0 -0
  18. benchmark/guesswhat_grounding_end2end/invig-large/20230516-183512.json +0 -0
  19. benchmark/guesswhat_grounding_end2end/invig-large/20230516-183613.json +0 -0
  20. benchmark/guesswhat_grounding_end2end/invig-large/20230516-183750.json +0 -0
  21. benchmark/guesswhat_grounding_end2end/invig-m-dialog/20230513-034220.json +0 -0
  22. benchmark/guesswhat_grounding_end2end/invig-m-grounding/20230514-074730.json +0 -0
  23. benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-163838.json +0 -0
  24. benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-164712.json +0 -0
  25. benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-165115.json +0 -0
  26. benchmark/guesswhat_grounding_end2end/large-o-grounding+guesswhat/20230513-012017.json +0 -0
  27. benchmark/guesswhat_grounding_end2end/large-o-guesswhat/20230513-011852.json +0 -0
  28. benchmark/guesswhat_grounding_end2end/large-o-invig/20230515-213019.json +0 -0
  29. benchmark/guesswhat_oracle/huge-main-0.7630/20230512-213406.json +0 -0
  30. benchmark/guesswhat_oracle/invig-large/20230514-043946.json +0 -0
  31. benchmark/guesswhat_oracle/invig-large/20230516-183935.json +0 -0
  32. benchmark/guesswhat_oracle/invig-m-dialog/20230513-034055.json +0 -0
  33. benchmark/guesswhat_oracle/invig-m-grounding/20230514-115536.json +0 -0
  34. benchmark/guesswhat_oracle/large-invig+guesswhat/20230516-163305.json +0 -0
  35. benchmark/guesswhat_oracle/large-o-grounding+guesswhat/20230512-164938.json +0 -0
  36. benchmark/guesswhat_oracle/large-o-guesswhat/20230512-160114.json +17 -0
  37. benchmark/guesswhat_oracle/large-o-guesswhat/20230512-175119.json +0 -0
  38. benchmark/guesswhat_oracle/large-o-invig/20230515-213052.json +0 -0
  39. benchmark/guesswhat_oracle/xvlm/20230517-022316.json +4 -0
  40. benchmark/human_eval/ours_20230515-194448.json +130 -0
  41. benchmark/human_eval/xvlm_20230515-192335.json +130 -0
  42. benchmark/invig_grounding/huge-main-0.7630/20230512-181257.json +0 -0
  43. benchmark/invig_grounding/huge-main-0.7740/20230512-182551.json +0 -0
  44. benchmark/invig_grounding/invig-large/20230514-043824.json +0 -0
  45. benchmark/invig_grounding/invig-large/20230516-090623.json +0 -0
  46. benchmark/invig_grounding/invig-large/20230516-091634.json +0 -0
  47. benchmark/invig_grounding/invig-large/20230516-113956.json +0 -0
  48. benchmark/invig_grounding/invig-large/20230516-141445.json +0 -0
  49. benchmark/invig_grounding/invig-large/20230516-165140.json +0 -0
  50. benchmark/invig_grounding/invig-m-dialog/20230513-033645.json +0 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ **/.ipynb_checkpoints
benchmark/guesswhat_grounding/huge-alt-1/20230516-132049.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding/huge-main-0.7630/20230512-185344.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding/huge-main-0.7740/20230512-185556.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding/invig-large/20230514-044035.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding/invig-large/20230516-125105.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding/invig-large/20230516-183341.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding/invig-m-dialog/20230513-033458.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding/invig-m-grounding/20230514-111541.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding/large-invig+guesswhat/20230516-163011.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding/large-o-grounding+guesswhat/20230512-175038.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding/large-o-guesswhat/20230512-173248.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding/large-o-invig/20230515-212817.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/huge-alt-1/20230516-130735.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/huge-main-0.7630/20230513-011551.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/huge-main-0.7740/20230513-012209.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/invig-large/20230514-044110.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/invig-large/20230516-183512.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/invig-large/20230516-183613.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/invig-large/20230516-183750.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/invig-m-dialog/20230513-034220.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/invig-m-grounding/20230514-074730.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-163838.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-164712.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/large-invig+guesswhat/20230516-165115.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/large-o-grounding+guesswhat/20230513-012017.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/large-o-guesswhat/20230513-011852.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_grounding_end2end/large-o-invig/20230515-213019.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/guesswhat_oracle/huge-main-0.7630/20230512-213406.json ADDED
Binary file (92 MB). View file
 
benchmark/guesswhat_oracle/invig-large/20230514-043946.json ADDED
Binary file (92 MB). View file
 
benchmark/guesswhat_oracle/invig-large/20230516-183935.json ADDED
Binary file (92 MB). View file
 
benchmark/guesswhat_oracle/invig-m-dialog/20230513-034055.json ADDED
Binary file (92 MB). View file
 
benchmark/guesswhat_oracle/invig-m-grounding/20230514-115536.json ADDED
Binary file (92 MB). View file
 
benchmark/guesswhat_oracle/large-invig+guesswhat/20230516-163305.json ADDED
Binary file (92 MB). View file
 
benchmark/guesswhat_oracle/large-o-grounding+guesswhat/20230512-164938.json ADDED
Binary file (92 MB). View file
 
benchmark/guesswhat_oracle/large-o-guesswhat/20230512-160114.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ["index", "text_input", "text_output", "text_gen"]
2
+ [0, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it a guy?\"\n#region: <bin_325> <bin_574> <bin_689> <bin_849>", " no.", " no."]
3
+ [1, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it a guy? agent: no. human: is it a racket?\"\n#region: <bin_325> <bin_574> <bin_689> <bin_849>", " yes.", " yes."]
4
+ [2, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it electronic?\"\n#region: <bin_781> <bin_309> <bin_988> <bin_677>", " no.", " no."]
5
+ [3, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it electronic? agent: no. human: is it a person?\"\n#region: <bin_781> <bin_309> <bin_988> <bin_677>", " yes.", " yes."]
6
+ [4, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it considered food?\"\n#region: <bin_176> <bin_135> <bin_452> <bin_516>", " yes.", " yes."]
7
+ [5, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it considered food? agent: yes. human: is it yellow?\"\n#region: <bin_176> <bin_135> <bin_452> <bin_516>", " no.", " no."]
8
+ [6, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it in the sky?\"\n#region: <bin_837> <bin_877> <bin_959> <bin_962>", " no.", " no."]
9
+ [7, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it in the sky? agent: no. human: is it the umbrella?\"\n#region: <bin_837> <bin_877> <bin_959> <bin_962>", " no.", " no."]
10
+ [8, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it in the sky? agent: no. human: is it the umbrella? agent: no. human: is it the ocean?\"\n#region: <bin_837> <bin_877> <bin_959> <bin_962>", " no.", " no."]
11
+ [9, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it food?\"\n#region: <bin_589> <bin_434> <bin_998> <bin_779>", " no.", " no."]
12
+ [10, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it food? agent: no. human: is it a drink?\"\n#region: <bin_589> <bin_434> <bin_998> <bin_779>", " no.", " no."]
13
+ [11, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it the boat?\"\n#region: <bin_499> <bin_297> <bin_525> <bin_331>", " no.", " no."]
14
+ [12, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it the boat? agent: no. human: is it a goat?\"\n#region: <bin_499> <bin_297> <bin_525> <bin_331>", " no.", " no."]
15
+ [13, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it the boat? agent: no. human: is it a goat? agent: no. human: is it a person?\"\n#region: <bin_499> <bin_297> <bin_525> <bin_331>", " yes.", " yes."]
16
+ [14, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it the boat? agent: no. human: is it a goat? agent: no. human: is it a person? agent: yes. human: is it on the left?\"\n#region: <bin_499> <bin_297> <bin_525> <bin_331>", " no.", " no."]
17
+ [15, " \n#instruction: answer the question based on the region with yes or no.\n#context: \"human: is it the boat? agent: no. human: is it a goat? agent: no. human: is it a person? agent: yes. human: is it on the left? agent: no. human: is it black?\"\n#region: <bin_499> <bin_297> <bin_525> <bin_331>", " no.", " no."]
benchmark/guesswhat_oracle/large-o-guesswhat/20230512-175119.json ADDED
Binary file (92 MB). View file
 
benchmark/guesswhat_oracle/large-o-invig/20230515-213052.json ADDED
Binary file (92.3 MB). View file
 
benchmark/guesswhat_oracle/xvlm/20230517-022316.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ ["index", "text_input", "text_output_gt", "text_gen", "image_path"]
2
+ [1, [["", "is it a guy?"]], "no.", "no", "coco/train2014/COCO_train2014_000000579680.jpg"]
3
+ [2, [["", "is it a guy?"], ["No.", "is it a racket?"]], "yes.", "yes", "coco/train2014/COCO_train2014_000000579680.jpg"]
4
+ [3, [["", "is it a guy?"], ["No.", "is it a racket?"], ["Yes.", "is it on the right?"]], "no.", "no", "coco/train2014/COCO_train2014_000000579680.jpg"]
benchmark/human_eval/ours_20230515-194448.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [13, [["ride the bikes on the bottom left.", "which one do you want to ride?"], ["the closest one.", "is it the first one from the left?"], ["yeah.", "[0, 432, 96, 479]"]], [35, 422, 108, 479], [0, 432, 96, 479], "openimages_v1.2/imgs/2b240f8c7b095972_Bicycle_Bicycle wheel_Land vehicle_House_Vehicle_Building_Tire_4.jpg"]
2
+ [51, [["catch the duck on the far right.", "is it the first one from the right?"], ["yes.", "[499, 175, 611, 240]"]], [500, 172, 618, 238], [499, 175, 611, 240], "openimages_v1.2/imgs/0391aa378a4094b3_Bird_Duck_12.jpg"]
3
+ [54, [["mark the tallest penguin.", "is it the one in the middle?"], ["yes.", "[188, 115, 415, 545]"]], [187, 115, 420, 548], [188, 115, 415, 545], "openimages_v1.2/imgs/5cc33103845e5519_Bird_Penguin_Animal_4.jpg"]
4
+ [209, [["This tea set is so pretty.", "which one?"], ["It is in the middle.", "is that the teapot?"], ["yes", "[167, 39, 381, 217]"]], [168, 39, 378, 215], [167, 39, 381, 217], "openimages_v1.2/imgs/76005037c3e150f7_Teapot_Snack_Dessert_Food_Baked goods_Pastry_6.jpg"]
5
+ [228, [["I want to know its name.", "which one are you talking about?"], ["The bigger one.", "is it the first one from the right?"], ["yes", "[301, 0, 508, 523]"]], [0, 0, 509, 497], [301, 0, 508, 523], "openimages_v1.2/imgs/31d3d97132fbb2f2_Bird_Penguin_Animal_3.jpg"]
6
+ [255, [["It does not know if it is made by machine.", "is it a cookie?"], ["yes", "is it in the front row?"], ["seconed row", "is it on the left?"], ["yes", "is it the first one on the left?"], ["no", "is it the second one on the left?"], ["yes", "[0, 244, 202, 329]"]], [0, 241, 202, 330], [0, 244, 202, 329], "openimages_v1.2/imgs/467d1abed2e32295_Snack_Cookie_Food_Baked goods_17.jpg"]
7
+ [285, [["It does not look very strong", "is it a rooster?"], ["yes", "is it the rooster on the right?"], ["no, it\u2019s in the middle.", "[228, 213, 400, 400]"]], [231, 210, 402, 400], [228, 213, 400, 400], "openimages_v1.2/imgs/1797f4e85e52bc5a_Building_Bird_Chicken_Animal_4.jpg"]
8
+ [318, [["Give me that coffee cup.", "which one do you want?"], ["white one", "is it the one on the plate?"], ["yes", "[67, 9, 254, 165]"]], [68, 6, 252, 167], [67, 9, 254, 165], "openimages_v1.2/imgs/67c02c17c9a759c8_Drink_Coffee cup_Food_Saucer_Tableware_Vegetable_Salad_Broccoli_2.jpg"]
9
+ [326, [["Give me that action figures", "which one do you want"], ["It is in the middle.", "is it the first one from the left?"], ["no", "is it the second one?"], ["yes", "[343, 352, 394, 410]"]], [234, 228, 275, 288], [343, 352, 394, 410], "openimages_v1.2/imgs/bcc0fa02fdce3af0_Toy_Land vehicle_Vehicle_7.jpg"]
10
+ [333, [["This flower is interesting", "which one?"], ["The purple one", "is it the biggest one?"], ["no", "is it white?"], ["no", "is it next to the big one?"], ["yes", "[399, 259, 469, 348]"]], [397, 261, 468, 345], [399, 259, 469, 348], "openimages_v1.2/imgs/921e76975c6f820c_Plant_Rose_Flower_3.jpg"]
11
+ [393, [["The bird is cute", "which one?"], ["Bottom of the page", "what color is it?"], ["black and white", "is it on the right?"], ["no", "is it on the left?"], ["no", "is it in the middle?"], ["yes", "[226, 181, 315, 293]"]], [222, 279, 301, 394], [226, 181, 315, 293], "openimages_v1.2/imgs/ef4a0d60192428c1_Bird_Penguin_Animal_13.jpg"]
12
+ [407, [["It is so cool", "is it a train?"], ["yes", "is it the one in the middle?"], ["yes", "[94, 133, 608, 322]"]], [85, 121, 601, 337], [94, 133, 608, 322], "openimages_v1.2/imgs/91826655a7d34abd_Land vehicle_Train_Vehicle_Wheel_3.jpg"]
13
+ [429, [["It is really strong.", "which one are you talking about?"], ["the left one", "is it the first one from the left?"], ["yes", "[26, 184, 288, 361]"]], [28, 188, 282, 364], [26, 184, 288, 361], "openimages_v1.2/imgs/02c91a8515336047_Cattle_10.jpg"]
14
+ [440, [["What is the brand of this computer", "what brand is it"], ["what", "is it a laptop?"], ["yes", "is it the one on the left?"], ["yes", "[0, 0, 229, 257]"]], [0, 0, 230, 256], [0, 0, 229, 257], "openimages_v1.2/imgs/9b031924d17fc0d4_Beer_Laptop_Drink_Office supplies_Bottle_Wine_Book_2.jpg"]
15
+ [447, [["It\u2019s samll", "is it the front one?"], ["yes", "[321, 152, 602, 356]"]], [319, 156, 550, 331], [321, 152, 602, 356], "openimages_v1.2/imgs/008ede8abd811226_Squirrel_Animal_3.jpg"]
16
+ [451, [["It is in full bloom", "which one are you talking about"], ["the middle one", "is it the top one?"], ["no", "is it the bottom one?"], ["no", "is it the middle one?"], ["yes", "[268, 317, 352, 399]"]], [247, 244, 297, 333], [268, 317, 352, 399], "openimages_v1.2/imgs/f7f3a10c836226b2_Plant_Flower_Common sunflower_7.jpg"]
17
+ [457, [["It is strange", "is it a toy?"], ["yes", "is it yellow?"], ["no, the green one", "[76, 33, 258, 122]"]], [29, 348, 266, 440], [76, 33, 258, 122], "openimages_v1.2/imgs/15f3aeb130d0d03d_Toy_Land vehicle_Vehicle_Wheel_8.jpg"]
18
+ [466, [["What kind of animal is this?", "which one?"], ["the seconed one", "what color is it?"], ["black and white", "is it the second one from the right?"], ["yes", "[262, 27, 367, 307]"]], [277, 30, 354, 219], [262, 27, 367, 307], "openimages_v1.2/imgs/7fe6972c3f562011_Horse_Cattle_Animal_6.jpg"]
19
+ [476, [["Please pick one flower for me.", "which one do you want?"], ["Second from left.", "is it the first one from the left?"], ["No. It is the second from the left.", "[149, 175, 188, 207]"]], [154, 175, 190, 206], [149, 175, 188, 207], "openimages_v1.2/imgs/46ceefe0e0ecff39_Chair_Houseplant_Table_Plant_Flower_Furniture_Flowerpot_1.jpg"]
20
+ [501, [["Pass me the first carton.", "is it the first carton from the left?"], ["Yes.", "is it the first carton from the right?"], ["No. It is the first carton from the left.", "[58, 252, 329, 425]"]], [62, 250, 327, 425], [58, 252, 329, 425], "openimages_v1.2/imgs/2a1552207182c802_Box_4.jpg"]
21
+ [541, [["Pass me the plant pot.", "which pot are you talking about?"], ["The one in the middle.", "is it the one closest to us?"], ["Yes, it is.", "[276, 343, 346, 399]"]], [276, 344, 348, 401], [276, 343, 346, 399], "openimages_v1.2/imgs/b539ca908aece624_Fountain_Houseplant_Plant_Tree_Flower_Flowerpot_5.jpg"]
22
+ [546, [["Pass me the axe.", "which one do you want?"], ["I want the first one from the left.", "is it the first one from the left?"], ["Yes, it is.", "[137, 167, 249, 513]"]], [145, 169, 250, 514], [137, 167, 249, 513], "openimages_v1.2/imgs/5c7e5c0ac6b63b5e_Tool_Axe_4.jpg"]
23
+ [563, [["The flag is beautiful.", "which one are you talking about?"], ["The one in red, white and blue.", "is it the first one from the left?"], ["Yes, it is.", "[94, 171, 315, 307]"]], [97, 177, 315, 304], [94, 171, 315, 307], "openimages_v1.2/imgs/5d170945d5549dea_Flag_3.jpg"]
24
+ [569, [["The flag is beautiful.", "which one are you talking about?"], ["The one farthest away from us.", "is it the first one from the right?"], ["Yes, it is.", "[380, 228, 499, 343]"]], [378, 233, 493, 343], [380, 228, 499, 343], "openimages_v1.2/imgs/3bd5795f8469318e_Flag_3.jpg"]
25
+ [592, [["The polar bear is so cute.", "which one are you talking about?"], ["The sleeping one.", "is it the one on the right?"], ["No. It is the one on the left.", "is it the first one from the right?"], ["No. It is the first one from the left.", "[12, 208, 176, 256]"]], [38, 206, 183, 254], [12, 208, 176, 256], "openimages_v1.2/imgs/392f8102414cc81e_Polar bear_4.jpg"]
26
+ [592, [["The polar bear is so cute.", "which one are you talking about?"], ["The sleeping one.", "is it the one on the right?"], ["No. It is the one on the left.", "is it the first one from the right?"], ["No. It is the first one from the left.", "[12, 208, 176, 256]"]], [38, 206, 183, 254], [12, 208, 176, 256], "openimages_v1.2/imgs/392f8102414cc81e_Polar bear_4.jpg"]
27
+ [600, [["Pass me the cherry tomato.", "which one do you want?"], ["The one in the middle.", "is it the one next to the cucumber?"], ["No. It is the one on the cabbage.", "[128, 164, 256, 299]"]], [131, 168, 255, 290], [128, 164, 256, 299], "openimages_v1.2/imgs/0353b5f2eb7bd145_Cucumber_Zucchini_Food_Tomato_Vegetable_Cabbage_Salad_13.jpg"]
28
+ [689, [["The cake looks really nice.", "which one are you talking about?"], ["The red cake closest to us.", "is it the first one from the right?"], ["No. It is the first one from the left.", "[135, 446, 285, 610]"]], [128, 445, 288, 608], [135, 446, 285, 610], "openimages_v1.2/imgs/ffcce9c7edd86436_Dessert_Food_Baked goods_Cake_Pastry_18.jpg"]
29
+ [696, [["Pass me the vass.", "which one are you talking about?"], ["The white vass with a red flower on it.", "there are two white vases with red flowers, which one is it?"], ["It is the one in the left.", "is it the first one from the right?"], ["No. It is the first one from the left.", "[268, 353, 344, 478]"]], [267, 353, 343, 478], [268, 353, 344, 478], "openimages_v1.2/imgs/a0abaa5bf4b660d8_Teapot_Vase_Tableware_Jug_Furniture_8.jpg"]
30
+ [704, [["The latte art is so beautiful.", "which one?"], ["The one in the right.", "is there a spoon next to it?"], ["Yes, it is.", "[264, 33, 409, 195]"]], [273, 41, 400, 120], [264, 33, 409, 195], "openimages_v1.2/imgs/0e4bf084d6b8b360_Croissant_Coffee_Saucer_4.jpg"]
31
+ [727, [["Pass me the plant pot.", "which pot do you want?"], ["The one with purple flower.", "is it the first one from the right?"], ["Yes, it is.", "[508, 368, 547, 435]"]], [510, 371, 547, 436], [508, 368, 547, 435], "openimages_v1.2/imgs/4bd65f1121e94380_Chair_Houseplant_House_Plant_Flower_Furniture_Flowerpot_5.jpg"]
32
+ [735, [["The dustbin is full.", "which one?"], ["The one in the left.", "what color is it?"], ["It is green.", "is it the first one from the left?"], ["Yes, it is.", "[48, 274, 199, 464]"]], [49, 278, 198, 462], [48, 274, 199, 464], "openimages_v1.2/imgs/7ee6b53585ead951_Waste container_9.jpg"]
33
+ [740, [["I want to take a boat.", "which one do you want?"], ["The one in yellow and blue.", "is it the first one from the left?"], ["Yes, it is.", "[96, 171, 502, 304]"]], [93, 157, 521, 309], [96, 171, 502, 304], "openimages_v1.2/imgs/c22e23202c31e85e_Boat_Barge_Watercraft_Vehicle_4.jpg"]
34
+ [747, [["The balloon is flying so high.", "which one are you talking about?"], ["The first one from the left.", "is it the first one from the left?"], ["Yes, it is.", "[26, 32, 186, 215]"]], [25, 33, 187, 214], [26, 32, 186, 215], "openimages_v1.2/imgs/23a0eb2232fe9e66_Balloon_Vehicle_4.jpg"]
35
+ [758, [["can you pass me the tree in middle?", "which one are you talking about?"], ["it is in the middle.", "is it the tallest one?"], ["yes.", "[210, 206, 325, 357]"]], [191, 210, 338, 363], [210, 206, 325, 357], "openimages_v1.2/imgs/f2246253917f75b5_Christmas tree_Tree_4.jpg"]
36
+ [758, [["can you pass me the tree in middle?", "which one are you talking about?"], ["it is in the middle.", "is it the tallest one?"], ["yes.", "[210, 206, 325, 357]"]], [191, 210, 338, 363], [210, 206, 325, 357], "openimages_v1.2/imgs/f2246253917f75b5_Christmas tree_Tree_4.jpg"]
37
+ [778, [["the fish is so cute.", "which one are you talking about?"], ["it is in the middle.", "is it yellow?"], ["yes.", "is it the first one from the right?"], ["no, it is in the middle.", "[99, 164, 356, 292]"]], [97, 161, 357, 292], [99, 164, 356, 292], "openimages_v1.2/imgs/5d97d9b7b9995160_Goldfish_Fish_Animal_14.jpg"]
38
+ [778, [["the fish is so cute.", "which one are you talking about?"], ["it is in the middle.", "is it yellow?"], ["yes.", "is it the first one from the right?"], ["no, it is in the middle.", "[99, 164, 356, 292]"]], [97, 161, 357, 292], [99, 164, 356, 292], "openimages_v1.2/imgs/5d97d9b7b9995160_Goldfish_Fish_Animal_14.jpg"]
39
+ [859, [["Can you pass me the biggest pumpkin?", "is it the one on the left?"], ["yes.", "[4, 44, 245, 268]"]], [7, 44, 245, 267], [4, 44, 245, 268], "openimages_v1.2/imgs/6708c917862153ec_Squash (Plant)_Pumpkin_Fruit_4.jpg"]
40
+ [859, [["Can you pass me the biggest pumpkin?", "is it the one on the left?"], ["yes.", "[4, 44, 245, 268]"]], [7, 44, 245, 267], [4, 44, 245, 268], "openimages_v1.2/imgs/6708c917862153ec_Squash (Plant)_Pumpkin_Fruit_4.jpg"]
41
+ [864, [["Can you pass me the taxi?", "which one are you talking about?"], ["it is in the middle.", "is it the first one from the right?"], ["no, it is in the middle.", "is it the first one from the left?"], ["no, is it the first second one from the left?", "[19, 196, 259, 367]"], ["no, it is in the middle.", "[200, 197, 371, 324]"]], [199, 195, 367, 331], [200, 197, 371, 324], "openimages_v1.2/imgs/13399741ea286868_Vehicle_Car_Taxi_4.jpg"]
42
+ [864, [["Can you pass me the taxi?", "which one are you talking about?"], ["it is in the middle.", "is it the first one from the right?"], ["no, it is in the middle.", "is it the first one from the left?"], ["no, is it the first second one from the left?", "[19, 196, 259, 367]"], ["no, it is in the middle.", "[200, 197, 371, 324]"]], [199, 195, 367, 331], [200, 197, 371, 324], "openimages_v1.2/imgs/13399741ea286868_Vehicle_Car_Taxi_4.jpg"]
43
+ [865, [["the cake is so cute.", "which one are you talking about?"], ["it is in top middle.", "is it brown?"], ["yes.", "is it the first one from the right?"], ["No, it\u2019s in the middle of the top.", "[339, 15, 469, 102]"], ["the left beside it.", "[342, 46, 469, 101]"], ["the left beside it.", "[342, 46, 469, 101]"], ["And to the left.", "[343, 44, 469, 101]"]], [266, 55, 360, 123], [343, 44, 469, 101], "openimages_v1.2/imgs/008b40159ed22bdf_Food_Baked goods_Pastry_3.jpg"]
44
+ [865, [["the cake is so cute.", "which one are you talking about?"], ["it is in top middle.", "is it brown?"], ["yes.", "is it the first one from the right?"], ["No, it\u2019s in the middle of the top.", "[339, 15, 469, 102]"], ["the left beside it.", "[342, 46, 469, 101]"], ["the left beside it.", "[342, 46, 469, 101]"], ["And to the left.", "[343, 44, 469, 101]"]], [266, 55, 360, 123], [343, 44, 469, 101], "openimages_v1.2/imgs/008b40159ed22bdf_Food_Baked goods_Pastry_3.jpg"]
45
+ [865, [["the cake is so cute.", "which one are you talking about?"], ["it is in top middle.", "is it brown?"], ["yes.", "is it the first one from the right?"], ["No, it\u2019s in the middle of the top.", "[339, 15, 469, 102]"], ["the left beside it.", "[342, 46, 469, 101]"], ["the left beside it.", "[342, 46, 469, 101]"], ["And to the left.", "[343, 44, 469, 101]"]], [266, 55, 360, 123], [343, 44, 469, 101], "openimages_v1.2/imgs/008b40159ed22bdf_Food_Baked goods_Pastry_3.jpg"]
46
+ [919, [["the deer is so cute.", "which one are you talking about?"], ["Behind the doll", "is it the first one from the right?"], ["yes.", "[320, 0, 490, 173]"]], [328, 1, 479, 161], [320, 0, 490, 173], "openimages_v1.2/imgs/9263a5fd06a1b5ca_Snowman_Toy_6.jpg"]
47
+ [928, [["the flower is so lovely.", "which one are you talking about?"], ["the left one.", "is it the first one from the left?"], ["yes.", "[0, 127, 185, 311]"]], [3, 128, 184, 310], [0, 127, 185, 311], "openimages_v1.2/imgs/0fffd5362226f8d3_Flower_Plant_Rose_3.jpg"]
48
+ [928, [["the flower is so lovely.", "which one are you talking about?"], ["the left one.", "is it the first one from the left?"], ["yes.", "[0, 127, 185, 311]"]], [3, 128, 184, 310], [0, 127, 185, 311], "openimages_v1.2/imgs/0fffd5362226f8d3_Flower_Plant_Rose_3.jpg"]
49
+ [928, [["the flower is so lovely.", "which one are you talking about?"], ["the left one.", "is it the first one from the left?"], ["yes.", "[0, 127, 185, 311]"]], [3, 128, 184, 310], [0, 127, 185, 311], "openimages_v1.2/imgs/0fffd5362226f8d3_Flower_Plant_Rose_3.jpg"]
50
+ [940, [["pass me the blue flag.", "which one are you talking about?"], ["the blue one.", "is it the first one from the left?"], ["yes.", "[97, 141, 129, 250]"]], [100, 142, 130, 251], [97, 141, 129, 250], "openimages_v1.2/imgs/009c68ec28f32e34_Flag_3.jpg"]
51
+ [940, [["pass me the blue flag.", "which one are you talking about?"], ["the blue one.", "is it the first one from the left?"], ["yes.", "[97, 141, 129, 250]"]], [100, 142, 130, 251], [97, 141, 129, 250], "openimages_v1.2/imgs/009c68ec28f32e34_Flag_3.jpg"]
52
+ [940, [["pass me the blue flag.", "which one are you talking about?"], ["the blue one.", "is it the first one from the left?"], ["yes.", "[97, 141, 129, 250]"]], [100, 142, 130, 251], [97, 141, 129, 250], "openimages_v1.2/imgs/009c68ec28f32e34_Flag_3.jpg"]
53
+ [1034, [["the doll is so cute.", "which one?"], ["the left one.", "is it green?"], ["yes.", "[124, 96, 282, 379]"]], [125, 96, 280, 381], [124, 96, 282, 379], "openimages_v1.2/imgs/d6d4e0e131db09c9_Toy_4.jpg"]
54
+ [1034, [["the doll is so cute.", "which one?"], ["the left one.", "is it green?"], ["yes.", "[124, 96, 282, 379]"]], [125, 96, 280, 381], [124, 96, 282, 379], "openimages_v1.2/imgs/d6d4e0e131db09c9_Toy_4.jpg"]
55
+ [1098, [["pass me the flower pot.", "which one do you want?"], ["The second one on the right.", "is it the second one from the right?"], ["yes.", "[521, 187, 575, 235]"]], [527, 187, 576, 233], [521, 187, 575, 235], "openimages_v1.2/imgs/2dbb4a546d107596_Flower_Street light_Houseplant_Plant_Tree_Flowerpot_4.jpg"]
56
+ [1098, [["pass me the flower pot.", "which one do you want?"], ["The second one on the right.", "is it the second one from the right?"], ["yes.", "[521, 187, 575, 235]"]], [527, 187, 576, 233], [521, 187, 575, 235], "openimages_v1.2/imgs/2dbb4a546d107596_Flower_Street light_Houseplant_Plant_Tree_Flowerpot_4.jpg"]
57
+ [1116, [["pass me the Squirrel doll.", "which one do you want?"], ["the cloest one.", "is it the first one from the right?"], ["no, the nearest one.", "[195, 350, 399, 564]"]], [194, 362, 408, 556], [195, 350, 399, 564], "openimages_v1.2/imgs/5cdad082e579d5cd_Toy_5.jpg"]
58
+ [1116, [["pass me the Squirrel doll.", "which one do you want?"], ["the cloest one.", "is it the first one from the right?"], ["no, the nearest one.", "[195, 350, 399, 564]"]], [194, 362, 408, 556], [195, 350, 399, 564], "openimages_v1.2/imgs/5cdad082e579d5cd_Toy_5.jpg"]
59
+ [1130, [["pass me the flower.", "which one do you want?"], ["the right one.", "is it the first one from the right?"], ["yes.", "[455, 83, 543, 180]"], ["you are wrong, i need the right one.", "[453, 83, 543, 180]"]], [546, 246, 608, 305], [453, 83, 543, 180], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
60
+ [1130, [["pass me the flower.", "which one do you want?"], ["the right one.", "is it the first one from the right?"], ["yes.", "[455, 83, 543, 180]"], ["you are wrong, i need the right one.", "[453, 83, 543, 180]"]], [546, 246, 608, 305], [453, 83, 543, 180], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
61
+ [1130, [["pass me the flower.", "which one do you want?"], ["the right one.", "is it the first one from the right?"], ["yes.", "[455, 83, 543, 180]"], ["you are wrong, i need the right one.", "[453, 83, 543, 180]"]], [546, 246, 608, 305], [453, 83, 543, 180], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
62
+ [1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "is it the one on the far left?"], ["no, it is on the flowerpot.", "[265, 296, 324, 329]"]], [264, 295, 323, 328], [265, 296, 324, 329], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
63
+ [1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "is it the one on the far left?"], ["no, it is on the flowerpot.", "[265, 296, 324, 329]"]], [264, 295, 323, 328], [265, 296, 324, 329], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
64
+ [1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "is it the one on the far left?"], ["no, it is on the flowerpot.", "[265, 296, 324, 329]"]], [264, 295, 323, 328], [265, 296, 324, 329], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
65
+ [1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "is it the one on the far left?"], ["no, it is on the flowerpot.", "[265, 296, 324, 329]"]], [264, 295, 323, 328], [265, 296, 324, 329], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
66
+ [1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one in the middle.", "is it the one in the lower right corner?"], ["No, it is the one in the middle right.", "[501, 213, 566, 271]"]], [506, 213, 568, 267], [501, 213, 566, 271], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
67
+ [1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one in the middle.", "is it the one in the lower right corner?"], ["No, it is the one in the middle right.", "[501, 213, 566, 271]"]], [506, 213, 568, 267], [501, 213, 566, 271], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
68
+ [1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one in the middle.", "is it the one in the lower right corner?"], ["No, it is the one in the middle right.", "[501, 213, 566, 271]"]], [506, 213, 568, 267], [501, 213, 566, 271], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
69
+ [1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one in the middle.", "is it the one in the lower right corner?"], ["No, it is the one in the middle right.", "[501, 213, 566, 271]"]], [506, 213, 568, 267], [501, 213, 566, 271], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
70
+ [1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one in the middle.", "is it the one in the lower right corner?"], ["No, it is the one in the middle right.", "[501, 213, 566, 271]"]], [506, 213, 568, 267], [501, 213, 566, 271], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
71
+ [775, [["can you pass me the plate?", "which one do you want?"], ["Upper left corner.", "is it the first one from the left?"], ["no, it is the second.", "[37, 0, 133, 71]"]], [39, 0, 134, 73], [37, 0, 133, 71], "openimages_v1.2/imgs/318a8bb8339dde66_Candle_Tableware_Plate_Platter_Serving tray_4.jpg"]
72
+ [206, [["No, I want the wine bottle.", "which one do you want?"], ["The nearer one.", "is it the one on the right?"], ["No", "[343, 0, 492, 405]"], ["No, not this one.", "[343, 0, 492, 405]"]], [151, 0, 328, 476], [343, 0, 492, 405], "openimages_v1.2/imgs/25b17c8df7b2400b_Beer_Drink_Bottle_Wine_5.jpg"]
73
+ [1206, [["pass me a flower", "which one do you want"], ["The upper one.", "is it in the upper right corner?"], ["yes.", "[593, 0, 638, 159]"]], [341, 0, 515, 192], [593, 0, 638, 159], "openimages_v1.2/imgs/a64572ad63e1c1ab_Rose_Flower_3.jpg"]
74
+ [1206, [["pass me a flower", "which one do you want"], ["The upper one.", "is it in the upper right corner?"], ["yes.", "[593, 0, 638, 159]"]], [341, 0, 515, 192], [593, 0, 638, 159], "openimages_v1.2/imgs/a64572ad63e1c1ab_Rose_Flower_3.jpg"]
75
+ [1206, [["pass me a flower", "which one do you want"], ["The upper one.", "is it in the upper right corner?"], ["yes.", "[593, 0, 638, 159]"]], [341, 0, 515, 192], [593, 0, 638, 159], "openimages_v1.2/imgs/a64572ad63e1c1ab_Rose_Flower_3.jpg"]
76
+ [1209, [["I want a cake", "which one do you want"], ["The right one.", "is it the first one from the right?"], ["Yes.", "is it the first one from the right?"], ["Yes.", "[413, 100, 595, 295]"]], [413, 102, 589, 269], [413, 100, 595, 295], "openimages_v1.2/imgs/5c7b9a2eec5ff0ae_Muffin_Dessert_Food_Baked goods_7.jpg"]
77
+ [1209, [["I want a cake", "which one do you want"], ["The right one.", "is it the first one from the right?"], ["Yes.", "is it the first one from the right?"], ["Yes.", "[413, 100, 595, 295]"]], [413, 102, 589, 269], [413, 100, 595, 295], "openimages_v1.2/imgs/5c7b9a2eec5ff0ae_Muffin_Dessert_Food_Baked goods_7.jpg"]
78
+ [1232, [["give me a bottle of wine", "which bottle do you want"], ["The upest, from left, the forth.", "is it the fourth bottle from the left?"], ["yes.", "[247, 10, 295, 214]"]], [244, 13, 293, 213], [247, 10, 295, 214], "openimages_v1.2/imgs/a211016f402b956b_Bottle_Building_Wine_7.jpg"]
79
+ [1232, [["give me a bottle of wine", "which bottle do you want"], ["The upest, from left, the forth.", "is it the fourth bottle from the left?"], ["yes.", "[247, 10, 295, 214]"]], [244, 13, 293, 213], [247, 10, 295, 214], "openimages_v1.2/imgs/a211016f402b956b_Bottle_Building_Wine_7.jpg"]
80
+ [1236, [["I want a pillow", "which one do you want"], ["The frontmost one.", "is it black and white?"], ["Yes.", "is it the one on the right?"], ["Yes.", "[335, 208, 476, 267]"]], [339, 210, 471, 275], [335, 208, 476, 267], "openimages_v1.2/imgs/2992bde1d85a7162_Nightstand_Pillow_6.jpg"]
81
+ [1266, [["I want the strawberry", "which one do you want"], ["The center one.", "is it the one in the middle?"], ["yes", "[256, 147, 335, 289]"]], [185, 64, 287, 222], [256, 147, 335, 289], "openimages_v1.2/imgs/8a61c3b92dffeb3e_Dessert_Dairy Product_Food_Fruit_Baked goods_Strawberry_Cake_10.jpg"]
82
+ [1287, [["The green pants", "is it a man?"], ["I don\u2019t think so.", "is it a woman?"], ["Yes, I guess", "is she wearing green pants?"], ["Yes", "is she wearing a white top?"], ["Yes", "is she wearing black pants?"], ["No, the green pants", "[0, 0, 639, 479]"]], [527, 276, 580, 346], [0, 0, 639, 479], "openimages_v1.2/imgs/0c22601eb7370faa_Trousers_4.jpg"]
83
+ [1301, [["goblet", "is it a glass?"], ["yes", "[139, 132, 228, 378]"]], [139, 171, 231, 272], [139, 132, 228, 378], "openimages_v1.2/imgs/b132c5bc9f77cb7f_Cocktail_Drink_Food_Fruit_Tableware_Strawberry_Wine_Wine glass_3.jpg"]
84
+ [1309, [["The picture", "is it a person?"], ["Yes", "is it on the left?"], ["Yes", "is it on the right?"], ["No", "is it in the middle?"], ["No", "is it on the wall?"], ["yes", "is it on the left?"], ["yes", "is it the first one on the left?"], ["yes", "[131, 236, 152, 261]"]], [0, 100, 63, 289], [131, 236, 152, 261], "openimages_v1.2/imgs/00225def00d94d8b_Picture frame_Furniture_13.jpg"]
85
+ [1330, [["Tomato", "is it a tomato?"], ["of course", "is it green?"], ["No\uff0cthe red", "is it the top one?"], ["yes", "[41, 28, 196, 311]"]], [37, 145, 196, 309], [41, 28, 196, 311], "openimages_v1.2/imgs/4e5506fb9ab3dc08_Fruit_Plant_Tomato_Vegetable_8.jpg"]
86
+ [1330, [["Tomato", "is it a tomato?"], ["of course", "is it green?"], ["No\uff0cthe red", "is it the top one?"], ["yes", "[41, 28, 196, 311]"]], [37, 145, 196, 309], [41, 28, 196, 311], "openimages_v1.2/imgs/4e5506fb9ab3dc08_Fruit_Plant_Tomato_Vegetable_8.jpg"]
87
+ [1354, [["The purple flowerpot", "is it big?"], ["No", "is it on the right?"], ["No\uff0cleft", "is it on the ground?"], ["yes", "is it the big one?"], ["No", "[414, 246, 548, 435]"]], [85, 154, 162, 234], [414, 246, 548, 435], "openimages_v1.2/imgs/08e6bf832016e29e_Houseplant_Plant_Flower_Flowerpot_11.jpg"]
88
+ [1372, [["I want a red apple", "which one do you want"], ["The red in rightmost", "is it next to the green apple?"], ["No", "is it next to the price sign?"], ["No", "[492, 344, 587, 422]"]], [582, 342, 639, 414], [492, 344, 587, 422], "openimages_v1.2/imgs/7d5be279c905b3fa_Apple_Food_Fruit_Orange_Grapefruit_11.jpg"]
89
+ [1385, [["the cow", "is it standing?"], ["the one on the right side of the tree", "is it black and white?"], ["yes", "[131, 257, 250, 325]"]], [126, 257, 245, 330], [131, 257, 250, 325], "openimages_v1.2/imgs/0e9f655b0a190ab1_Cattle_Plant_Tree_Animal_13.jpg"]
90
+ [1329, [["give me the hot air balloon on the leftmost", "is it the green one?"], ["yes", "[0, 221, 142, 426]"]], [0, 219, 131, 426], [0, 221, 142, 426], "openimages_v1.2/imgs/521d913963c3b47c_Balloon_Vehicle_Aircraft_3.jpg"]
91
+ [1429, [["the pink towel ", "all of it?"], ["No\uff0cthe pink towel ", "all of it?"], ["No\uff0cthe bottom towel", "[0, 296, 638, 423]"]], [0, 294, 639, 423], [0, 296, 638, 423], "openimages_v1.2/imgs/613305c0a39b4c72_Towel_3.jpg"]
92
+ [1436, [["give me the rightmost penguin", "is it the first one from the right?"], ["yes", "[425, 133, 510, 432]"]], [440, 142, 511, 420], [425, 133, 510, 432], "openimages_v1.2/imgs/3868829d6119e6ea_Bird_Penguin_Animal_3.jpg"]
93
+ [1437, [["give me the flowerpot", "is it red?"], ["yes", "is it the whole pot?"], ["yes", "[147, 447, 375, 638]"]], [148, 438, 376, 639], [147, 447, 375, 638], "openimages_v1.2/imgs/23bd2f5d5d520cd1_Houseplant_Plant_Flowerpot_3.jpg"]
94
+ [1992, [["give me the left black flower pot.", "is it the first one from the left?"], ["the first black one from the left.", "is it the first black flower pot from the left?"], ["yeah", "[0, 567, 68, 638]"]], [60, 565, 105, 619], [0, 567, 68, 638], "openimages_v1.2/imgs/02a344aac1eead08_House_Tree_Building_Houseplant_Plant_Flower_Flowerpot_3.jpg"]
95
+ [1960, [["pass me the most right flower pot.", "is it the first one from the right?"], ["yes", "[442, 355, 520, 432]"]], [438, 353, 518, 434], [442, 355, 520, 432], "openimages_v1.2/imgs/6baf9ff6fd56c7c6_Houseplant_Plant_Flowerpot_4.jpg"]
96
+ [1918, [["the red cake with 2 eyes.", "which one are you talking about?"], ["the right one.", "is it the first one from the right?"], ["yes", "[338, 311, 591, 535]"]], [332, 308, 592, 546], [338, 311, 591, 535], "openimages_v1.2/imgs/fcc713fe37b82bdc_Toy_Snack_Dessert_Food_Baked goods_12.jpg"]
97
+ [1888, [["Give me the bottom slice of cucumber", "is it the one in the lower right corner?"], ["yes", "[359, 376, 492, 479]"]], [358, 376, 493, 479], [359, 376, 492, 479], "openimages_v1.2/imgs/81303291b41e898d_Cucumber_3.jpg"]
98
+ [1827, [["give me the notebook", "which one do you want?"], ["the left one.", "is it the one in your hand?"], ["yes", "[0, 69, 281, 374]"]], [0, 75, 279, 374], [0, 69, 281, 374], "openimages_v1.2/imgs/7d930e70cd3b11a7_Laptop_Computer keyboard_Book_1.jpg"]
99
+ [1813, [["Give me the largest West Blue Flower", "is it the broccoli?"], ["yes", "[250, 224, 496, 390]"]], [251, 223, 495, 403], [250, 224, 496, 390], "openimages_v1.2/imgs/2f515c715d155747_Toy_Food_Broccoli_2.jpg"]
100
+ [1780, [["Give me the leftmost tape", "is it pink?"], ["yes", "[31, 54, 120, 205]"]], [31, 53, 117, 207], [31, 54, 120, 205], "openimages_v1.2/imgs/638c7052acd8ca03_Adhesive tape_4.jpg"]
101
+ [1774, [["give me the nearest boxes.", "ok, which one do you want?"], ["the nearest one.", "is it the first one from the right?"], ["yes, it is in the middle", "[249, 229, 502, 367]"]], [249, 231, 497, 370], [249, 229, 502, 367], "openimages_v1.2/imgs/0ae1f7df4aeca2f9_Box_Table_8.jpg"]
102
+ [1764, [["Pick the flower bud at the bottom for me.", "which one are you talking about?"], ["the flower bud at the bottom", "is it the first one from the right?"], ["yes", "[373, 254, 443, 351]"]], [376, 259, 443, 350], [373, 254, 443, 351], "openimages_v1.2/imgs/0c875a717a0c0515_Pomegranate_3.jpg"]
103
+ [1751, [["the most left cat.", "is it black?"], ["no", "is it the first one from the left?"], ["yes", "[96, 25, 305, 141]"]], [94, 28, 301, 140], [96, 25, 305, 141], "openimages_v1.2/imgs/2b7a258daaeb3636_Carnivore_Cat_Animal_4.jpg"]
104
+ [1751, [["which cat is closer to me?", "which one are you talking about?"], ["right cat.", "is it the first one from the right?"], ["yes.", "[298, 184, 568, 326]"]], [94, 28, 301, 140], [298, 184, 568, 326], "openimages_v1.2/imgs/2b7a258daaeb3636_Carnivore_Cat_Animal_4.jpg"]
105
+ [1764, [["what is in the picture?", "is it a pomegranate?"], ["yes, give me the biggest one.", "is it the one in the middle?"], ["yes.", "[262, 153, 366, 253]"]], [376, 259, 443, 350], [262, 153, 366, 253], "openimages_v1.2/imgs/0c875a717a0c0515_Pomegranate_3.jpg"]
106
+ [1764, [["what is in the picture?", "is it a pomegranate?"], ["yes, give me the biggest one.", "is it the one in the middle?"], ["yes.", "[262, 153, 366, 253]"]], [376, 259, 443, 350], [262, 153, 366, 253], "openimages_v1.2/imgs/0c875a717a0c0515_Pomegranate_3.jpg"]
107
+ [1774, [["what is in the picture?", "is it a donut?"], ["yes, give me the blue one.", "is it the first one from the left?"], ["yes.", "[23, 158, 156, 274]"]], [249, 231, 497, 370], [23, 158, 156, 274], "openimages_v1.2/imgs/0ae1f7df4aeca2f9_Box_Table_8.jpg"]
108
+ [1780, [["what are these?", "is it yellow?"], ["what is in the picture?", "is it a flower?"], ["no.", "is it pink?"], ["what is in the picture?", "is it a cup?"], ["no.", "[0, 0, 637, 389]"]], [31, 53, 117, 207], [0, 0, 637, 389], "openimages_v1.2/imgs/638c7052acd8ca03_Adhesive tape_4.jpg"]
109
+ [1813, [["what is in the picture?", "is it food?"], ["i don\u2019t like broccoli, pick it out.", "is it orange?"], ["no, it is green.", "is it blue?"], ["no, it is green.", "[0, 76, 638, 479]"]], [251, 223, 495, 403], [0, 76, 638, 479], "openimages_v1.2/imgs/2f515c715d155747_Toy_Food_Broccoli_2.jpg"]
110
+ [1827, [["give me the notebook.", "which one do you want?"], ["left one.", "is it the one in your hand?"], ["yes.", "[0, 69, 281, 374]"]], [0, 75, 279, 374], [0, 69, 281, 374], "openimages_v1.2/imgs/7d930e70cd3b11a7_Laptop_Computer keyboard_Book_1.jpg"]
111
+ [1888, [["give me the bowl.", "which one do you want?"], ["top right.", "is there a carrot in it?"], ["yes.", "[364, 0, 638, 299]"]], [358, 376, 493, 479], [364, 0, 638, 299], "openimages_v1.2/imgs/81303291b41e898d_Cucumber_3.jpg"]
112
+ [1918, [["what is in the picture?", "is it red?"], ["give me the one-eye one.", "is it in the front row?"], ["no, it is in the middle.", "is it the one on the left?"], ["no, it is in the middle.", "[124, 140, 273, 281]"]], [332, 308, 592, 546], [124, 140, 273, 281], "openimages_v1.2/imgs/fcc713fe37b82bdc_Toy_Snack_Dessert_Food_Baked goods_12.jpg"]
113
+ [1960, [["give the tallest flower.", "is it the first one from the left?"], ["no.", "is it the second one?"], ["no.", "is it the third one?"], ["yes.", "[314, 361, 397, 442]"]], [438, 353, 518, 434], [314, 361, 397, 442], "openimages_v1.2/imgs/6baf9ff6fd56c7c6_Houseplant_Plant_Flowerpot_4.jpg"]
114
+ [1992, [["what is in the picture?", "is it a plant?"], ["i think so, give it to me.", "is it a flower pot?"], ["yes.", "is it pink?"], ["no, it is green.", "is it on the left?"], ["no, it is right one.", "[259, 380, 292, 414]"]], [60, 565, 105, 619], [259, 380, 292, 414], "openimages_v1.2/imgs/02a344aac1eead08_House_Tree_Building_Houseplant_Plant_Flower_Flowerpot_3.jpg"]
115
+ [61, [["drive the green car away.", "which one are you talking about?"], ["the middle one.", "is it the first one from the left?"], ["no, the middle one.", "[190, 310, 265, 380]"]], [187, 308, 264, 382], [190, 310, 265, 380], "openimages_v1.2/imgs/8b240c08cb38382e_Toy_Bus_Land vehicle_Vehicle_13.jpg"]
116
+ [65, [["close the laptop on the left.", "is it the one on the left?"], ["yeah.", "[0, 73, 348, 426]"]], [0, 72, 346, 426], [0, 73, 348, 426], "openimages_v1.2/imgs/5b39c13a7271eed4_Laptop_Computer keyboard_3.jpg"]
117
+ [88, [["pass me the far left glass.", "is it the first glass from the left?"], ["yes.", "[56, 165, 127, 376]"]], [64, 238, 121, 290], [56, 165, 127, 376], "openimages_v1.2/imgs/efd13a8dcab6d7f4_Drink_Table_Tableware_Bottle_Wine_Wine glass_Furniture_5.jpg"]
118
+ [93, [["feed the second panda on the left.", "is it the second panda from the left?"], ["yes.", "[149, 103, 268, 219]"]], [148, 100, 267, 217], [149, 103, 268, 219], "openimages_v1.2/imgs/329035dece1a9746_Panda_4.jpg"]
119
+ [142, [["eat the broccoli on the right.", "is it the top one?"], ["yes.", "[233, 149, 460, 342]"]], [229, 152, 460, 354], [233, 149, 460, 342], "openimages_v1.2/imgs/e0b81ffd382fa5f5_Food_Vegetable_Broccoli_3.jpg"]
120
+ [161, [["mark the stone flower bed.", "which one do you mean?"], ["the right one.", "is it the first one from the right?"], ["yes.", "[273, 516, 346, 582]"]], [284, 517, 342, 615], [273, 516, 346, 582], "openimages_v1.2/imgs/0cb491551929a2ee_Stairs_Houseplant_Porch_Plant_Tree_Flower_Building_Flowerpot_4.jpg"]
121
+ [163, [["eat the tomato on the top left.", "is it red?"], ["yes.", "is it the one on the left?"], ["yes.", "[44, 5, 209, 146]"]], [46, 2, 208, 167], [44, 5, 209, 146], "openimages_v1.2/imgs/987761ccf847341d_Tomato_21.jpg"]
122
+ [178, [["kill the meerkat on the right.", "which one are you talking about?"], ["the right one sitting one the stone.", "is it the first one from the right?"], ["yeah.", "[252, 69, 445, 419]"]], [206, 68, 450, 529], [252, 69, 445, 419], "openimages_v1.2/imgs/921f182a889503b5_Carnivore_Animal_3.jpg"]
123
+ [189, [["throw the spoon handle on the table.", "which one?"], ["the right one.", "is it in the blue bowl?"], ["no, it is on the table.", "is it the one in the bowl?"], ["no.", "[302, 0, 446, 29]"]], [410, 360, 456, 397], [302, 0, 446, 29], "openimages_v1.2/imgs/ee784978eea58567_Drink_Food_Tableware_Spoon_3.jpg"]
124
+ [191, [["pick the flower in the middle.", "is it the pink one?"], ["yeah.", "[195, 144, 381, 331]"]], [192, 146, 381, 331], [195, 144, 381, 331], "openimages_v1.2/imgs/e26ff9e7c08822f9_Plant_Rose_Flower_6.jpg"]
125
+ [198, [["drive the white van on the left away.", "there are two white vans, which one do you mean?"], ["the right one.", "is it the one next to the red car?"], ["no, the smaller one.", "is it the first one from the right?"], ["no, the second on the left.", "[35, 176, 169, 254]"]], [32, 183, 150, 255], [35, 176, 169, 254], "openimages_v1.2/imgs/88af919148def218_Land vehicle_Truck_Vehicle_Wheel_Van_Car_2.jpg"]
126
+ [1442, [["The bird is so cute.", "which one are you talking about?"], ["the black one on the left. ", "is it the first one from the left?"], ["yes.", "[37, 115, 58, 135]"]], [558, 0, 587, 32], [37, 115, 58, 135], "openimages_v1.2/imgs/4e1503114e8c097f_Bird_Duck_Animal_4.jpg"]
127
+ [1466, [["the girl is so lovely.", "which one are you talking about?"], ["The one who shows me her face.", "is it the first one from the left?"], ["yes.", "[142, 279, 240, 419]"]], [234, 272, 352, 425], [142, 279, 240, 419], "openimages_v1.2/imgs/d357157808acbaa9_Toy_Monkey_3.jpg"]
128
+ [1494, [["the donut looks delicious.", "which one are you talking about?"], ["the chocolate one.", "is it the top one?"], ["yes.", "[264, 31, 466, 219]"]], [364, 165, 579, 393], [264, 31, 466, 219], "openimages_v1.2/imgs/6b68fc10e440a1a8_Food_Bagel_Snack_Dessert_Baked goods_Doughnut_3.jpg"]
129
+ [1508, [["give me the persimmon.", "which one do you want?"], ["the biggest one.", "is it the middle one?"], ["no.", "is it the one on the right?"], ["yes.", "[422, 152, 611, 335]"]], [169, 138, 354, 327], [422, 152, 611, 335], "openimages_v1.2/imgs/bdc3d2983ffc0a46_Food_Tomato_Vegetable_19.jpg"]
130
+ [1516, [["the cake looks delicious.<br>", "which piece do you want?"], ["The one on which the strawberry is.<br>", "is it the first piece from the left?"], ["no.", "is it the second piece from the left?"], ["no.", "is it the third piece from the left?"], ["no.", "is it the fourth piece from the left?"], ["no.", "is it the fifth piece from the left?"], ["no, it is on the second row from the left.", "is it the fifth piece from the left?"], ["yes.", "[360, 202, 407, 232]"]], [236, 278, 325, 377], [360, 202, 407, 232], "openimages_v1.2/imgs/17da62a9aa6e99f6_Food_Fast food_Teapot_Person_Dessert_Kettle_Tableware_Human head_Baked goods_Cake_Pastry_5.jpg"]
benchmark/human_eval/xvlm_20230515-192335.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [13, [["ride the red bikes.", "which one do you want?"], ["the middle one.", "is it the front wheel?"], ["no, the back wheel.", "[35, 408, 91, 442]"]], [35, 422, 108, 479], [35, 408, 91, 442], "openimages_v1.2/imgs/2b240f8c7b095972_Bicycle_Bicycle wheel_Land vehicle_House_Vehicle_Building_Tire_4.jpg"]
2
+ [51, [["catch the duck on the right.", "which one?"], ["the one on the far right and is swimming to the left.", "is it the one on the far right?"], ["yes.", "[500, 172, 618, 238]"]], [500, 172, 618, 238], [500, 172, 618, 238], "openimages_v1.2/imgs/0391aa378a4094b3_Bird_Duck_12.jpg"]
3
+ [54, [["please mark the adult penguin.", "is it the first one from the left?"], ["the closest one.", "[273, 253, 437, 547]"]], [187, 115, 420, 548], [273, 253, 437, 547], "openimages_v1.2/imgs/5cc33103845e5519_Bird_Penguin_Animal_4.jpg"]
4
+ [61, [["mark the green lego car.", "is it the one in the middle?"], ["yes.", "[187, 308, 264, 382]"]], [187, 308, 264, 382], [187, 308, 264, 382], "openimages_v1.2/imgs/8b240c08cb38382e_Toy_Bus_Land vehicle_Vehicle_13.jpg"]
5
+ [65, [["bring me the laptop on the left.", "is it the one with the black screen?"], ["yeah.", "[0, 72, 346, 426]"]], [0, 72, 346, 426], [0, 72, 346, 426], "openimages_v1.2/imgs/5b39c13a7271eed4_Laptop_Computer keyboard_3.jpg"]
6
+ [88, [["pour the champagne in the far left glass.", "ok, i see."], ["thanks.", "[64, 238, 121, 290]"]], [64, 238, 121, 290], [64, 238, 121, 290], "openimages_v1.2/imgs/efd13a8dcab6d7f4_Drink_Table_Tableware_Bottle_Wine_Wine glass_Furniture_5.jpg"]
7
+ [88, [["pour the champagne in the far left glass.", "ok, i see."], ["thanks.", "[64, 238, 121, 290]"]], [64, 238, 121, 290], [64, 238, 121, 290], "openimages_v1.2/imgs/efd13a8dcab6d7f4_Drink_Table_Tableware_Bottle_Wine_Wine glass_Furniture_5.jpg"]
8
+ [93, [["feed the panda in the mid rear.", "is it the one with the head down?"], ["no.", "is it the one on the far right?"], ["no, the furthest one.", "[148, 100, 266, 216]"]], [148, 100, 267, 217], [148, 100, 266, 216], "openimages_v1.2/imgs/329035dece1a9746_Panda_4.jpg"]
9
+ [142, [["pick the broccoli on the top for me.", "is it the one on the far right?"], ["yes.", "[228, 151, 460, 353]"]], [229, 152, 460, 354], [228, 151, 460, 353], "openimages_v1.2/imgs/e0b81ffd382fa5f5_Food_Vegetable_Broccoli_3.jpg"]
10
+ [161, [["mark the flower bed on the right.", "which one?"], ["the right one.", "is it the one on the far right?"], ["yes.", "[270, 525, 299, 564]"]], [284, 517, 342, 615], [270, 525, 299, 564], "openimages_v1.2/imgs/0cb491551929a2ee_Stairs_Houseplant_Porch_Plant_Tree_Flower_Building_Flowerpot_4.jpg"]
11
+ [163, [["wash the tomato on the top left.", "is it the one in the upper left corner?"], ["yes.", "[46, 2, 208, 167]"]], [46, 2, 208, 167], [46, 2, 208, 167], "openimages_v1.2/imgs/987761ccf847341d_Tomato_21.jpg"]
12
+ [178, [["feed the animal sitting on the right.", "is it the one with the head down?"], ["yes", "[206, 68, 450, 529]"]], [206, 68, 450, 529], [206, 68, 450, 529], "openimages_v1.2/imgs/921f182a889503b5_Carnivore_Animal_3.jpg"]
13
+ [189, [["wash the fork on the table.", "which bowl?"], ["the fork.", "is it the one on the far right?"], ["yes.", "[397, 90, 622, 233]"]], [410, 360, 456, 397], [397, 90, 622, 233], "openimages_v1.2/imgs/ee784978eea58567_Drink_Food_Tableware_Spoon_3.jpg"]
14
+ [189, [["wash the fork on the table.", "which bowl?"], ["the fork.", "is it the one on the far right?"], ["yes.", "[397, 90, 622, 233]"]], [410, 360, 456, 397], [397, 90, 622, 233], "openimages_v1.2/imgs/ee784978eea58567_Drink_Food_Tableware_Spoon_3.jpg"]
15
+ [191, [["bring me the pink flower on the left.", "which one?"], ["the left one.", "the one on the far left?"], ["the pink flower on the left.", "[56, 181, 244, 323]"]], [192, 146, 381, 331], [56, 181, 244, 323], "openimages_v1.2/imgs/e26ff9e7c08822f9_Plant_Rose_Flower_6.jpg"]
16
+ [198, [["drive the van on the far left.", "is it the first one from the left?"], ["yes.", "[32, 182, 150, 255]"]], [32, 183, 150, 255], [32, 182, 150, 255], "openimages_v1.2/imgs/88af919148def218_Land vehicle_Truck_Vehicle_Wheel_Van_Car_2.jpg"]
17
+ [198, [["drive the van on the far left.", "is it the first one from the left?"], ["yes.", "[32, 182, 150, 255]"]], [32, 183, 150, 255], [32, 182, 150, 255], "openimages_v1.2/imgs/88af919148def218_Land vehicle_Truck_Vehicle_Wheel_Van_Car_2.jpg"]
18
+ [206, [["open the black bottle of champagne.", "which bottle is it?"], ["the left one.", "[76, 0, 158, 172]"]], [151, 0, 328, 476], [76, 0, 158, 172], "openimages_v1.2/imgs/25b17c8df7b2400b_Beer_Drink_Bottle_Wine_5.jpg"]
19
+ [209, [["This tea set is so pretty.", "yes, which one do you like?"], ["It is in the middle.", "[168, 38, 378, 215]"]], [168, 39, 378, 215], [168, 38, 378, 215], "openimages_v1.2/imgs/76005037c3e150f7_Teapot_Snack_Dessert_Food_Baked goods_Pastry_6.jpg"]
20
+ [228, [["I want to know its name.", "which one?"], ["The bigger one.", "is it the one on the far right?"], ["yes", "[0, 0, 508, 497]"]], [0, 0, 509, 497], [0, 0, 508, 497], "openimages_v1.2/imgs/31d3d97132fbb2f2_Bird_Penguin_Animal_3.jpg"]
21
+ [255, [["It does not know if it is made by machine.", "what do you want to take?"], ["it is a cookie.", "is it the one on the left?"], ["yes", "[0, 241, 202, 330]"]], [0, 241, 202, 330], [0, 241, 202, 330], "openimages_v1.2/imgs/467d1abed2e32295_Snack_Cookie_Food_Baked goods_17.jpg"]
22
+ [285, [["It does not look very strong", "is it the one on the far left?"], ["no, it\u2019s in the middle.", "is it the one with the black head down?"], ["no", "[231, 210, 402, 400]"]], [231, 210, 402, 400], [231, 210, 402, 400], "openimages_v1.2/imgs/1797f4e85e52bc5a_Building_Bird_Chicken_Animal_4.jpg"]
23
+ [318, [["Give me that coffee cup.", "which one?"], ["white one", "is it the one on the far right?"], ["no", "[68, 6, 252, 167]"]], [68, 6, 252, 167], [68, 6, 252, 167], "openimages_v1.2/imgs/67c02c17c9a759c8_Drink_Coffee cup_Food_Saucer_Tableware_Vegetable_Salad_Broccoli_2.jpg"]
24
+ [326, [["Give me that action figures", "which one do you want?"], ["It is in the middle.", "is it the one with the red head?"], ["no", "ok, here you go."], ["where", "[234, 228, 275, 288]"]], [234, 228, 275, 288], [234, 228, 275, 288], "openimages_v1.2/imgs/bcc0fa02fdce3af0_Toy_Land vehicle_Vehicle_7.jpg"]
25
+ [333, [["This flower is interesting", "which one are you talking about?"], ["The purple one", "is it the one on the far right?"], ["yes", "[397, 261, 467, 344]"]], [397, 261, 468, 345], [397, 261, 467, 344], "openimages_v1.2/imgs/921e76975c6f820c_Plant_Rose_Flower_3.jpg"]
26
+ [393, [["The bird is cute", "which one?"], ["Bottom of the page", "is it the one on the far right?"], ["no", "[222, 279, 301, 394]"]], [222, 279, 301, 394], [222, 279, 301, 394], "openimages_v1.2/imgs/ef4a0d60192428c1_Bird_Penguin_Animal_13.jpg"]
27
+ [407, [["It is so cool", "yes, it\u2019s beautiful."], ["which one", "[85, 121, 601, 337]"]], [85, 121, 601, 337], [85, 121, 601, 337], "openimages_v1.2/imgs/91826655a7d34abd_Land vehicle_Train_Vehicle_Wheel_3.jpg"]
28
+ [429, [["It is really strong.", "which one are you talking about?"], ["the left one", "[28, 188, 282, 363]"]], [28, 188, 282, 364], [28, 188, 282, 363], "openimages_v1.2/imgs/02c91a8515336047_Cattle_10.jpg"]
29
+ [440, [["What is the brand of this computer", "which one are you talking about?"], ["left", "is it the one on the far right?"], ["no", "[207, 0, 334, 177]"]], [0, 0, 230, 256], [207, 0, 334, 177], "openimages_v1.2/imgs/9b031924d17fc0d4_Beer_Laptop_Drink_Office supplies_Bottle_Wine_Book_2.jpg"]
30
+ [447, [["It\u2019s a little samll", "yes."], ["which one", "is it the one on the far right?"], ["no", "[318, 156, 550, 330]"]], [319, 156, 550, 331], [318, 156, 550, 330], "openimages_v1.2/imgs/008ede8abd811226_Squirrel_Animal_3.jpg"]
31
+ [451, [["It is in full bloom", "yes, it\u2019s beautiful."], ["the middle one?", "[228, 75, 411, 257]"]], [247, 244, 297, 333], [228, 75, 411, 257], "openimages_v1.2/imgs/f7f3a10c836226b2_Plant_Flower_Common sunflower_7.jpg"]
32
+ [457, [["It is strange", "yes, do you want to buy one?"], ["yes, the green one", "[205, 50, 232, 63]"]], [29, 348, 266, 440], [205, 50, 232, 63], "openimages_v1.2/imgs/15f3aeb130d0d03d_Toy_Land vehicle_Vehicle_Wheel_8.jpg"]
33
+ [466, [["What kind of animal is this?", "which one are you talking about?"], ["the seconed one", "[200, 30, 321, 307]"]], [277, 30, 354, 219], [200, 30, 321, 307], "openimages_v1.2/imgs/7fe6972c3f562011_Horse_Cattle_Animal_6.jpg"]
34
+ [476, [["Please pick one flower for me.", "which one do you want?"], ["The second one from the left.", "ok, here you go."], ["Please make sure it is the second one from the left.", "[205, 183, 224, 201]"]], [154, 175, 190, 206], [205, 183, 224, 201], "openimages_v1.2/imgs/46ceefe0e0ecff39_Chair_Houseplant_Table_Plant_Flower_Furniture_Flowerpot_1.jpg"]
35
+ [501, [["Pass me the first carton.", "is it the one in the middle?"], ["No. It is the first carton from the left.", "[2, 271, 105, 337]"]], [62, 250, 327, 425], [2, 271, 105, 337], "openimages_v1.2/imgs/2a1552207182c802_Box_4.jpg"]
36
+ [541, [["Pass me the plant pot.", "which one?"], ["The one in the middle.", "is it the one with the flower pot on the left?"], ["No. It is the one closest to us.", "[276, 344, 347, 400]"]], [276, 344, 348, 401], [276, 344, 347, 400], "openimages_v1.2/imgs/b539ca908aece624_Fountain_Houseplant_Plant_Tree_Flower_Flowerpot_5.jpg"]
37
+ [546, [["Pass me the axe.", "which one do you want?"], ["I want the first one from the left.", "ok, i see."], ["Please make sure it\u2019s the first one from the left.", "[145, 169, 250, 514]"]], [145, 169, 250, 514], [145, 169, 250, 514], "openimages_v1.2/imgs/5c7e5c0ac6b63b5e_Tool_Axe_4.jpg"]
38
+ [563, [["The flag is beautiful.", "which side are you talking about?"], ["The one in red, white and blue.", "[96, 177, 315, 303]"]], [97, 177, 315, 304], [96, 177, 315, 303], "openimages_v1.2/imgs/5d170945d5549dea_Flag_3.jpg"]
39
+ [569, [["The flag is beautiful.", "which side are you talking about?"], ["The one farthest away from us.", "[378, 233, 493, 343]"]], [378, 233, 493, 343], [378, 233, 493, 343], "openimages_v1.2/imgs/3bd5795f8469318e_Flag_3.jpg"]
40
+ [592, [["The polar bear is so cute.", "which one are you talking about?"], ["The sleeping one.", "is it the one on the far right?"], ["No. It is the one on the left.", "[463, 186, 573, 228]"]], [38, 206, 183, 254], [463, 186, 573, 228], "openimages_v1.2/imgs/392f8102414cc81e_Polar bear_4.jpg"]
41
+ [600, [["Pass me the cherry tomato.", "which one do you want?"], ["The one in the middle.", "ok, here you go."], ["Please make sure it is the one on the cabbage.", "[131, 168, 255, 290]"]], [131, 168, 255, 290], [131, 168, 255, 290], "openimages_v1.2/imgs/0353b5f2eb7bd145_Cucumber_Zucchini_Food_Tomato_Vegetable_Cabbage_Salad_13.jpg"]
42
+ [689, [["The cake looks really nice.", "yes."], ["Please pass me the red cake closest to us.", "[127, 444, 287, 607]"]], [128, 445, 288, 608], [127, 444, 287, 607], "openimages_v1.2/imgs/ffcce9c7edd86436_Dessert_Food_Baked goods_Cake_Pastry_18.jpg"]
43
+ [696, [["Please pass me the vass.", "which one do you want?"], ["The one in the middle of the second layer of the shelf.", "is it the one with the red cover?"], ["No. It is the white one with a red flower on it.", "is it the one on the far right?"], ["No. It is the one in the middle.", "okay, here you go."], ["Please make sure it is the one in the middle.", "[314, 335, 357, 387]"]], [267, 353, 343, 478], [314, 335, 357, 387], "openimages_v1.2/imgs/a0abaa5bf4b660d8_Teapot_Vase_Tableware_Jug_Furniture_8.jpg"]
44
+ [704, [["The latte art is so beautiful.", "yes."], ["I want the one in the right.", "[273, 41, 400, 120]"]], [273, 41, 400, 120], [273, 41, 400, 120], "openimages_v1.2/imgs/0e4bf084d6b8b360_Croissant_Coffee_Saucer_4.jpg"]
45
+ [727, [["Pass me the plant pot.", "which one?"], ["The one with purple flower.", "is it the one on the far left?"], ["No. It is the first one from the right", "[450, 365, 490, 397]"]], [510, 371, 547, 436], [450, 365, 490, 397], "openimages_v1.2/imgs/4bd65f1121e94380_Chair_Houseplant_House_Plant_Flower_Furniture_Flowerpot_5.jpg"]
46
+ [735, [["The dustbin is full.", "yes, there are many."], ["The one in the left is full.", "is it the one with the blue pattern?"], ["Yes, it is.", "[49, 278, 198, 462]"]], [49, 278, 198, 462], [49, 278, 198, 462], "openimages_v1.2/imgs/7ee6b53585ead951_Waste container_9.jpg"]
47
+ [740, [["I want to take a boat.", "which one do you want to rent?"], ["The one in yellow and blue.", "is it the one on the far right?"], ["No. It is the first one from the left.", "[93, 157, 521, 309]"]], [93, 157, 521, 309], [93, 157, 521, 309], "openimages_v1.2/imgs/c22e23202c31e85e_Boat_Barge_Watercraft_Vehicle_4.jpg"]
48
+ [747, [["The balloon is flying so high.", "which one are you talking about?"], ["The first one from the left.", "[25, 33, 187, 214]"]], [25, 33, 187, 214], [25, 33, 187, 214], "openimages_v1.2/imgs/23a0eb2232fe9e66_Balloon_Vehicle_4.jpg"]
49
+ [758, [["can you pass me the tree in middle?", "yes, which one do you want?"], ["it is in the middle.", "ok, here it is."], ["where?", "[191, 210, 337, 363]"]], [191, 210, 338, 363], [191, 210, 337, 363], "openimages_v1.2/imgs/f2246253917f75b5_Christmas tree_Tree_4.jpg"]
50
+ [758, [["can you pass me the tree in middle?", "yes, which one do you want?"], ["it is in the middle.", "ok, here it is."], ["where?", "[191, 210, 337, 363]"]], [191, 210, 338, 363], [191, 210, 337, 363], "openimages_v1.2/imgs/f2246253917f75b5_Christmas tree_Tree_4.jpg"]
51
+ [778, [["the fish is so cute.", "which one are you talking about?"], ["it is in the middle.", "is it the one with the head down?"], ["no, it is in the middle.", "[445, 181, 583, 234]"]], [97, 161, 357, 292], [445, 181, 583, 234], "openimages_v1.2/imgs/5d97d9b7b9995160_Goldfish_Fish_Animal_14.jpg"]
52
+ [778, [["the fish is so cute.", "which one are you talking about?"], ["it is in the middle.", "is it the one with the head down?"], ["no, it is in the middle.", "[445, 181, 583, 234]"]], [97, 161, 357, 292], [445, 181, 583, 234], "openimages_v1.2/imgs/5d97d9b7b9995160_Goldfish_Fish_Animal_14.jpg"]
53
+ [859, [["Can you pass me the biggest pumpkin?", "yes, which one do you want?"], ["the one on the left.", "ok, here you go."], ["show me.", "[6, 44, 245, 266]"]], [7, 44, 245, 267], [6, 44, 245, 266], "openimages_v1.2/imgs/6708c917862153ec_Squash (Plant)_Pumpkin_Fruit_4.jpg"]
54
+ [859, [["Can you pass me the biggest pumpkin?", "yes, which one do you want?"], ["the one on the left.", "ok, here you go."], ["show me.", "[6, 44, 245, 266]"]], [7, 44, 245, 267], [6, 44, 245, 266], "openimages_v1.2/imgs/6708c917862153ec_Squash (Plant)_Pumpkin_Fruit_4.jpg"]
55
+ [864, [["Can you pass me the taxi?", "which one are you talking about?"], ["it is in the middle.", "ok, here you go."], ["where?", "[199, 195, 367, 331]"]], [199, 195, 367, 331], [199, 195, 367, 331], "openimages_v1.2/imgs/13399741ea286868_Vehicle_Car_Taxi_4.jpg"]
56
+ [864, [["Can you pass me the taxi?", "which one are you talking about?"], ["it is in the middle.", "ok, here you go."], ["where?", "[199, 195, 367, 331]"]], [199, 195, 367, 331], [199, 195, 367, 331], "openimages_v1.2/imgs/13399741ea286868_Vehicle_Car_Taxi_4.jpg"]
57
+ [864, [["Can you pass me the taxi?", "which one are you talking about?"], ["it is in the middle.", "ok, here you go."], ["where?", "[199, 195, 367, 331]"]], [199, 195, 367, 331], [199, 195, 367, 331], "openimages_v1.2/imgs/13399741ea286868_Vehicle_Car_Taxi_4.jpg"]
58
+ [865, [["the cake is so cute.", "which one do you want?"], ["it is in top middle.", "is it the one with the cake on the left?"], ["it is in the middle.", "[189, 63, 274, 141]"], ["the right beside it.", "[266, 55, 360, 123]"]], [266, 55, 360, 123], [266, 55, 360, 123], "openimages_v1.2/imgs/008b40159ed22bdf_Food_Baked goods_Pastry_3.jpg"]
59
+ [865, [["the cake is so cute.", "which one do you want?"], ["it is in top middle.", "is it the one with the cake on the left?"], ["it is in the middle.", "[189, 63, 274, 141]"], ["the right beside it.", "[266, 55, 360, 123]"]], [266, 55, 360, 123], [266, 55, 360, 123], "openimages_v1.2/imgs/008b40159ed22bdf_Food_Baked goods_Pastry_3.jpg"]
60
+ [919, [["the deer is so cute.", "which one are you talking about?"], ["Behind the doll", "[327, 1, 479, 161]"]], [328, 1, 479, 161], [327, 1, 479, 161], "openimages_v1.2/imgs/9263a5fd06a1b5ca_Snowman_Toy_6.jpg"]
61
+ [919, [["the deer is so cute.", "which one are you talking about?"], ["Behind the doll", "[327, 1, 479, 161]"]], [328, 1, 479, 161], [327, 1, 479, 161], "openimages_v1.2/imgs/9263a5fd06a1b5ca_Snowman_Toy_6.jpg"]
62
+ [928, [["the flower is so lovely.", "which one are you talking about?"], ["the left one.", "[3, 128, 184, 310]"]], [3, 128, 184, 310], [3, 128, 184, 310], "openimages_v1.2/imgs/0fffd5362226f8d3_Flower_Plant_Rose_3.jpg"]
63
+ [928, [["the flower is so lovely.", "which one are you talking about?"], ["the left one.", "[3, 128, 184, 310]"]], [3, 128, 184, 310], [3, 128, 184, 310], "openimages_v1.2/imgs/0fffd5362226f8d3_Flower_Plant_Rose_3.jpg"]
64
+ [940, [["pass me the blue flag.", "which side?"], ["the blue one.", "is it the one on the far right?"], ["no, the first one from the left.", "[100, 141, 130, 251]"], ["no, the first one from the left.", "[100, 141, 130, 251]"]], [100, 142, 130, 251], [100, 141, 130, 251], "openimages_v1.2/imgs/009c68ec28f32e34_Flag_3.jpg"]
65
+ [940, [["pass me the blue flag.", "which side?"], ["the blue one.", "is it the one on the far right?"], ["no, the first one from the left.", "[100, 141, 130, 251]"], ["no, the first one from the left.", "[100, 141, 130, 251]"]], [100, 142, 130, 251], [100, 141, 130, 251], "openimages_v1.2/imgs/009c68ec28f32e34_Flag_3.jpg"]
66
+ [1034, [["the doll is so cute.", "which one?"], ["the left one.", "is it the one with the head down?"], [" it is green.", "[125, 96, 280, 381]"]], [125, 96, 280, 381], [125, 96, 280, 381], "openimages_v1.2/imgs/d6d4e0e131db09c9_Toy_4.jpg"]
67
+ [1034, [["the doll is so cute.", "which one?"], ["the left one.", "is it the one with the head down?"], [" it is green.", "[125, 96, 280, 381]"]], [125, 96, 280, 381], [125, 96, 280, 381], "openimages_v1.2/imgs/d6d4e0e131db09c9_Toy_4.jpg"]
68
+ [1098, [["pass me the flower pot.", "which one?"], ["The second one on the right.", "[527, 187, 575, 233]"]], [527, 187, 576, 233], [527, 187, 575, 233], "openimages_v1.2/imgs/2dbb4a546d107596_Flower_Street light_Houseplant_Plant_Tree_Flowerpot_4.jpg"]
69
+ [1098, [["pass me the flower pot.", "which one?"], ["The second one on the right.", "[527, 187, 575, 233]"]], [527, 187, 576, 233], [527, 187, 575, 233], "openimages_v1.2/imgs/2dbb4a546d107596_Flower_Street light_Houseplant_Plant_Tree_Flowerpot_4.jpg"]
70
+ [1116, [["pass me the Squirrel doll.", "which one do you want?"], ["the cloest one.", "is it the one on the far right?"], ["no, the nearest one.", "[193, 362, 407, 555]"]], [194, 362, 408, 556], [193, 362, 407, 555], "openimages_v1.2/imgs/5cdad082e579d5cd_Toy_5.jpg"]
71
+ [1116, [["pass me the Squirrel doll.", "which one do you want?"], ["the cloest one.", "is it the one on the far right?"], ["no, the nearest one.", "[193, 362, 407, 555]"]], [194, 362, 408, 556], [193, 362, 407, 555], "openimages_v1.2/imgs/5cdad082e579d5cd_Toy_5.jpg"]
72
+ [1130, [["pass me the flower.", "which one?"], ["the right one.", "the one on the far right?"], ["yes.", "[546, 246, 608, 305]"]], [546, 246, 608, 305], [546, 246, 608, 305], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
73
+ [1130, [["pass me the flower.", "which one?"], ["the right one.", "the one on the far right?"], ["yes.", "[546, 246, 608, 305]"]], [546, 246, 608, 305], [546, 246, 608, 305], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
74
+ [1130, [["pass me the flower.", "which one?"], ["the right one.", "the one on the far right?"], ["yes.", "[546, 246, 608, 305]"]], [546, 246, 608, 305], [546, 246, 608, 305], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
75
+ [1130, [["pass me the flower.", "which one?"], ["the right one.", "the one on the far right?"], ["yes.", "[546, 246, 608, 305]"]], [546, 246, 608, 305], [546, 246, 608, 305], "openimages_v1.2/imgs/e47152d5fc91ef57_Plant_Rose_Flower_3.jpg"]
76
+ [1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "[264, 295, 323, 328]"]], [264, 295, 323, 328], [264, 295, 323, 328], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
77
+ [1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "[264, 295, 323, 328]"]], [264, 295, 323, 328], [264, 295, 323, 328], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
78
+ [1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "[264, 295, 323, 328]"]], [264, 295, 323, 328], [264, 295, 323, 328], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
79
+ [1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "[264, 295, 323, 328]"]], [264, 295, 323, 328], [264, 295, 323, 328], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
80
+ [1149, [["can you pass me the pumpkin?", "which one do you want?"], ["the small one on the flowerpot.", "is it the one on the far right?"], ["no, it is on the flowerpot.", "[264, 295, 323, 328]"]], [264, 295, 323, 328], [264, 295, 323, 328], "openimages_v1.2/imgs/0190d205a5b14b41_Fruit_Houseplant_Plant_Pumpkin_Squash (Plant)_Vegetable_Flowerpot_2.jpg"]
81
+ [1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
82
+ [1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
83
+ [1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
84
+ [1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
85
+ [1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
86
+ [1182, [["pass me the orange, please.", "which one do you want?"], ["The rightmost one.", "is it the one on the far right?"], ["yes.", "[505, 213, 568, 267]"]], [506, 213, 568, 267], [505, 213, 568, 267], "openimages_v1.2/imgs/88e245ce0e6fdd75_Food_Fruit_Orange_6.jpg"]
87
+ [775, [["can you pass me the plate?", "which one do you want?"], ["Upper left corner.", "is it the one on the left?"], ["no, it is the second one.", "ok, here you go."], ["where?", "[33, 5, 132, 63]"]], [39, 0, 134, 73], [33, 5, 132, 63], "openimages_v1.2/imgs/318a8bb8339dde66_Candle_Tableware_Plate_Platter_Serving tray_4.jpg"]
88
+ [775, [["can you pass me the plate?", "which one do you want?"], ["Upper left corner.", "is it the one on the left?"], ["no, it is the second one.", "ok, here you go."], ["where?", "[33, 5, 132, 63]"]], [39, 0, 134, 73], [33, 5, 132, 63], "openimages_v1.2/imgs/318a8bb8339dde66_Candle_Tableware_Plate_Platter_Serving tray_4.jpg"]
89
+ [1206, [["pass me a flower", "which one do you want?"], ["The upper red one.", "is it the one on the far right?"], ["yes", "[341, 0, 515, 192]"]], [341, 0, 515, 192], [341, 0, 515, 192], "openimages_v1.2/imgs/a64572ad63e1c1ab_Rose_Flower_3.jpg"]
90
+ [1206, [["pass me a flower", "which one do you want?"], ["The upper red one.", "is it the one on the far right?"], ["yes", "[341, 0, 515, 192]"]], [341, 0, 515, 192], [341, 0, 515, 192], "openimages_v1.2/imgs/a64572ad63e1c1ab_Rose_Flower_3.jpg"]
91
+ [1209, [["Pass me a cake.", "which one do you want?"], ["The one with a \u201cR\u201d", "is it the one on the far right?"], ["Yes", "[404, 95, 597, 302]"]], [413, 102, 589, 269], [404, 95, 597, 302], "openimages_v1.2/imgs/5c7b9a2eec5ff0ae_Muffin_Dessert_Food_Baked goods_7.jpg"]
92
+ [1232, [["Pass me a cake.", "which one do you want?"], ["The one with a \u201cR\u201d", "is it the one on the far right?"], ["Yes", "[404, 95, 597, 302]"]], [413, 102, 589, 269], [404, 95, 597, 302], "openimages_v1.2/imgs/5c7b9a2eec5ff0ae_Muffin_Dessert_Food_Baked goods_7.jpg"]
93
+ [1232, [["Give me a bottle.", "which one do you want?"], ["The upest, from left, the forth.", "[162, 12, 204, 213]"]], [244, 13, 293, 213], [162, 12, 204, 213], "openimages_v1.2/imgs/a211016f402b956b_Bottle_Building_Wine_7.jpg"]
94
+ [1236, [["give me a pillow", "which one do you want?"], ["The frontmost one.", "is it the one on the left?"], ["No.", "[339, 210, 470, 275]"]], [339, 210, 471, 275], [339, 210, 470, 275], "openimages_v1.2/imgs/2992bde1d85a7162_Nightstand_Pillow_6.jpg"]
95
+ [1266, [["strawberry", "yes, which one do you want?"], ["The whole one.", "the one on the far right?"], ["No\uff0cthe middle", "[185, 64, 287, 222]"]], [185, 64, 287, 222], [185, 64, 287, 222], "openimages_v1.2/imgs/8a61c3b92dffeb3e_Dessert_Dairy Product_Food_Fruit_Baked goods_Strawberry_Cake_10.jpg"]
96
+ [1287, [["The green pants", "which one are you talking about?"], ["The one who wears green pants", "[527, 275, 580, 346]"]], [527, 276, 580, 346], [527, 275, 580, 346], "openimages_v1.2/imgs/0c22601eb7370faa_Trousers_4.jpg"]
97
+ [1301, [["goblet", "which glass?"], ["I think there is only one glass", "is it the first glass from the left?"], ["yes", "[138, 171, 231, 272]"]], [139, 171, 231, 272], [138, 171, 231, 272], "openimages_v1.2/imgs/b132c5bc9f77cb7f_Cocktail_Drink_Food_Fruit_Tableware_Strawberry_Wine_Wine glass_3.jpg"]
98
+ [1309, [["The painting", "which one?"], ["The leftmost", "is it the one on the far left?"], ["yes", "[0, 100, 63, 289]"]], [0, 100, 63, 289], [0, 100, 63, 289], "openimages_v1.2/imgs/00225def00d94d8b_Picture frame_Furniture_13.jpg"]
99
+ [1330, [["pass me a tomato", "which one do you want?"], ["The one in lefttop", "[37, 145, 196, 309]"]], [37, 145, 196, 309], [37, 145, 196, 309], "openimages_v1.2/imgs/4e5506fb9ab3dc08_Fruit_Plant_Tomato_Vegetable_8.jpg"]
100
+ [1354, [["The purple flowerpot", "yes, it\u2019s the one on the left of the flower pot"], ["yes", "[49, 274, 140, 360]"]], [85, 154, 162, 234], [49, 274, 140, 360], "openimages_v1.2/imgs/08e6bf832016e29e_Houseplant_Plant_Flower_Flowerpot_11.jpg"]
101
+ [1372, [["I want a red apple", "there are many here, which one do you want"], ["The red in rightmost", "[497, 339, 597, 419]"]], [582, 342, 639, 414], [497, 339, 597, 419], "openimages_v1.2/imgs/7d5be279c905b3fa_Apple_Food_Fruit_Orange_Grapefruit_11.jpg"]
102
+ [1385, [["the cow", "which one?"], ["the one on the right side of the tree", "is it the one on the far right?"], ["NO", "[537, 321, 620, 344]"]], [126, 257, 245, 330], [537, 321, 620, 344], "openimages_v1.2/imgs/0e9f655b0a190ab1_Cattle_Plant_Tree_Animal_13.jpg"]
103
+ [1329, [["give me the hot air balloon on the leftmost", "is it the first one from the left?"], ["yes", "[0, 219, 131, 426]"]], [0, 219, 131, 426], [0, 219, 131, 426], "openimages_v1.2/imgs/521d913963c3b47c_Balloon_Vehicle_Aircraft_3.jpg"]
104
+ [1429, [["the pink towel ", "yes."], ["give it to me", "ok, here you go."], ["label it", "[0, 0, 639, 115]"]], [0, 294, 639, 423], [0, 0, 639, 115], "openimages_v1.2/imgs/613305c0a39b4c72_Towel_3.jpg"]
105
+ [1436, [["give me the rightmost penguin", "is it the first one from the left?"], ["No", "[440, 141, 511, 420]"]], [440, 142, 511, 420], [440, 141, 511, 420], "openimages_v1.2/imgs/3868829d6119e6ea_Bird_Penguin_Animal_3.jpg"]
106
+ [1437, [["give me the flowerpot", "is it the one on the left?"], ["No", "is it the one on the far right?"], ["No", "[0, 372, 73, 639]"]], [148, 438, 376, 639], [0, 372, 73, 639], "openimages_v1.2/imgs/23bd2f5d5d520cd1_Houseplant_Plant_Flowerpot_3.jpg"]
107
+ [1992, [["give me the left black flower pot.", "is it the one on the left?"], ["yes", "[10, 519, 113, 612]"]], [60, 565, 105, 619], [10, 519, 113, 612], "openimages_v1.2/imgs/02a344aac1eead08_House_Tree_Building_Houseplant_Plant_Flower_Flowerpot_3.jpg"]
108
+ [1960, [["pass me the most right flower pot.", "is it the one on the far right?"], ["yes", "[438, 353, 518, 434]"]], [438, 353, 518, 434], [438, 353, 518, 434], "openimages_v1.2/imgs/6baf9ff6fd56c7c6_Houseplant_Plant_Flowerpot_4.jpg"]
109
+ [1918, [["the red cake with 2 eyes.", "which one?"], ["right one", "[332, 307, 592, 545]"]], [332, 308, 592, 546], [332, 307, 592, 545], "openimages_v1.2/imgs/fcc713fe37b82bdc_Toy_Snack_Dessert_Food_Baked goods_12.jpg"]
110
+ [1888, [["Give me the bottom slice of cucumber", "is it the first slice from the left in the first row"], ["the first slice from right", "[430, 300, 537, 376]"]], [358, 376, 493, 479], [430, 300, 537, 376], "openimages_v1.2/imgs/81303291b41e898d_Cucumber_3.jpg"]
111
+ [1827, [["give me the notebook", "is it the one on the far right?"], ["no. it is on the left.", "[0, 75, 279, 373]"]], [0, 75, 279, 374], [0, 75, 279, 373], "openimages_v1.2/imgs/7d930e70cd3b11a7_Laptop_Computer keyboard_Book_1.jpg"]
112
+ [1813, [["Give me the largest West Blue Flower", "is it the one on the far right?"], ["no, it is in the middle.", "[251, 223, 495, 403]"]], [251, 223, 495, 403], [251, 223, 495, 403], "openimages_v1.2/imgs/2f515c715d155747_Toy_Food_Broccoli_2.jpg"]
113
+ [1813, [["Give me the largest West Blue Flower", "is it the one on the far right?"], ["no, it is in the middle.", "[251, 223, 495, 403]"]], [251, 223, 495, 403], [251, 223, 495, 403], "openimages_v1.2/imgs/2f515c715d155747_Toy_Food_Broccoli_2.jpg"]
114
+ [1780, [["Give me the leftmost tape", "is it the first one from the left?"], ["yes", "[31, 53, 117, 207]"]], [31, 53, 117, 207], [31, 53, 117, 207], "openimages_v1.2/imgs/638c7052acd8ca03_Adhesive tape_4.jpg"]
115
+ [1774, [["give me the nearest boxes.", "is it the one on the left?"], ["no, it is in the middle.", "is it the one with the red box on the left?"], ["yes", "[146, 158, 350, 294]"]], [249, 231, 497, 370], [146, 158, 350, 294], "openimages_v1.2/imgs/0ae1f7df4aeca2f9_Box_Table_8.jpg"]
116
+ [1764, [["Pick the flower bud at the bottom for me.", "which one?"], ["the flower bud at the bottom", "is it the one on the far right?"], ["yes", "[376, 259, 443, 350]"]], [376, 259, 443, 350], [376, 259, 443, 350], "openimages_v1.2/imgs/0c875a717a0c0515_Pomegranate_3.jpg"]
117
+ [1751, [["the most left cat.", "ok, i see."], ["go", "[93, 27, 301, 140]"]], [94, 28, 301, 140], [93, 27, 301, 140], "openimages_v1.2/imgs/2b7a258daaeb3636_Carnivore_Cat_Animal_4.jpg"]
118
+ [1442, [["the bird is beautiful.", "which one are you talking about?"], ["in the middle.", "is it the one with the head facing us?"], ["yes.", "[179, 165, 206, 197]"]], [558, 0, 587, 32], [179, 165, 206, 197], "openimages_v1.2/imgs/4e1503114e8c097f_Bird_Duck_Animal_4.jpg"]
119
+ [1494, [["give me the donut.", "which one do you want?"], ["the yellow.", "is it the one on the left?"], ["yes.", "[199, 205, 410, 433]"]], [364, 165, 579, 393], [199, 205, 410, 433], "openimages_v1.2/imgs/6b68fc10e440a1a8_Food_Bagel_Snack_Dessert_Baked goods_Doughnut_3.jpg"]
120
+ [1508, [["wihch tomato is biggest?", "yes."], ["which tomato is biggest?", "[169, 138, 354, 327]"]], [169, 138, 354, 327], [169, 138, 354, 327], "openimages_v1.2/imgs/bdc3d2983ffc0a46_Food_Tomato_Vegetable_19.jpg"]
121
+ [1516, [["give me the red cake.", "is it the one on the left?"], ["no, on the right.", "is it the one on the right?"], ["yes.", "[334, 144, 364, 166]"]], [236, 278, 325, 377], [334, 144, 364, 166], "openimages_v1.2/imgs/17da62a9aa6e99f6_Food_Fast food_Teapot_Person_Dessert_Kettle_Tableware_Human head_Baked goods_Cake_Pastry_5.jpg"]
122
+ [1518, [["the cow is so big.", "which one are you talking about?"], ["in the middle.", "is it the one with the head facing us?"], ["yes.", "[154, 93, 582, 359]"]], [595, 122, 638, 232], [154, 93, 582, 359], "openimages_v1.2/imgs/8a5009673e492c9d_Cattle_Plant_Animal_8.jpg"]
123
+ [1554, [["give me the flag.", "which side do you want?"], ["right.", "is it the first side from the right?"], ["yes.", "[485, 8, 507, 166]"]], [156, 48, 218, 340], [485, 8, 507, 166], "openimages_v1.2/imgs/029bb5332af019f2_Flag_5.jpg"]
124
+ [1563, [["choose the biggest flag.", "is it the first flag from the left?"], ["no.", "is it the first flag from the left?"], ["no, in the middle.", "[290, 164, 343, 212]"]], [197, 252, 228, 284], [290, 164, 343, 212], "openimages_v1.2/imgs/c3441f8e9cda4470_Flag_43.jpg"]
125
+ [1583, [["it is delicious.", "which one do you want?"], ["right two.", "[259, 211, 381, 270]"]], [259, 211, 382, 270], [259, 211, 381, 270], "openimages_v1.2/imgs/9c3ca6d64e55c775_Dessert_Food_Fruit_Baked goods_Strawberry_Cake_5.jpg"]
126
+ [1650, [["give me the bowl.", "which one do you want?"], ["the blue.", "is it the one on the far right?"], ["yes.", "[580, 33, 639, 111]"]], [432, 38, 624, 186], [580, 33, 639, 111], "openimages_v1.2/imgs/6fd23a979a53f388_Chopsticks_Drink_Tableware_Bowl_8.jpg"]
127
+ [1652, [["give it to me.", "what do you want?"], ["the green one.", "the one on the far right?"], ["no, on the left.", "[0, 104, 311, 323]"]], [436, 147, 639, 429], [0, 104, 311, 323], "openimages_v1.2/imgs/8028f73fd4273898_Bus_Toy_Land vehicle_Vehicle_3.jpg"]
128
+ [1657, [["give me the dish.", "which one?"], ["right.", "is there a spoon on it?"], ["yes.", "[427, 182, 505, 261]"]], [272, 253, 428, 305], [427, 182, 505, 261], "openimages_v1.2/imgs/17b9486db8f43d5f_Vase_Houseplant_Mixing bowl_Table_Tableware_Bowl_Plate_Plant_Jug_Furniture_Flowerpot_5.jpg"]
129
+ [1708, [["what is in the picture?", "i see."], ["what is in the picture?", "[250, 194, 445, 359]"]], [66, 180, 179, 308], [250, 194, 445, 359], "openimages_v1.2/imgs/3d259b911bfc5069_Houseplant_Plant_Flowerpot_4.jpg"]
130
+ [1735, [["the dog is cute.", "which one are you talking about?"], ["on the right.", "is it the one with the head down?"], ["yes.", "[266, 6, 565, 484]"]], [40, 166, 274, 310], [266, 6, 565, 484], "openimages_v1.2/imgs/71da951cd1ea96c5_Carnivore_Dog_Animal_3.jpg"]
benchmark/invig_grounding/huge-main-0.7630/20230512-181257.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/invig_grounding/huge-main-0.7740/20230512-182551.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/invig_grounding/invig-large/20230514-043824.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/invig_grounding/invig-large/20230516-090623.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/invig_grounding/invig-large/20230516-091634.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/invig_grounding/invig-large/20230516-113956.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/invig_grounding/invig-large/20230516-141445.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/invig_grounding/invig-large/20230516-165140.json ADDED
The diff for this file is too large to render. See raw diff
 
benchmark/invig_grounding/invig-m-dialog/20230513-033645.json ADDED
The diff for this file is too large to render. See raw diff