xj
[feat] add benchmark results and huge ckpt/logs
6f5c44e
["index", "text_input", "text_output", "text_gen", "image_path", "bbox_target", "all_target"]
[0, [[" can you pass me that bottle of wine?", "which bottle do you want?"], [" i want the bottle on the left.", "ok, here you go."], [" i want the bottle on the far left.", ""]], "<bin_1> <bin_320> <bin_304> <bin_880>", "", "openimages_v1.2/imgs/00bf29ce5868629f_Beer_Drink_Bottle_2.jpg", [0.0, 0.3203125, 0.30416667461395264, 0.8812500238418579], [[0.0, 0.3203125, 0.30416666666666664, 0.88125]]]
[1, [[" can you give me that flower pot?", "which one do you want?"], [" i want the blue one.", "is it the one on the far right?"], [" yes, this is it.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_333> <bin_925> <bin_443> <bin_994>", "", "openimages_v1.2/imgs/2a1421c12c2a6754_Houseplant_Plant_Flower_Flowerpot_7.jpg", [0.42034903168678284, 0.30687499046325684, 0.4912639558315277, 0.34437501430511475], [[0.33299100000000004, 0.9262499999999999, 0.44296, 0.9949999999999999]]]
[2, [[" i want to rent a boat.", "which one do you want?"], [" i want the one with people on it.", "is it the first one from the left?"], [" no, it's the second one from the right.", "ok, i see."], [" yes, this is the boat.", ""]], "<bin_469> <bin_357> <bin_679> <bin_452>", "", "openimages_v1.2/imgs/1adabf79660b6887_Tree_Vehicle_Barge_5.jpg", [0.6933509707450867, 0.38427498936653137, 0.8887379765510559, 0.5182719826698303], [[0.469471, 0.357697, 0.679783, 0.452935]]]
[3, [[" i want a balloon.", "which one do you want?"], [" i want the orange one.", "is it the first one from the left?"], [" no, it's the first one from the right.", "ok, i see."], [" yes, this is the one.", ""]], "<bin_146> <bin_1> <bin_307> <bin_229>", "", "openimages_v1.2/imgs/5a9fca8a0634c496_Balloon_5.jpg", [0.14624999463558197, 0.0, 0.30687499046325684, 0.22916701436042786], [[0.14625, 0.0, 0.306875, 0.229167]]]
[4, [[" i want to buy a camera.", "which one do you want?"], [" i want the biggest one.", "is it the first one from the left?"], [" yes, that's it.", "ok, i see."], [" yes, that's it.", ""]], "<bin_1> <bin_346> <bin_611> <bin_961>", "", "openimages_v1.2/imgs/13beffd1256dcb7b_Camera_3.jpg", [0.0, 0.3465704023838043, 0.6119133830070496, 0.9620939493179321], [[0.0, 0.3465704, 0.6119134, 0.9620939]]]
[5, [[" can you give me a pot of flowers?", "which pot do you want?"], [" i want the pot on the right.", "ok, here you go."], [" i want the pot on the right.", ""]], "<bin_582> <bin_1> <bin_757> <bin_186>", "", "openimages_v1.2/imgs/bb6c74bea8150aa2_Cat_Carnivore_Mammal_Plant_Flowerpot_Animal_2.jpg", [0.5824999809265137, 0.0008329999400302768, 0.7581250071525574, 0.18666699528694153], [[0.5825, 0.000833, 0.758125, 0.186667]]]
[6, [[" that penguin is so cute!", "which one are you talking about?"], [" the one on the far left.", ""]], "<bin_1> <bin_399> <bin_169> <bin_914>", "", "openimages_v1.2/imgs/db34863d9089b482_Bird_Penguin_Animal_3.jpg", [0.0, 0.3996250033378601, 0.1693750023841858, 0.9146339297294617], [[0.0, 0.39962499999999995, 0.169375, 0.9146339999999998]]]
[7, [[" this duck is so cute!", "which one?"], [" this is the one in front of us.", "is it the closest one to us?"], [" yes.", ""]], "<bin_270> <bin_372> <bin_691> <bin_963>", "", "openimages_v1.2/imgs/77040fc87277de7b_Bird_Duck_Goose_Animal_4.jpg", [0.27000001072883606, 0.3724200129508972, 0.6918749809265137, 0.9643529653549194], [[0.27, 0.37242, 0.691875, 0.964353]]]
[8, [[" i want to buy a coin.", "which one do you want?"], [" i want the one in the middle.", "ok, here you go."], [" i want the one on the left.", ""]], "<bin_154> <bin_258> <bin_370> <bin_576>", "", "openimages_v1.2/imgs/535fc43786ac83da_Coin_4.jpg", [0.15437500178813934, 0.2583329975605011, 0.37062498927116394, 0.5766670107841492], [[0.154375, 0.258333, 0.370625, 0.576667]]]
[9, [[" i want to buy a toy car.", "which one do you want?"], [" i want the red one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" i want the second one from the left.", ""]], "<bin_315> <bin_501> <bin_444> <bin_622>", "", "openimages_v1.2/imgs/9620e5e8bbb35839_Toy_Boat_Vehicle_20.jpg", [0.3149999976158142, 0.5016670227050781, 0.4443749785423279, 0.6225000023841858], [[0.315, 0.501667, 0.4443750000000001, 0.6225]]]
[10, [[" i want to eat a piece of pizza.", "which piece do you want?"], [" i want the one on the left.", "is it the first piece from the left?"], [" yes, that's it.", ""]], "<bin_57> <bin_439> <bin_402> <bin_871>", "", "openimages_v1.2/imgs/4105533fe46dfba9_Fast food_Food_Baked goods_Pizza_Tablet computer_5.jpg", [0.057499997317790985, 0.4241670072078705, 0.4156250059604645, 0.8649999499320984], [[0.05750000000000001, 0.439167, 0.4025, 0.871667]]]
[11, [[" i want to pick an apple.", "which one do you want?"], [" i want the one on the far right.", "ok, here you go."], [" i want the one on the far right.", ""]], "<bin_888> <bin_475> <bin_998> <bin_546>", "", "openimages_v1.2/imgs/8047ac20dfde22e4_Apple_Food_Fruit_Tree_7.jpg", [0.8887590169906616, 0.47578102350234985, 0.9988290071487427, 0.5460940003395081], [[0.888759, 0.475781, 0.998829, 0.546094]]]
[12, [[" that duck is so cute!", "which one are you talking about?"], [" the one with the green head.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_519> <bin_656> <bin_619> <bin_721>", "", "openimages_v1.2/imgs/02883b8fef974c57_Bird_Tree_Duck_4.jpg", [0.5195618271827698, 0.6565420627593994, 0.6197183132171631, 0.7219626307487488], [[0.5195618153364632, 0.6565420560747663, 0.6197183098591549, 0.7219626168224299]]]
[13, [[" can you rent a bike for me?", "yes, which one do you want?"], [" the one in the middle.", "ok, here you go."], [" the one closest to the white line.", ""]], "<bin_55> <bin_878> <bin_169> <bin_998>", "", "openimages_v1.2/imgs/2b240f8c7b095972_Bicycle_Bicycle wheel_Land vehicle_House_Vehicle_Building_Tire_4.jpg", [0.0, 0.7875000238418579, 0.18437500298023224, 0.9841669797897339], [[0.05500000000000001, 0.879167, 0.16875, 0.999167]]]
[14, [[" that dog is so cute!", "which one are you talking about?"], [" the yellow one.", "is it the one on the far right?"], [" yes.", ""]], "<bin_440> <bin_383> <bin_681> <bin_998>", "", "openimages_v1.2/imgs/6d43d18c15e54b09_Dog_Animal_3.jpg", [0.44062501192092896, 0.3832390010356903, 0.6812499761581421, 0.999057948589325], [[0.440625, 0.383239, 0.68125, 0.999058]]]
[15, [[" i want to buy a loaf of bread.", "which one do you want?"], [" i want the one on the right.", "ok, here you go."], [" i want the first one on the right.", ""]], "<bin_717> <bin_24> <bin_979> <bin_941>", "", "openimages_v1.2/imgs/404617085814836f_Food_Baked goods_Bread_Pastry_4.jpg", [0.7177730202674866, 0.02419400028884411, 0.9804689288139343, 0.9419349431991577], [[0.717773, 0.024193999999999997, 0.980469, 0.941935]]]
[16, [[" can you pass me that computer?", "which one are you talking about?"], [" the one on the right.", "ok, here you go."], [" the one with the screen on.", ""]], "<bin_243> <bin_380> <bin_625> <bin_858>", "", "openimages_v1.2/imgs/1ad6a79cc2d79df2_Laptop_Musical instrument_Musical keyboard_Piano_2.jpg", [0.24356000125408173, 0.3802820146083832, 0.6252930164337158, 0.859154999256134], [[0.24356, 0.380282, 0.625293, 0.859155]]]
[17, [[" can you get me a trash can?", "which one do you want?"], [" the black one.", "is it the one on the far right?"], [" yes.", ""]], "<bin_602> <bin_225> <bin_935> <bin_995>", "", "openimages_v1.2/imgs/b6ee634e047252bb_Waste container_3.jpg", [0.6025000214576721, 0.22507400810718536, 0.9356249570846558, 0.996051013469696], [[0.6025, 0.22507399999999997, 0.935625, 0.996051]]]
[18, [[" i want to buy a dumbbell.", "which one do you want?"], [" i want the one on the shelf.", "is it the one on the left?"], [" no, it's the one on the right.", "okay, here you go."], [" there are three dumbbells on the shelf, which one do you want?", "okay, i see."], [" i want the one on the far right.", ""]], "<bin_748> <bin_490> <bin_854> <bin_637>", "", "openimages_v1.2/imgs/69828f6bc087f44d_Dumbbell_9.jpg", [0.7490230202674866, 0.4903339743614197, 0.8544920086860657, 0.6379609704017639], [[0.749023, 0.49033399999999994, 0.8544920000000001, 0.637961]]]
[19, [[" i want a drink.", "which one do you want?"], [" i want the yellow one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "okay, i see."], [" yes, that's the one.", ""]], "<bin_464> <bin_144> <bin_627> <bin_412>", "", "openimages_v1.2/imgs/4ed57693f85ad577_Beer_Drink_Food_3.jpg", [0.46406251192092896, 0.14374999701976776, 0.628125011920929, 0.4124999940395355], [[0.4640625, 0.14375, 0.628125, 0.4125]]]
[20, [[" that cow is so cute!", "which one are you talking about?"], [" the one on the far right.", ""]], "<bin_764> <bin_434> <bin_846> <bin_505>", "", "openimages_v1.2/imgs/d774cb62ba5cf9f8_Cattle_Mammal_Bull_Animal_4.jpg", [0.7646480202674866, 0.4340279996395111, 0.8466799855232239, 0.5052080154418945], [[0.764648, 0.43402799999999997, 0.8466799999999999, 0.505208]]]
[21, [[" can you give me a pillow?", "which one do you want?"], [" i want the purple one.", "is it the one on the left?"], [" yes, this is it.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_242> <bin_292> <bin_347> <bin_562>", "", "openimages_v1.2/imgs/0c0398fd0fe7a378_Pillow_Bed_5.jpg", [0.24187497794628143, 0.2922930121421814, 0.34687501192092896, 0.562969982624054], [[0.241875, 0.292293, 0.346875, 0.56297]]]
[22, [[" can you pass me that candle?", "which one do you want?"], [" i want the dark one.", "is it the first one from the left?"], [" no, it's the second one from the right.", "ok, i see."], [" yes, this is it.", ""]], "<bin_553> <bin_216> <bin_586> <bin_808>", "", "openimages_v1.2/imgs/cbc19fe5f20d72ea_Candle_9.jpg", [0.7167448997497559, 0.25, 0.7558685541152954, 0.8083332777023315], [[0.5539906103286385, 0.21666666666666667, 0.5868544600938967, 0.8083333333333333]]]
[23, [[" can you give me a loaf of bread?", "which one do you want?"], [" i want the biggest one.", "is it the one on the far right?"], [" yes, this is it.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_12> <bin_323> <bin_835> <bin_969>", "", "openimages_v1.2/imgs/a7800b7ae79beac6_Fast food_Croissant_Juice_Cocktail_Drink_Food_Baked goods_Bread_3.jpg", [0.04062499850988388, 0.35624998807907104, 0.800000011920929, 1.0], [[0.0125, 0.323438, 0.835938, 0.9703120000000001]]]
[24, [[" i want to buy a toy.", "which one do you want?"], [" i want the one with the bird.", "is it the first one from the left?"], [" yes, that's it.", "ok, i see."], [" yes, that's it.", ""]], "<bin_55> <bin_286> <bin_252> <bin_826>", "", "openimages_v1.2/imgs/d2f65874fc4e012e_Fast food_Toy_Person_Dessert_Food_Baked goods_6.jpg", [0.04312499985098839, 0.3252109885215759, 0.2224999964237213, 0.7900660037994385], [[0.05500000000000001, 0.286785, 0.251875, 0.826617]]]
[25, [[" i want to buy a painting.", "which one do you want?"], [" i want the white one.", "is it the first one from the left?"], [" no, it's the first one from the right.", "ok, i see."], [" yes, this is it.", ""]], "<bin_216> <bin_159> <bin_744> <bin_765>", "", "openimages_v1.2/imgs/3725e1771f741792_Picture frame_3.jpg", [0.8324999809265137, 0.745826005935669, 0.9987499117851257, 0.9981449842453003], [[0.21625, 0.15955499999999997, 0.745, 0.766234]]]
[26, [[" can you pass me that lego?", "which one do you want?"], [" i want the black one.", "is it the one on the right?"], [" no, it's the one on the left.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_171> <bin_85> <bin_559> <bin_561>", "", "openimages_v1.2/imgs/28e0c78074c07d07_Toy_3.jpg", [0.17125000059604645, 0.08474600315093994, 0.559374988079071, 0.5619980096817017], [[0.17125, 0.08474600000000002, 0.559375, 0.561998]]]
[27, [[" can you pass me that bottle of soap?", "which bottle do you want?"], [" i want the brown bottle.", "is it the first bottle from the right?"], [" yes, this is it.", "ok, i see."], [" yes, this is the bottle.", ""]], "<bin_546> <bin_264> <bin_629> <bin_520>", "", "openimages_v1.2/imgs/aaa7694cefcb1f16_Sink_Bottle_3.jpg", [0.546875, 0.2644349932670593, 0.6293749809265137, 0.520501971244812], [[0.546875, 0.264435, 0.629375, 0.520502]]]
[28, [[" i want to buy a sword.", "which one do you want?"], [" i want the middle one.", "ok, here you go."], [" i want the one on the far right.", ""]], "<bin_682> <bin_48> <bin_755> <bin_949>", "", "openimages_v1.2/imgs/cea724503fb2517f_Sword_Weapon_10.jpg", [0.7875000238418579, 0.10833333432674408, 0.875, 0.9375], [[0.6828125, 0.04791666666666667, 0.75625, 0.95]]]
[29, [[" this lion is so cute!", "which one are you talking about?"], [" the one standing.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_358> <bin_152> <bin_845> <bin_640>", "", "openimages_v1.2/imgs/e064b45d1f89963f_Carnivore_Plant_Lion_Animal_4.jpg", [0.5699999928474426, 0.2702449858188629, 0.7875000238418579, 0.48022598028182983], [[0.358125, 0.152542, 0.8462499999999998, 0.640301]]]
[30, [[" i want to eat a pomegranate.", "which one do you want?"], [" i want the one in the middle.", "is it the one with the red leaves on the right?"], [" yes, this is it.", ""]], "<bin_238> <bin_299> <bin_629> <bin_858>", "", "openimages_v1.2/imgs/0a8543063cd9b347_Fruit_Orange_Grapefruit_Pomegranate_10.jpg", [0.5356249809265137, 0.19397400319576263, 0.7250000238418579, 0.54237300157547], [[0.238125, 0.299435, 0.63, 0.858757]]]
[31, [[" can you pass me that computer?", "which one are you talking about?"], [" the white one.", "is it the one on the right?"], [" no, it's the one in the middle.", ""]], "<bin_386> <bin_280> <bin_565> <bin_459>", "", "openimages_v1.2/imgs/d9c87315fdaef6e2_Laptop_Computer keyboard_Desk_Office building_Computer monitor_Furniture_2.jpg", [0.6031249761581421, 0.6266670227050781, 0.8649999499320984, 0.786666989326477], [[0.386875, 0.28, 0.565625, 0.459167]]]
[32, [[" i want a bottle of wine.", "which bottle do you want?"], [" i want the transparent bottle.", "is it the first bottle from the left?"], [" no, it's the first bottle from the right.", "okay, i see."], [" i want the first bottle from the left.", ""]], "<bin_504> <bin_368> <bin_640> <bin_683>", "", "openimages_v1.2/imgs/5f038029ce66c6d6_Drink_Table_Tableware_Bottle_Wine_Furniture_10.jpg", [0.07124999910593033, 0.059098999947309494, 0.3174999952316284, 0.5206379890441895], [[0.504375, 0.368668, 0.640625, 0.6838650000000001]]]
[33, [[" can you pass me that book?", "which one do you want?"], [" the white one.", "is it the one on the far right?"], [" no, it's the one in the middle.", "ok, here you go."], [" yes, that's it.", ""]], "<bin_541> <bin_75> <bin_756> <bin_905>", "", "openimages_v1.2/imgs/0658db1f0796d47e_Book_8.jpg", [0.5957030057907104, 0.09104300290346146, 0.8203120231628418, 0.906020998954773], [[0.541992, 0.07489, 0.756836, 0.906021]]]
[34, [[" that cat is so cute!", "which one are you talking about?"], [" the white one.", "is it the one on the far right?"], [" yes.", ""]], "<bin_402> <bin_376> <bin_699> <bin_599>", "", "openimages_v1.2/imgs/66fff12bb225cc23_Carnivore_Cat_Animal_3.jpg", [0.4023439884185791, 0.3761970102787018, 0.6992189884185791, 0.5991790294647217], [[0.402344, 0.376197, 0.699219, 0.599179]]]
[35, [[" that kitten is so cute!", "which one are you talking about?"], [" the gray one.", "is it the one on the far right?"], [" no, it's in the lower right corner.", ""]], "<bin_688> <bin_565> <bin_877> <bin_707>", "", "openimages_v1.2/imgs/f6c84c3bd9adbcb4_Cat_Carnivore_Mammal_Animal_Human hand_3.jpg", [0.6890624761581421, 0.565625011920929, 0.878125011920929, 0.707812488079071], [[0.6890625, 0.565625, 0.878125, 0.7078125]]]
[36, [[" can you give me a radish?", "which one do you want?"], [" i want the biggest one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" yes, this is it.", ""]], "<bin_10> <bin_590> <bin_280> <bin_985>", "", "openimages_v1.2/imgs/4ebee17068e1b1fa_Radish_Food_Vegetable_5.jpg", [0.009999999776482582, 0.5902090072631836, 0.28062498569488525, 0.9864009618759155], [[0.01, 0.590209, 0.280625, 0.986401]]]
[37, [[" can you give me a bottle of beer?", "which bottle do you want?"], [" i want the bottle on the right.", "ok, here you go."], [" i want the bottle on the right of the glass.", ""]], "<bin_674> <bin_395> <bin_862> <bin_997>", "", "openimages_v1.2/imgs/426999d45b22fa43_Beer_Drink_Bottle_Flowerpot_1.jpg", [0.6797389984130859, 0.4117650091648102, 0.8741829991340637, 0.9918299913406372], [[0.674837, 0.395425, 0.8627450000000001, 0.9983660000000001]]]
[38, [[" can you pass me that trash can?", "which one are you talking about?"], [" the one with the orange label.", "ok, here you go."], [" the one in the middle.", ""]], "<bin_76> <bin_643> <bin_224> <bin_998>", "", "openimages_v1.2/imgs/0e0f54a43f8cf3a6_Waste container_3.jpg", [0.22624997794628143, 0.5744439959526062, 0.546875, 0.9988890886306763], [[0.075625, 0.643333, 0.22437499999999996, 0.9988890000000001]]]
[39, [[" i want to buy a piano.", "which one do you want?"], [" i want the one by the window.", "is it the first one from the right?"], [" no, it's the second one from the right.", "okay, i see."], [" yes, this is the piano.", ""]], "<bin_594> <bin_297> <bin_733> <bin_577>", "", "openimages_v1.2/imgs/93c1d11d4ba556eb_Desk_Harpsichord_Musical instrument_Musical keyboard_Piano_Furniture_Building_5.jpg", [0.714062511920929, 0.31388887763023376, 0.9375, 0.6305555701255798], [[0.595, 0.297778, 0.73375, 0.577778]]]
[40, [[" that balloon is so beautiful!", "which one are you talking about?"], [" the red one.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_126> <bin_349> <bin_223> <bin_477>", "", "openimages_v1.2/imgs/e1c00b657d06ec32_Balloon_5.jpg", [0.1262499988079071, 0.3494580090045929, 0.22312501072883606, 0.4770640432834625], [[0.12625, 0.349458, 0.22312499999999996, 0.477064]]]
[41, [[" that flag is so beautiful!", "which side are you talking about?"], [" the side on the far left.", ""]], "<bin_315> <bin_163> <bin_392> <bin_433>", "", "openimages_v1.2/imgs/29e1491b2d21a666_Flag_Building_4.jpg", [0.3154299855232239, 0.16276000440120697, 0.39257800579071045, 0.4335939884185791], [[0.31543, 0.16276, 0.392578, 0.433594]]]
[42, [[" can you pass me that bottle of water?", "which bottle do you want?"], [" the bottle in the lower right corner.", "ok, here you go."], [" the bottle with the blue label.", ""]], "<bin_642> <bin_502> <bin_759> <bin_943>", "", "openimages_v1.2/imgs/9a54bb17b3b93159_Drink_Bottle_11.jpg", [0.8043749928474426, 0.5208330154418945, 0.9700000882148743, 0.9941669702529907], [[0.6421875, 0.5020833333333333, 0.759375, 0.94375]]]
[43, [[" can you help me move that pot of flowers?", "which pot are you talking about?"], [" the pot on the far right.", ""]], "<bin_904> <bin_499> <bin_965> <bin_544>", "", "openimages_v1.2/imgs/54c7b7a84edcd21b_Houseplant_Plant_Tree_Flower_Flowerpot_5.jpg", [0.8212500810623169, 0.5799999833106995, 0.9037500619888306, 0.6700000166893005], [[0.905, 0.499167, 0.965625, 0.544167]]]
[44, [[" i want a piece of cake.", "which piece do you want?"], [" i want the one on the plate.", "is it the one on the left?"], [" yes, this is it.", ""]], "<bin_1> <bin_1> <bin_438> <bin_467>", "", "openimages_v1.2/imgs/c4774b3ec4ba9685_Snack_Dessert_Mug_Coffee cup_Food_Saucer_Tableware_Baked goods_Cake_Pastry_1.jpg", [0.0, 0.0, 0.4387499690055847, 0.4675000309944153], [[0.0, 0.0, 0.4387499999999999, 0.4675]]]
[45, [[" i want to buy an ipod.", "which one do you want?"], [" i want the white one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" yes, that's the one.", ""]], "<bin_95> <bin_280> <bin_260> <bin_588>", "", "openimages_v1.2/imgs/083b3e5da7c92d67_Ipod_4.jpg", [0.024413999170064926, 0.3524230122566223, 0.140625, 0.521291971206665], [[0.094727, 0.28047, 0.260742, 0.58884]]]
[46, [[" i want to eat a piece of cake.", "which one do you want?"], [" i want the one with a slice of lemon on it.", "is it the first piece from the right?"], [" yes, this is it.", "ok, i see."], [" yes, this is it.", ""]], "<bin_210> <bin_40> <bin_903> <bin_856>", "", "openimages_v1.2/imgs/590e8288ada253bb_Muffin_Dessert_Food_Fruit_Baked goods_Lemon_3.jpg", [0.21062497794628143, 0.03999999910593033, 0.9037500619888306, 0.8566669225692749], [[0.210625, 0.04, 0.9037500000000002, 0.856667]]]
[47, [[" can you give me a blender?", "yes, which one do you want?"], [" i want the one in the middle.", "ok, here you go."], [" i want the one in the middle of the second picture.", ""]], "<bin_336> <bin_343> <bin_662> <bin_921>", "", "openimages_v1.2/imgs/9c9cc9cee894d22e_Dessert_Food processor_Blender_Mixer_Juice_4.jpg", [0.33625730872154236, 0.34375, 0.6622806787490845, 0.921875], [[0.3362573, 0.34375, 0.6622807, 0.921875]]]
[48, [[" that zebra is so cute!", "which one are you talking about?"], [" the one on the right.", ""]], "<bin_552> <bin_623> <bin_857> <bin_948>", "", "openimages_v1.2/imgs/e81343339479569e_Zebra_3.jpg", [0.5525000095367432, 0.6233329772949219, 0.8574999570846558, 0.9491670727729797], [[0.5525, 0.623333, 0.8575000000000002, 0.949167]]]
[49, [[" i want to buy a sofa.", "which one do you want?"], [" i want the blue one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "okay, i see."], [" yes, this is it.", ""]], "<bin_163> <bin_575> <bin_552> <bin_891>", "", "openimages_v1.2/imgs/5ed1239f84352714_Couch_Furniture_3.jpg", [0.16285699605941772, 0.5752379894256592, 0.552856981754303, 0.89142906665802], [[0.162857, 0.575238, 0.552857, 0.891429]]]
[50, [[" that goldfish is so beautiful!", "which one are you talking about?"], [" the one that swims alone.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_449> <bin_234> <bin_517> <bin_302>", "", "openimages_v1.2/imgs/b005b2e77f4fd8ea_Goldfish_Fish_Animal_3.jpg", [0.2800000011920929, 0.35374999046325684, 0.4291669726371765, 0.4456250071525574], [[0.449167, 0.23375000000000004, 0.5175, 0.301875]]]
[51, [[" that duck is so cute!", "which one are you talking about?"], [" the one with the black head.", "is it the one on the far right?"], [" yes.", ""]], "<bin_781> <bin_404> <bin_965> <bin_558>", "", "openimages_v1.2/imgs/0391aa378a4094b3_Bird_Duck_12.jpg", [0.7818750143051147, 0.4043149948120117, 0.9662500619888306, 0.5581610202789307], [[0.781875, 0.404315, 0.9662500000000002, 0.558161]]]
[52, [[" can you give me a candle?", "which one do you want?"], [" i want the orange one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" yes, this is it.", ""]], "<bin_297> <bin_665> <bin_333> <bin_745>", "", "openimages_v1.2/imgs/c6fe252a4e47916d_Candle_7.jpg", [0.2973395884037018, 0.6658878326416016, 0.3333333432674408, 0.7453271150588989], [[0.297339593114241, 0.6658878504672897, 0.3333333333333333, 0.7453271028037384]]]
[53, [[" i want to buy a toy.", "which one do you want?"], [" i want the yellow one.", "is it the first one from the left?"], [" yes, that's it.", "ok, i see."], [" yes, that's it.", ""]], "<bin_251> <bin_818> <bin_776> <bin_977>", "", "openimages_v1.2/imgs/b1455ef90644bb29_Toy_11.jpg", [0.3687500059604645, 0.3031249940395355, 0.476561963558197, 0.40781301259994507], [[0.251563, 0.81875, 0.776563, 0.978125]]]
[54, [[" that penguin is so cute!", "which one are you talking about?"], [" the biggest one.", ""]], "<bin_387> <bin_180> <bin_867> <bin_856>", "", "openimages_v1.2/imgs/5cc33103845e5519_Bird_Penguin_Animal_4.jpg", [0.3869970142841339, 0.1796880066394806, 0.8679050207138062, 0.857030987739563], [[0.386997, 0.179688, 0.867905, 0.8570309999999999]]]
[55, [[" can you give me that flower pot?", "which one do you want?"], [" i want the smallest one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" yes, this is it.", ""]], "<bin_421> <bin_150> <bin_593> <bin_322>", "", "openimages_v1.2/imgs/3d30ba806da242a7_Flowerpot_3.jpg", [0.2918750047683716, 0.2819580137729645, 0.5456249713897705, 0.5457839965820312], [[0.421875, 0.14965986394557823, 0.59375, 0.3219954648526077]]]
[56, [[" this model is so beautiful!", "which one?"], [" this is the one in front of us.", "is it the closest to us?"], [" yes.", ""]], "<bin_140> <bin_51> <bin_784> <bin_943>", "", "openimages_v1.2/imgs/3befa89abdbf45e1_Toy_3.jpg", [0.14000000059604645, 0.05084700137376785, 0.7850000262260437, 0.9444440007209778], [[0.14, 0.050847, 0.785, 0.944444]]]
[57, [[" i want to eat a tomato.", "which one do you want?"], [" i want the one on the left.", "ok, here you go."], [" i want the one on the far left.", ""]], "<bin_35> <bin_443> <bin_228> <bin_708>", "", "openimages_v1.2/imgs/ffb3a1e36fa649a0_Food_Tomato_Vegetable_4.jpg", [0.03541700169444084, 0.44305601716041565, 0.22812500596046448, 0.7083330154418945], [[0.035417, 0.443056, 0.228125, 0.708333]]]
[58, [[" i want to buy a painting.", "which one do you want?"], [" i want the one on the right.", "is it the first one from the right?"], [" no, it's the second one from the right.", "ok, i see."], [" that's right, that's it.", ""]], "<bin_802> <bin_380> <bin_870> <bin_478>", "", "openimages_v1.2/imgs/d21186e493ed0f2c_Picture frame_Furniture_Building_6.jpg", [0.7956249713897705, 0.4941670000553131, 0.8693750500679016, 0.5958330035209656], [[0.8025, 0.380833, 0.8706250000000001, 0.478333]]]
[59, [[" that cat is so cute!", "which one are you talking about?"], [" the black one.", "is it the one with the black head?"], [" yes.", ""]], "<bin_364> <bin_22> <bin_818> <bin_871>", "", "openimages_v1.2/imgs/7deef2d7713c7044_Carnivore_Cat_Animal_3.jpg", [0.3643749952316284, 0.02157600037753582, 0.8187500238418579, 0.8714820146560669], [[0.364375, 0.021576000000000005, 0.81875, 0.871482]]]
[60, [[" can you pass me that trash can?", "which one are you talking about?"], [" the white one.", "is it the one on the far right?"], [" yes.", ""]], "<bin_893> <bin_137> <bin_990> <bin_321>", "", "openimages_v1.2/imgs/032fc8ee2e06c93d_Waste container_5.jpg", [0.8943750262260437, 0.13683199882507324, 0.9906250238418579, 0.3214620053768158], [[0.8943749999999999, 0.136832, 0.990625, 0.321462]]]
[61, [[" i want to buy a toy car.", "which one do you want?"], [" i want that green car.", "is it the first one from the right?"], [" yes, that's it.", ""]], "<bin_292> <bin_723> <bin_413> <bin_895>", "", "openimages_v1.2/imgs/8b240c08cb38382e_Toy_Bus_Land vehicle_Vehicle_13.jpg", [0.29249998927116394, 0.7232649922370911, 0.4131249785423279, 0.8958719968795776], [[0.2925, 0.723265, 0.4131250000000001, 0.895872]]]
[62, [[" can you give me that flower pot?", "which one do you want?"], [" i want the orange one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" yes, this is it.", ""]], "<bin_1> <bin_178> <bin_566> <bin_997>", "", "openimages_v1.2/imgs/82cb111235f877d4_Houseplant_Plant_Flowerpot_3.jpg", [0.0, 0.17833299934864044, 0.5662500262260437, 0.9983329772949219], [[0.0, 0.178333, 0.56625, 0.998333]]]
[63, [[" can you pass me that picture?", "which one are you talking about?"], [" the one on the shelf.", "is it the one on the left?"], [" no, it's the one on the right.", "ok, here you go."], [" the one next to the yellow flowers.", ""]], "<bin_499> <bin_313> <bin_625> <bin_348>", "", "openimages_v1.2/imgs/6decd8fc369be088_Table_Picture frame_Furniture_Lamp_3.jpg", [0.4844779968261719, 0.28312501311302185, 0.6001880168914795, 0.3412500023841858], [[0.49953, 0.31375, 0.625588, 0.348125]]]
[64, [[" i want a drink.", "which one do you want?"], [" the one on the right.", "ok, here you go."], [" the one on the far right.", ""]], "<bin_788> <bin_170> <bin_993> <bin_411>", "", "openimages_v1.2/imgs/54d223fdfe3bfd7d_Dessert_Drink_Dairy Product_Food_Tableware_Rose_Flower_3.jpg", [0.7886409759521484, 0.1706250011920929, 0.9944130182266235, 0.41187500953674316], [[0.788641, 0.170625, 0.9944129999999999, 0.41187500000000005]]]
[65, [[" can you pass me that computer?", "which one are you talking about?"], [" the one on the left.", "ok, here you go."], [" the one on the far left.", ""]], "<bin_1> <bin_169> <bin_541> <bin_998>", "", "openimages_v1.2/imgs/5b39c13a7271eed4_Laptop_Computer keyboard_3.jpg", [0.0, 0.16885599493980408, 0.5418750047683716, 0.9990619421005249], [[0.0, 0.168856, 0.541875, 0.999062]]]
[66, [[" i want to eat a piece of bread.", "which piece do you want?"], [" i want the biggest piece.", "is it the first piece from the left?"], [" yes.", ""]], "<bin_148> <bin_313> <bin_597> <bin_987>", "", "openimages_v1.2/imgs/0e78d71b5ad1a6f1_Fast food_Food_Bread_4.jpg", [0.1484380066394806, 0.3133240044116974, 0.5976560115814209, 0.9882869720458984], [[0.148438, 0.313324, 0.597656, 0.988287]]]
[67, [[" can you pass me that saucer?", "which one do you want?"], [" i want the saucer under the cup.", "ok, here you go."], [" i want the saucer under the cup of tea.", ""]], "<bin_226> <bin_681> <bin_822> <bin_928>", "", "openimages_v1.2/imgs/24831c51ae84b063_Food_Teapot_Saucer_Tea_1.jpg", [0.22607901692390442, 0.6812499761581421, 0.8227020502090454, 0.9293749928474426], [[0.22607900000000003, 0.68125, 0.822702, 0.929375]]]
[68, [[" can you give me that toy?", "which one do you want?"], [" i want the green one.", "is it the one with the green head?"], [" yes, this is it.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_240> <bin_345> <bin_540> <bin_902>", "", "openimages_v1.2/imgs/ddbc0565e70105a4_Toy_Carnivore_Dog_Animal_2.jpg", [0.24062499403953552, 0.345814973115921, 0.5406249761581421, 0.9030836820602417], [[0.240625, 0.3458149779735683, 0.540625, 0.9030837004405287]]]
[69, [[" can you pass me that plate?", "which plate are you talking about?"], [" the plate with vegetables.", "ok, here you go."], [" the plate in the middle.", ""]], "<bin_107> <bin_450> <bin_925> <bin_744>", "", "openimages_v1.2/imgs/0108b75ebb6140a3_Food_Platter_5.jpg", [0.10750000178813934, 0.44999998807907104, 0.9262499809265137, 0.7450000047683716], [[0.1075, 0.45, 0.9262499999999999, 0.745]]]
[70, [[" this bird is so cute!", "which one?"], [" this is the one in front of us.", "is it the closest one to us?"], [" no, it's on top of the bowl.", ""]], "<bin_573> <bin_277> <bin_985> <bin_695>", "", "openimages_v1.2/imgs/b7da0ce8a1a9f923_Bird_Sparrow_Animal_3.jpg", [0.09187500178813934, 0.5581009984016418, 0.4074999690055847, 0.8363340497016907], [[0.573125, 0.277414, 0.985625, 0.695581]]]
[71, [[" i want to buy a doll.", "which one do you want?"], [" i want the yellow one.", "is it the first one from the left?"], [" no, it's the first one from the right.", "ok, i see."], [" yes, this is the doll.", ""]], "<bin_507> <bin_28> <bin_656> <bin_504>", "", "openimages_v1.2/imgs/9bacd8f5b12886e6_Toy_17.jpg", [0.901562511920929, 0.7840375304222107, 0.9984375238418579, 0.9976526498794556], [[0.5078125, 0.028169014084507043, 0.65625, 0.5046948356807511]]]
[72, [[" i want a bottle of beer.", "which bottle do you want?"], [" i want the bottle with the blue label.", "is it the first bottle from the right?"], [" yes, that bottle.", ""]], "<bin_659> <bin_121> <bin_759> <bin_906>", "", "openimages_v1.2/imgs/2ff73abfc97e25df_Beer_Drink_Bottle_6.jpg", [0.8671875, 0.2594752311706543, 0.9937499761581421, 0.9125364422798157], [[0.659375, 0.121354, 0.759375, 0.906651]]]
[73, [[" i want a piece of cake.", "which piece do you want?"], [" i want the one on the white plate.", "is it the one on the left?"], [" no, it's the one on the right.", "okay, here you go."], [" the one with the pink cream on it.", ""]], "<bin_291> <bin_438> <bin_436> <bin_687>", "", "openimages_v1.2/imgs/0dfe517e0d588242_Fast food_Snack_Dessert_Food_Baked goods_Pastry_15.jpg", [0.1640625, 0.15926893055438995, 0.2578125, 0.2819843292236328], [[0.29125, 0.438413, 0.4362500000000001, 0.687891]]]
[74, [[" can you pass me that lego?", "which one do you want?"], [" i want the pink one.", "is it the one on the far right?"], [" no, it's the biggest one.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_645> <bin_352> <bin_919> <bin_773>", "", "openimages_v1.2/imgs/23cd270fb0b87b1e_Toy_6.jpg", [0.6456249952316284, 0.35261398553848267, 0.9199999570846558, 0.7741940021514893], [[0.645625, 0.352614, 0.9200000000000002, 0.7741940000000002]]]
[75, [[" can you give me a piece of cake?", "which piece do you want?"], [" i want the one in the lower left corner.", "ok, here you go."], [" i want the one in the lower left corner.", ""]], "<bin_213> <bin_847> <bin_420> <bin_910>", "", "openimages_v1.2/imgs/ea22b657ef00566f_Muffin_Food_7.jpg", [0.21278901398181915, 0.8481249809265137, 0.4200659692287445, 0.9112499952316284], [[0.212789, 0.8481249999999999, 0.420066, 0.9112500000000001]]]
[76, [[" that baboon is so cute!", "which one?"], [" the small one.", "is it the one on the far right?"], [" no, it's the one on the left.", ""]], "<bin_253> <bin_367> <bin_448> <bin_701>", "", "openimages_v1.2/imgs/3796a9c637a86cd3_Monkey_Animal_3.jpg", [0.2534559965133667, 0.367451012134552, 0.44854098558425903, 0.702148973941803], [[0.253456, 0.367451, 0.448541, 0.702149]]]
[77, [[" i want to buy a picture.", "which one do you want?"], [" i want the black one.", "is it the first one from the left?"], [" no, it's the first one from the right.", "ok, i see."], [" i want the first one from the right.", ""]], "<bin_783> <bin_359> <bin_889> <bin_538>", "", "openimages_v1.2/imgs/d98e42fa9b417241_Picture frame_Window_4.jpg", [0.7837499976158142, 0.35910698771476746, 0.8899999856948853, 0.5386599898338318], [[0.78375, 0.359107, 0.89, 0.53866]]]
[78, [[" can you give me an apple?", "which one do you want?"], [" i want the yellow one.", "is it the one on the far right?"], [" yes, this is it.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_700> <bin_166> <bin_947> <bin_746>", "", "openimages_v1.2/imgs/9a94771ecbe28f1a_Apple_Food_Fruit_4.jpg", [0.700780987739563, 0.16647100448608398, 0.9484379887580872, 0.7467759847640991], [[0.700781, 0.166471, 0.948438, 0.746776]]]
[79, [[" can you pass me that trash can?", "which one are you talking about?"], [" the blue one.", "is it the one on the far right?"], [" yes, this is it.", "ok, here it is."], [" yes, this is it.", ""]], "<bin_543> <bin_193> <bin_661> <bin_582>", "", "openimages_v1.2/imgs/436d1b6f16686dea_Waste container_4.jpg", [0.543749988079071, 0.1933329999446869, 0.6612499952316284, 0.5824999809265137], [[0.54375, 0.193333, 0.66125, 0.5825]]]
[80, [[" i want to eat a tomato.", "which one do you want?"], [" i want the small one.", "is it the one on the far right?"], [" yes.", ""]], "<bin_570> <bin_485> <bin_896> <bin_716>", "", "openimages_v1.2/imgs/63e52a01a767e015_Food_Tomato_Vegetable_Shrimp_4.jpg", [0.4906249940395355, 0.5874999761581421, 0.8578130006790161, 0.9833329916000366], [[0.570312, 0.485417, 0.896875, 0.716667]]]
[81, [[" i want to use that urinal.", "which one?"], [" the one in the middle.", ""]], "<bin_583> <bin_159> <bin_820> <bin_436>", "", "openimages_v1.2/imgs/754ef77cb5aa0d6d_Bathroom accessory_Bidet_Toilet_7.jpg", [0.3854166567325592, 0.17499999701976776, 0.75, 0.5546875], [[0.5833333, 0.159375, 0.8208333, 0.4359375]]]
[82, [[" can you pass me that flag?", "which one are you talking about?"], [" the red one.", "is it the one on the far right?"], [" no, it's the top one.", ""]], "<bin_317> <bin_27> <bin_369> <bin_69>", "", "openimages_v1.2/imgs/02e5d2d5d2e6f838_Boat_Watercraft_Flag_3.jpg", [0.31687501072883606, 0.027204999700188637, 0.36937499046325684, 0.06941799819469452], [[0.316875, 0.027205, 0.369375, 0.069418]]]
[83, [[" can you give me a loaf of bread?", "which one do you want?"], [" i want the middle one.", "ok, here you go."], [" i want the middle one.", ""]], "<bin_287> <bin_165> <bin_873> <bin_509>", "", "openimages_v1.2/imgs/33588b1ccdf296cd_Food_Baked goods_Bread_3.jpg", [0.2874999940395355, 0.16500000655651093, 0.8737499117851257, 0.5099999904632568], [[0.2875, 0.165, 0.87375, 0.51]]]
[84, [[" that chicken is so cute!", "which one are you talking about?"], [" the yellow one.", "is it the one on the far right?"], [" no, it's the one on the far right.", ""]], "<bin_547> <bin_388> <bin_813> <bin_852>", "", "openimages_v1.2/imgs/79236bccf83066ca_Plant_Chicken_Animal_5.jpg", [0.5477308034896851, 0.3888888955116272, 0.8137715458869934, 0.8527777791023254], [[0.5477308294209703, 0.3888888888888889, 0.8137715179968701, 0.8527777777777777]]]
[85, [[" can you give me a cookie?", "which one do you want?"], [" i want the top one.", "ok, here you go."], [" i want the one near the fork.", ""]], "<bin_331> <bin_171> <bin_744> <bin_546>", "", "openimages_v1.2/imgs/14cedb6a84bbd85b_Pancake_5.jpg", [0.6681249737739563, 0.15416699647903442, 0.9993749856948853, 0.5933330059051514], [[0.33125, 0.171667, 0.744375, 0.546667]]]
[86, [[" i want to buy a soccer ball.", "which one do you want?"], [" i want the white one.", "is it the first one from the right?"], [" no, it's the second one from the right.", "ok, i see."], [" i want the second one from the right.", ""]], "<bin_703> <bin_432> <bin_785> <bin_536>", "", "openimages_v1.2/imgs/89bbe09e3ae63eee_Football_Ball_Sports equipment_6.jpg", [0.7037500143051147, 0.4327380061149597, 0.7856249809265137, 0.536217987537384], [[0.70375, 0.43273800000000007, 0.785625, 0.536218]]]
[87, [[" i want a bottle of wine.", "which bottle do you want?"], [" i want the bottle next to the white wine.", "is it the bottle with the red label?"], [" no, it's the bottle with the black label.", "okay, i see."], [" i want the bottle next to the white wine.", ""]], "<bin_229> <bin_319> <bin_304> <bin_869>", "", "openimages_v1.2/imgs/b5131a9b28785a86_Drink_Bottle_4.jpg", [0.22949199378490448, 0.3191800117492676, 0.3046880066394806, 0.8696929812431335], [[0.22949199999999997, 0.31918, 0.304688, 0.869693]]]
[88, [[" can you pass me that glass of wine?", "which glass do you want?"], [" i want the first glass.", "is it the first glass from the right?"], [" no, it's the first glass from the left.", "ok, i see."], [" yes, that's right.", ""]], "<bin_100> <bin_495> <bin_189> <bin_604>", "", "openimages_v1.2/imgs/efd13a8dcab6d7f4_Drink_Table_Tableware_Bottle_Wine_Wine glass_Furniture_5.jpg", [0.10000000149011612, 0.4958333373069763, 0.18906250596046448, 0.6041666865348816], [[0.1, 0.49583333333333335, 0.1890625, 0.6041666666666666]]]
[89, [[" that cat is so cute!", "which one?"], [" the black one.", "is it the one on the far right?"], [" no, it's on the floor.", ""]], "<bin_118> <bin_533> <bin_373> <bin_678>", "", "openimages_v1.2/imgs/0047264e06512e75_Cat_Carnivore_Cat furniture_5.jpg", [0.707161009311676, 0.13556300103664398, 0.8925830125808716, 0.24559900164604187], [[0.117647, 0.533451, 0.373402, 0.678697]]]
[90, [[" i want to eat an apple.", "which one do you want?"], [" i want the top one.", "is it the first one from the left?"], [" yes, that's it.", ""]], "<bin_635> <bin_126> <bin_795> <bin_399>", "", "openimages_v1.2/imgs/70887fdeae8c46af_Apple_Food_Fruit_5.jpg", [0.6359379887580872, 0.12616799771785736, 0.7953130006790161, 0.3995330035686493], [[0.635938, 0.126168, 0.795313, 0.39953299999999997]]]
[91, [[" that cow is so cute!", "which one are you talking about?"], [" the black one.", "is it the one on the far right?"], [" no, it's the one on the far left.", ""]], "<bin_222> <bin_450> <bin_443> <bin_755>", "", "openimages_v1.2/imgs/ff9ca33525158d68_Cattle_Animal_7.jpg", [0.22187499701976776, 0.44999998807907104, 0.4437499940395355, 0.7562500238418579], [[0.221875, 0.45, 0.44375, 0.75625]]]
[92, [[" this dog is so cute!", "which one are you talking about?"], [" the one in front of us.", "is it the one on the far right?"], [" no, it's the one in front of us.", ""]], "<bin_367> <bin_82> <bin_889> <bin_998>", "", "openimages_v1.2/imgs/17d22bb244e364af_Carnivore_Dog_Animal_8.jpg", [0.3675000071525574, 0.08161400258541107, 0.8899999856948853, 0.9990619421005249], [[0.3675, 0.081614, 0.89, 0.999062]]]
[93, [[" that panda is so cute!", "which one are you talking about?"], [" the one in the middle.", ""]], "<bin_296> <bin_301> <bin_533> <bin_653>", "", "openimages_v1.2/imgs/329035dece1a9746_Panda_4.jpg", [0.3499999940395355, 0.4608429968357086, 0.621999979019165, 0.9789159893989563], [[0.296, 0.301205, 0.534, 0.653614]]]
[94, [[" can you pass me that paddle?", "which one do you want?"], [" i want the yellow one.", "is it the one on the far right?"], [" no, it's the one on the far left.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_1> <bin_95> <bin_998> <bin_331>", "", "openimages_v1.2/imgs/189d6a75be8664d3_Paddle_Boat_Sports equipment_Vehicle_Canoe_4.jpg", [0.0, 0.09514400362968445, 0.9993749856948853, 0.3310210108757019], [[0.0, 0.09514400000000002, 0.999375, 0.331021]]]
[95, [[" that duck is so cute!", "which one are you talking about?"], [" the one in the middle.", ""]], "<bin_280> <bin_194> <bin_424> <bin_358>", "", "openimages_v1.2/imgs/06a9185dbbda0fc6_Bird_Duck_5.jpg", [0.44600000977516174, 0.2796989977359772, 0.6159999966621399, 0.43909797072410583], [[0.28, 0.193985, 0.42400000000000004, 0.35789499999999996]]]
[96, [[" i want to eat a carrot.", "which one do you want?"], [" i want the yellow one.", "is it the one on the far right?"], [" no, it's the one on the far left.", "okay, here you go."], [" yes, this is the one.", ""]], "<bin_212> <bin_558> <bin_757> <bin_905>", "", "openimages_v1.2/imgs/d93a9ec22d44e493_Food_Fruit_Carrot_Radish_Vegetable_5.jpg", [0.20280811190605164, 0.3891213536262512, 0.8127925992012024, 0.8305438756942749], [[0.21216847999999996, 0.5585774, 0.75819033, 0.90585774]]]
[97, [[" i want a glass of wine.", "which one do you want?"], [" i want the one on the left.", "ok, here you go."], [" i want the one on the far left.", ""]], "<bin_6> <bin_502> <bin_188> <bin_773>", "", "openimages_v1.2/imgs/0f243291f28cfc0c_Drink_Tableware_Wine_Wine glass_8.jpg", [0.0062500000931322575, 0.5022220015525818, 0.18812499940395355, 0.7733330130577087], [[0.00625, 0.502222, 0.188125, 0.7733329999999999]]]
[98, [[" i want to buy a pot of flowers.", "which pot do you want?"], [" i want the pot of white flowers.", "is it the first pot from the left?"], [" yes, this is the pot.", ""]], "<bin_262> <bin_706> <bin_362> <bin_910>", "", "openimages_v1.2/imgs/a73d525c14d5142d_Houseplant_Plant_Flower_Flowerpot_3.jpg", [0.26249998807907104, 0.7066670060157776, 0.36250001192092896, 0.9111109972000122], [[0.2625, 0.706667, 0.3625, 0.911111]]]
[99, [[" that cow is so cute!", "which one are you talking about?"], [" the brown one.", "is it the one on the far right?"], [" yes.", ""]], "<bin_731> <bin_464> <bin_897> <bin_906>", "", "openimages_v1.2/imgs/551f195eb42d1b21_Cattle_Plant_Tree_Animal_3.jpg", [0.731249988079071, 0.46483200788497925, 0.8981249928474426, 0.9067279696464539], [[0.73125, 0.464832, 0.898125, 0.906728]]]
[100, [[" i want to buy a toy.", "which one do you want?"], [" i want that robot.", "is it the first one from the left?"], [" no, it's the first one from the right.", "ok, i see."], [" yes, that's it.", ""]], "<bin_315> <bin_129> <bin_661> <bin_816>", "", "openimages_v1.2/imgs/a03c3d1f1fa60a0e_Toy_3.jpg", [0.692969024181366, 0.28194400668144226, 0.9703119993209839, 0.7986109852790833], [[0.315625, 0.129167, 0.661719, 0.8166670000000001]]]
[101, [[" that duck is so cute!", "which one are you talking about?"], [" the white one.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_518> <bin_222> <bin_560> <bin_329>", "", "openimages_v1.2/imgs/31528438686916f2_Bird_Duck_Goose_Animal_18.jpg", [0.05312500149011612, 0.27250000834465027, 0.19249999523162842, 0.42750000953674316], [[0.518125, 0.2225, 0.560625, 0.329167]]]
[102, [[" that sea lion is so cute!", "which one are you talking about?"], [" the one lying on its stomach.", "is it the one on the far right?"], [" yes.", ""]], "<bin_570> <bin_171> <bin_997> <bin_360>", "", "openimages_v1.2/imgs/523c2d64cf2561ae_Harbor seal_8.jpg", [0.5737500190734863, 0.16885599493980408, 0.9993749856948853, 0.32270199060440063], [[0.5703125, 0.17096018735362997, 0.9984375, 0.36065573770491804]]]
[103, [[" can you pass me that throw pillow?", "which one do you want?"], [" i want the one on the right.", "ok, here you go."], [" i want the one with the letters on it.", ""]], "<bin_475> <bin_215> <bin_965> <bin_790>", "", "openimages_v1.2/imgs/997b20e734b29a3d_Couch_Pillow_Bed_2.jpg", [0.47552400827407837, 0.21518997848033905, 0.9664340019226074, 0.7911390066146851], [[0.47552399999999995, 0.21519, 0.9664339999999999, 0.791139]]]
[104, [[" that cow is so cute!", "which one are you talking about?"], [" the white one.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_575> <bin_353> <bin_687> <bin_454>", "", "openimages_v1.2/imgs/9e3fe28c9d3b966a_Cattle_Plant_Animal_3.jpg", [0.5756250023841858, 0.3533329963684082, 0.6881250143051147, 0.454166978597641], [[0.575625, 0.353333, 0.688125, 0.454167]]]
[105, [[" that cat is so cute!", "which one are you talking about?"], [" the black and white one.", "is it the one on the far right?"], [" no, it's the one on the left.", ""]], "<bin_40> <bin_147> <bin_268> <bin_496>", "", "openimages_v1.2/imgs/5643d9d11f869ebf_Dog_Toy_Table_Tableware_10.jpg", [0.09579439461231232, 0.14710484445095062, 0.2710280418395996, 0.449139267206192], [[0.0397196261682243, 0.14710485133020346, 0.26869158878504673, 0.49608763693270735]]]
[106, [[" i want a glass of orange juice.", "which one do you want?"], [" i want the one with the straw.", "is it the one on the far right?"], [" no, it's the one in front of us.", "okay, here you go."], [" yes.", ""]], "<bin_123> <bin_524> <bin_343> <bin_698>", "", "openimages_v1.2/imgs/f44c87b6f0f9acf6_Person_Juice_Cocktail_Dessert_Drink_Food_Bottle_Baked goods_Human face_2.jpg", [0.12291667610406876, 0.5249999761581421, 0.34375, 0.698437511920929], [[0.12291666666666666, 0.525, 0.34375, 0.6984375]]]
[107, [[" can you pass me that cup of coffee?", "which cup do you want?"], [" i want the cup on the saucer.", "is it the first cup from the right?"], [" yes, this is the cup.", "ok, i see."], [" yes, this is the cup.", ""]], "<bin_69> <bin_101> <bin_466> <bin_591>", "", "openimages_v1.2/imgs/ab71b8d7af435653_Drink_Coffee cup_Coffee_Food_Tableware_Tea_1.jpg", [0.06875000149011612, 0.10131300985813141, 0.4662500023841858, 0.5919319987297058], [[0.06875, 0.101313, 0.46624999999999994, 0.591932]]]
[108, [[" i want to eat a piece of bread.", "which piece do you want?"], [" i want the one on the plate.", "is it the one on the left?"], [" yes, that's it.", "okay, i see."], [" i want the one in the upper left corner.", ""]], "<bin_4> <bin_1> <bin_291> <bin_170>", "", "openimages_v1.2/imgs/1685f2f5fafff6c5_Fast food_Food_Fruit_Platter_Sandwich_3.jpg", [0.0, 0.3075000047683716, 0.36937499046325684, 0.7875000238418579], [[0.00375, 0.0, 0.29125, 0.17]]]
[109, [[" i want to buy a guitar.", "which one do you want?"], [" i want the white one.", "is it the first one from the right?"], [" yes, that's it.", ""]], "<bin_792> <bin_487> <bin_945> <bin_696>", "", "openimages_v1.2/imgs/0060e23b3e8ad94e_Drum_Guitar_3.jpg", [0.7931249737739563, 0.48780497908592224, 0.9462500810623169, 0.6969980001449585], [[0.793125, 0.487805, 0.94625, 0.696998]]]
[110, [[" i want to buy a jack-o-lantern.", "which one do you want?"], [" i want the one on the ground.", "is it the one on the far right?"], [" no, it's the one on the far left.", "ok, here you go."], [" i want the one in the middle.", ""]], "<bin_339> <bin_603> <bin_485> <bin_707>", "", "openimages_v1.2/imgs/a7c89e0ff329a4c6_Plant_Pumpkin_Squash (Plant)_Vegetable_6.jpg", [0.2769869863986969, 0.5881249904632568, 0.38493698835372925, 0.65625], [[0.338912, 0.60375, 0.485356, 0.7075]]]
[111, [[" that little bear is so cute!", "which one are you talking about?"], [" the small one.", "is it the one on the far right?"], [" yes.", ""]], "<bin_476> <bin_703> <bin_587> <bin_879>", "", "openimages_v1.2/imgs/cad11f9d071fb6ec_Bear_Tree_Animal_3.jpg", [0.4762499928474426, 0.7041670083999634, 0.5874999761581421, 0.8799999952316284], [[0.47625, 0.704167, 0.5875, 0.88]]]
[112, [[" that cow is so cute!", "which one are you talking about?"], [" the brown one.", "is it the one on the far right?"], [" yes, its head is white.", ""]], "<bin_366> <bin_238> <bin_978> <bin_420>", "", "openimages_v1.2/imgs/0aa5ee398fc85db1_Cattle_Tree_Animal_6.jpg", [0.3665269911289215, 0.2381249964237213, 0.9790790677070618, 0.4206250309944153], [[0.366527, 0.238125, 0.979079, 0.4206250000000001]]]
[113, [[" i want to eat a waffle.", "which one do you want?"], [" i want the one on the left.", "ok, here you go."], [" i want the first one on the left.", ""]], "<bin_74> <bin_153> <bin_405> <bin_699>", "", "openimages_v1.2/imgs/bf5b362c3a8a661f_Fast food_Waffle_4.jpg", [0.07437500357627869, 0.15305200219154358, 0.4050000309944153, 0.6995310187339783], [[0.074375, 0.153052, 0.4050000000000001, 0.699531]]]
[114, [[" that train is so beautiful, i want one.", "which one do you want?"], [" i want the one on the far right.", "is it the first one from the right?"], [" yes, this is the one.", ""]], "<bin_671> <bin_154> <bin_998> <bin_271>", "", "openimages_v1.2/imgs/19d8fec0aa042e8c_Tree_Land vehicle_Train_Vehicle_Wheel_5.jpg", [0.4324999749660492, 0.3325839936733246, 0.9993749856948853, 0.6951310038566589], [[0.671875, 0.154307, 0.999375, 0.271161]]]
[115, [[" that cow is so cute!", "which one are you talking about?"], [" the white one.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_485> <bin_777> <bin_592> <bin_827>", "", "openimages_v1.2/imgs/76f895318cd7188b_Cattle_Plant_Tree_Animal_3.jpg", [0.48583298921585083, 0.778124988079071, 0.5924999713897705, 0.8274999856948853], [[0.485833, 0.778125, 0.5925, 0.8275]]]
[116, [[" can you give me a tomato?", "which one do you want?"], [" i want the top one.", "is it the one on the far right?"], [" no, it's the one in the middle.", "okay, here you go."], [" yes, this is it.", ""]], "<bin_548> <bin_191> <bin_655> <bin_348>", "", "openimages_v1.2/imgs/2554ed142bc25376_Fruit_Tomato_Orange_33.jpg", [0.48513302206993103, 0.24766354262828827, 0.5712050199508667, 0.3901869058609009], [[0.548125, 0.19141, 0.655625, 0.348273], [0.896875, 0.7768440000000001, 0.999375, 0.953315]]]
[117, [[" i want a bottle of wine.", "which bottle do you want?"], [" i want the bottle with the white label.", "is it the first bottle from the left?"], [" yes, that bottle.", ""]], "<bin_306> <bin_17> <bin_490> <bin_429>", "", "openimages_v1.2/imgs/137f0c0dfcdd2538_Drink_Bottle_Wine_2.jpg", [0.31666699051856995, 0.0, 0.4625000059604645, 0.4137499928474426], [[0.305833, 0.0175, 0.49, 0.429375]]]
[118, [[" that baboon is so cute!", "which one?"], [" the one in front of us.", "is it the closest to us?"], [" no, the one on the far right.", ""]], "<bin_767> <bin_601> <bin_886> <bin_833>", "", "openimages_v1.2/imgs/27c77ce044a249da_Mammal_Plant_Tree_Monkey_Animal_5.jpg", [0.7673670053482056, 0.6016449928283691, 0.886914074420929, 0.8343130350112915], [[0.767367, 0.601645, 0.886914, 0.834313]]]
[119, [[" can you pass me that bottle of wine?", "which bottle do you want?"], [" i want the bottle on the far right.", "ok, here you go."], [" i want the bottle with the red lid.", ""]], "<bin_631> <bin_165> <bin_791> <bin_617>", "", "openimages_v1.2/imgs/2e1d5aaa143b49a8_Drink_Food_Bottle_Wine_Vegetable_Salad_3.jpg", [0.6312500238418579, 0.16562500596046448, 0.7916669845581055, 0.6171879768371582], [[0.63125, 0.165625, 0.791667, 0.617188]]]
[120, [[" that flower is so beautiful!", "which one are you talking about?"], [" the purple one.", "is it the one on the far right?"], [" no, it's the one on the far left.", ""]], "<bin_56> <bin_243> <bin_276> <bin_405>", "", "openimages_v1.2/imgs/5c9f8bcf7d20fa72_Vase_Plant_Flower_Flowerpot_1.jpg", [0.0686269998550415, 0.3790850043296814, 0.2810460031032562, 0.6209149956703186], [[0.05555555555555555, 0.2434640522875817, 0.2761437908496732, 0.40522875816993464]]]
[121, [[" that cow is so cute!", "which one are you talking about?"], [" the black and white one.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_471> <bin_622> <bin_612> <bin_763>", "", "openimages_v1.2/imgs/6ac1dcb0b40a398f_Cattle_Plant_Tree_Animal_14.jpg", [0.3318749964237213, 0.6829270124435425, 0.41749995946884155, 0.7439020276069641], [[0.47125000000000006, 0.622889, 0.6125, 0.7636020000000001]]]
[122, [[" that christmas tree is so beautiful!", "which one are you talking about?"], [" the one on the far right.", ""]], "<bin_893> <bin_539> <bin_998> <bin_791>", "", "openimages_v1.2/imgs/ca28b662f83cb815_Christmas tree_Plant_Tree_4.jpg", [0.7637500166893005, 0.1633239984512329, 0.9037500619888306, 0.7545369863510132], [[0.894375, 0.539637, 0.999375, 0.791786]]]
[123, [[" that flower is so beautiful!", "which one are you talking about?"], [" the yellow one.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_381> <bin_258> <bin_507> <bin_420>", "", "openimages_v1.2/imgs/7ac7569585ce1802_Dessert_Drink_Coffee cup_Vase_Coffee_Food_Saucer_Table_Tableware_Baked goods_Plant_Rose_Tea_Flower_Flowerpot_3.jpg", [0.5718749761581421, 0.2421875, 0.667187511920929, 0.3765625059604645], [[0.38125, 0.257812, 0.507812, 0.420312]]]
[124, [[" i want a drink.", "which one do you want?"], [" the one in front of the bottle.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_403> <bin_657> <bin_513> <bin_950>", "", "openimages_v1.2/imgs/e50b299c3de8a401_Drink_Bottle_4.jpg", [0.4037500023841858, 0.6575000286102295, 0.5137500166893005, 0.9508330225944519], [[0.40374999999999994, 0.6575, 0.51375, 0.950833]]]
[125, [[" can you give me that pot of succulents?", "which pot do you want?"], [" i want the pot in the middle.", "ok, here you go."], [" i want the pot on the right in the second row.", "ok, here you go."], [" i want the second pot from the right in the second row.", "ok, here you go."], [" yes, this is the pot.", ""]], "<bin_589> <bin_672> <bin_826> <bin_816>", "", "openimages_v1.2/imgs/aaab35b8b689f3ad_Houseplant_Plant_Flower_Flowerpot_9.jpg", [0.2986670136451721, 0.6330000162124634, 0.4946669936180115, 0.8529999852180481], [[0.589333, 0.673, 0.826667, 0.817]]]
[126, [[" that cow is so cute!", "which one are you talking about?"], [" the one standing.", "is it the one on the far right?"], [" yes.", ""]], "<bin_495> <bin_485> <bin_691> <bin_740>", "", "openimages_v1.2/imgs/084723607f56c2c7_Cattle_5.jpg", [0.49562495946884155, 0.485929012298584, 0.6918749809265137, 0.7410879731178284], [[0.495625, 0.485929, 0.691875, 0.741088]]]
[127, [[" i want to rent a car.", "which one do you want to buy?"], [" i want to buy the white one.", "is it the one on the far right?"], [" no, it's the one in the middle.", "okay, here you go."], [" there are two white cars in front of it, right?", ""]], "<bin_671> <bin_610> <bin_727> <bin_651>", "", "openimages_v1.2/imgs/550259b3c344de64_Land vehicle_Truck_Vehicle_Van_Car_3.jpg", [0.4256249964237213, 0.8016670346260071, 0.5456249713897705, 0.887499988079071], [[0.671875, 0.6104166666666667, 0.728125, 0.6520833333333333]]]
[128, [[" that hot air balloon is so beautiful!", "which one are you talking about?"], [" the yellow one.", "is it the one on the far right?"], [" no, it's the one on the far left.", ""]], "<bin_97> <bin_329> <bin_312> <bin_818>", "", "openimages_v1.2/imgs/9c2405bb256b4be4_Balloon_7.jpg", [0.09749999642372131, 0.32888901233673096, 0.3125, 0.8188890218734741], [[0.0975, 0.328889, 0.3125, 0.8188889999999999]]]
[129, [[" i want to eat a piece of cake.", "which piece do you want?"], [" i want the one on the left.", "ok, here you go."], [" i want the one on the far left.", ""]], "<bin_19> <bin_26> <bin_496> <bin_610>", "", "openimages_v1.2/imgs/3257f149846873fc_Muffin_Snack_Dessert_Food_Baked goods_3.jpg", [0.019375000149011612, 0.026464000344276428, 0.49687498807907104, 0.611066997051239], [[0.019375, 0.026464, 0.496875, 0.611067]]]
[130, [[" i want a drink.", "which one do you want?"], [" i want that glass of coke.", "is it the first glass from the right?"], [" no, it's the first glass from the left.", "okay, i see."], [" i want the one with the straw in it.", ""]], "<bin_142> <bin_115> <bin_323> <bin_423>", "", "openimages_v1.2/imgs/3886690cfd40fc0f_Drink_Coffee cup_Food_Tableware_Seafood_5.jpg", [0.14218750596046448, 0.1147540956735611, 0.32343751192092896, 0.42388758063316345], [[0.1421875, 0.11475409836065574, 0.3234375, 0.4238875878220141]]]
[131, [[" that alpaca is so cute!", "which one are you talking about?"], [" the white one.", "is it the one on the far right?"], [" no, it's the one next to the brown one.", ""]], "<bin_446> <bin_523> <bin_570> <bin_700>", "", "openimages_v1.2/imgs/786c8ef98772cdcc_Animal_Alpaca_6.jpg", [0.4466559886932373, 0.5236719846725464, 0.5708600282669067, 0.7004830241203308], [[0.446656, 0.523672, 0.57086, 0.700483]]]
[132, [[" i want to eat a piece of bread.", "which piece do you want?"], [" the one in the middle.", "is it the one with the whole piece?"], [" no, it's the one next to it.", ""]], "<bin_345> <bin_61> <bin_676> <bin_276>", "", "openimages_v1.2/imgs/7ccafb823be060ec_Fast food_Muffin_Snack_Dessert_Food_Baked goods_9.jpg", [0.09843750298023224, 0.109375, 0.32343751192092896, 0.2828125059604645], [[0.345313, 0.060938000000000006, 0.676562, 0.276563]]]
[133, [[" that little duck is so cute!", "which one are you talking about?"], [" the one in the middle.", ""]], "<bin_351> <bin_504> <bin_509> <bin_688>", "", "openimages_v1.2/imgs/9717a48ed1fba90d_Bird_Plant_Duck_Animal_7.jpg", [0.4287499785423279, 0.45833295583724976, 0.5643749833106995, 0.5616670250892639], [[0.35125, 0.504167, 0.51, 0.689167]]]
[134, [[" i want to eat a strawberry.", "which one do you want?"], [" i want the smallest one.", "is it the first one from the left?"], [" no, it's the first one from the right.", "okay, i see."], [" yes, that's it.", ""]], "<bin_782> <bin_852> <bin_998> <bin_998>", "", "openimages_v1.2/imgs/3e34fa86524a567e_Food_Strawberry_3.jpg", [0.7825000286102295, 0.8528579473495483, 0.9987499117851257, 0.9990630149841309], [[0.7825, 0.852858, 0.99875, 0.9990629999999999]]]
[135, [[" can you give me a pillow?", "which one do you want?"], [" i want the pink one.", "is it the one on the left?"], [" yes, this is it.", "ok, here you go."], [" i want the one behind the green pillow.", ""]], "<bin_389> <bin_355> <bin_465> <bin_480>", "", "openimages_v1.2/imgs/043fb662ccfaea78_Chair_Nightstand_Pillow_Curtain_Bed_Furniture_Building_7.jpg", [0.3890624940395355, 0.35519126057624817, 0.46562498807907104, 0.48087435960769653], [[0.3890625, 0.3551912568306011, 0.465625, 0.4808743169398907]]]
[136, [[" can you pass me that roll of toilet paper?", "which roll do you want?"], [" i want the middle roll.", "ok, here you go."], [" i want the middle roll.", ""]], "<bin_387> <bin_200> <bin_532> <bin_292>", "", "openimages_v1.2/imgs/3eedd2a40c26f8fa_Paper towel_Toilet paper_4.jpg", [0.3872790038585663, 0.2006249874830246, 0.5322059988975525, 0.29249998927116394], [[0.38727899999999993, 0.200625, 0.532206, 0.2925]]]
[137, [[" i want a balloon.", "which one do you want?"], [" i want the pink one.", "is it the first one from the left?"], [" no, it's the first one from the right.", "ok, i see."], [" i want the first one from the right.", ""]], "<bin_824> <bin_624> <bin_862> <bin_705>", "", "openimages_v1.2/imgs/0b6a2c03b17b4fed_Balloon_3.jpg", [0.9387500882148743, 0.5341669917106628, 0.987500011920929, 0.625], [[0.825, 0.625, 0.8625, 0.705833]]]
[138, [[" can you pass me that blender?", "which one do you want?"], [" i want the one in the back.", "ok, here you go."], [" i want the one in the middle.", ""]], "<bin_228> <bin_90> <bin_501> <bin_376>", "", "openimages_v1.2/imgs/da63b2e9d5b238d0_Bottle_Cream_5.jpg", [0.3245779871940613, 0.2487500011920929, 0.6022509932518005, 0.6006249785423279], [[0.227955, 0.09, 0.501876, 0.376875]]]
[139, [[" i want a glass of orange juice.", "which one do you want?"], [" i want the one next to the plate.", "ok, here you go."], [" i want the one on the right.", ""]], "<bin_699> <bin_192> <bin_834> <bin_285>", "", "openimages_v1.2/imgs/5525e588a40b125b_Juice_Dessert_Dairy Product_Mug_Food_Tableware_Baked goods_Pitcher (Container)_Jug_2.jpg", [0.699999988079071, 0.19187499582767487, 0.8343750238418579, 0.28562501072883606], [[0.7, 0.191875, 0.834375, 0.285625]]]
[140, [[" i want to eat a strawberry.", "which one do you want?"], [" i want the biggest one.", "is it the one on the far right?"], [" yes, this is it.", "okay, here you go."], [" yes, this is it.", ""]], "<bin_524> <bin_151> <bin_701> <bin_391>", "", "openimages_v1.2/imgs/d10dd1ef507d3e59_Dairy Product_Baked goods_Strawberry_Cake_2.jpg", [0.5240963697433472, 0.1515151560306549, 0.7018072009086609, 0.39177489280700684], [[0.52409637, 0.15151516, 0.7018072, 0.39177490000000004]]]
[141, [[" i want to buy a toy.", "which one do you want?"], [" i want the one on the left.", "ok, here you go."], [" i want the one with the red eyes.", ""]], "<bin_100> <bin_699> <bin_627> <bin_891>", "", "openimages_v1.2/imgs/005cbd1f2b3e2b8b_Toy_Animal_3.jpg", [0.12734399735927582, 0.20750300586223602, 0.45468801259994507, 0.7866349816322327], [[0.1, 0.699883, 0.627344, 0.8921450000000001]]]
[142, [[" i want a piece of broccoli.", "which one do you want?"], [" i want the one next to the yellow vegetable.", "is it the one on the far right?"], [" yes, this is it.", "okay, here you go."], [" yes, this is it.", ""]], "<bin_374> <bin_248> <bin_751> <bin_578>", "", "openimages_v1.2/imgs/e0b81ffd382fa5f5_Food_Vegetable_Broccoli_3.jpg", [0.0, 0.4428099989891052, 0.49346399307250977, 0.6356210112571716], [[0.374183, 0.24836599999999998, 0.751634, 0.578431]]]
[143, [[" that duck is so cute!", "which one?"], [" the one in front of us.", "is it the closest to us?"], [" yes.", ""]], "<bin_29> <bin_452> <bin_523> <bin_859>", "", "openimages_v1.2/imgs/02ad005224995a19_Bird_Duck_3.jpg", [0.028571000322699547, 0.45285695791244507, 0.5238100290298462, 0.8600000143051147], [[0.028571000000000003, 0.45285700000000007, 0.52381, 0.8599999999999999]]]
[144, [[" can you give me that flower pot?", "which one do you want?"], [" i want the small one.", "is it the one on the far right?"], [" no, it's the one in the middle.", "okay, here you go."], [" yes, this is it.", ""]], "<bin_195> <bin_577> <bin_281> <bin_727>", "", "openimages_v1.2/imgs/5ba6f54081d23ad5_Houseplant_Plant_Flowerpot_4.jpg", [0.19625000655651093, 0.5266669988632202, 0.29249998927116394, 0.7233330011367798], [[0.1953125, 0.5777777777777777, 0.28125, 0.7277777777777777]]]
[145, [[" can you give me a pot of flowers?", "which pot do you want?"], [" i want the brown pot.", "is it the first pot from the right?"], [" no, it's the first pot from the left.", "ok, i see."], [" i want the first pot from the right.", ""]], "<bin_706> <bin_11> <bin_758> <bin_104>", "", "openimages_v1.2/imgs/acaf1ad35d3e3e47_Houseplant_Plant_Flower_Flowerpot_8.jpg", [0.0, 0.3083333373069763, 0.04374999925494194, 0.4333333373069763], [[0.70625, 0.010833, 0.75875, 0.104167]]]
[146, [[" can you pass me that bottle?", "which one do you want?"], [" i want the one on the right.", "ok, here you go."], [" i want the first one on the right.", ""]], "<bin_746> <bin_99> <bin_959> <bin_990>", "", "openimages_v1.2/imgs/6d80ce73afb67622_Drink_Bottle_3.jpg", [0.746874988079071, 0.09929600358009338, 0.9599999189376831, 0.9906179308891296], [[0.746875, 0.099296, 0.96, 0.990618]]]
[147, [[" i want to buy a bottle of wine.", "which bottle do you want?"], [" i want the bottle on the right.", "is it the first bottle from the right?"], [" no, it's the second bottle from the right.", "ok, i see."], [" i want the second bottle from the right.", ""]], "<bin_711> <bin_207> <bin_802> <bin_659>", "", "openimages_v1.2/imgs/0400dd1a4bb1cb4c_Food_Beer_Drink_Bottle_21.jpg", [0.7118750214576721, 0.20750001072883606, 0.8024999499320984, 0.6600000262260437], [[0.711875, 0.2075, 0.8025, 0.66]]]
[148, [[" that cow is so cute!", "which one are you talking about?"], [" the one next to the house.", "is it the one on the far right?"], [" no, it's the one on the left.", ""]], "<bin_307> <bin_558> <bin_383> <bin_699>", "", "openimages_v1.2/imgs/514de8037bcf100d_Cattle_House_Building_Animal_6.jpg", [0.36375001072883606, 0.5746650099754333, 0.4868749976158142, 0.7415040135383606], [[0.3075, 0.558187, 0.383125, 0.699279]]]
[149, [[" can you pass me that croissant?", "which one do you want?"], [" i want the one on the left.", "ok, here you go."], [" i want the one next to the glass of water.", ""]], "<bin_301> <bin_477> <bin_511> <bin_669>", "", "openimages_v1.2/imgs/b9ace36b6fd157c0_Coffee cup_Coffee_Food_Saucer_Table_Tableware_Tea_4.jpg", [0.32617199420928955, 0.5208330154418945, 0.460938036441803, 0.6523439884185791], [[0.301758, 0.47786500000000004, 0.511719, 0.669271]]]
[150, [[" i want to buy a bottle of wine.", "which bottle do you want?"], [" i want the green bottle.", "is it the first bottle from the right?"], [" no, it's the first bottle from the left.", "ok, i see."], [" i want the second bottle from the left.", ""]], "<bin_205> <bin_302> <bin_361> <bin_998>", "", "openimages_v1.2/imgs/3256d9833056e859_Bottle_5.jpg", [0.03689799830317497, 0.18667900562286377, 0.1944970041513443, 0.9990619421005249], [[0.20512800000000003, 0.302064, 0.361476, 0.999062]]]
[151, [[" can you give me that cake?", "which one do you want?"], [" i want the one with three candles.", "is it the one on the far right?"], [" yes, this is it.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_401> <bin_559> <bin_810> <bin_804>", "", "openimages_v1.2/imgs/9f11114d3d95ad8a_Person_Dessert_Drink_Food_Baked goods_Wine_Wine glass_Candle_Human face_Cake_3.jpg", [0.6484375, 0.45500001311302185, 0.671875, 0.6150000095367432], [[0.40125, 0.56, 0.8106250000000002, 0.805]]]
[152, [[" can you pass me that glass of orange juice?", "which one do you want?"], [" i want the one in front of the red sign.", "is it the one on the far right?"], [" no, it's the one in the middle.", "okay, here you go."], [" yes, this is it.", ""]], "<bin_470> <bin_542> <bin_510> <bin_638>", "", "openimages_v1.2/imgs/74d8c449bebdb2a1_Person_Juice_Cocktail_Drink_Food_Man_9.jpg", [0.6893749833106995, 0.571025013923645, 0.7450000047683716, 0.6989650130271912], [[0.4703125, 0.5422535211267606, 0.5109375, 0.6384976525821596]]]
[153, [[" i want to eat a pancake.", "which one do you want?"], [" i want the one below.", "ok, here you go."], [" i want the one below.", ""]], "<bin_120> <bin_678> <bin_643> <bin_959>", "", "openimages_v1.2/imgs/ceea609450c82343_Pancake_3.jpg", [0.04062499850988388, 0.5708333253860474, 0.37187498807907104, 0.970833420753479], [[0.1203125, 0.6791666666666667, 0.64375, 0.9604166666666667]]]
[154, [[" that car is blocking me, please move it.", "which one are you talking about?"], [" the yellow one.", "is it the first one from the right?"], [" no, it's the first one from the left.", ""]], "<bin_340> <bin_664> <bin_833> <bin_998>", "", "openimages_v1.2/imgs/3a9b98a51f4976c5_Car_Taxi_4.jpg", [0.34062498807907104, 0.6647989749908447, 0.8337499499320984, 0.9990659952163696], [[0.340625, 0.664799, 0.83375, 0.999066]]]
[155, [[" can you give me a piece of bread?", "which piece do you want?"], [" i want the bottom piece.", "ok, here you go."], [" i want the bottom piece on the right.", ""]], "<bin_400> <bin_402> <bin_998> <bin_998>", "", "openimages_v1.2/imgs/2fa8fbff5fa99cd0_Bread_Dessert_3.jpg", [0.40072202682495117, 0.4025270342826843, 1.0, 1.0], [[0.40072203, 0.40252706, 1.0, 1.0]]]
[156, [[" i want to eat a banana.", "which one do you want?"], [" the one on top of the apple.", "is it the one on the far right?"], [" yes.", ""]], "<bin_463> <bin_165> <bin_707> <bin_391>", "", "openimages_v1.2/imgs/251c49d623e62eeb_Apple_Food_Fruit_Banana_5.jpg", [0.8187500238418579, 0.41110101342201233, 0.8818749189376831, 0.5578550100326538], [[0.46375, 0.165569, 0.708125, 0.39134500000000005]]]
[157, [[" that flag is so special!", "which side?"], [" the red side.", "is it the one on the far right?"], [" no, it's on the far left.", ""]], "<bin_1> <bin_703> <bin_39> <bin_887>", "", "openimages_v1.2/imgs/3ab820fc05f2c108_Balloon_Flag_4.jpg", [0.0, 0.7033330202102661, 0.038750000298023224, 0.887499988079071], [[0.0, 0.703333, 0.03875, 0.8875]]]
[158, [[" can you pass me that bowl?", "which one do you want?"], [" the green one.", "is it the one on the far right?"], [" yes.", ""]], "<bin_463> <bin_33> <bin_682> <bin_240>", "", "openimages_v1.2/imgs/de94f227f5c54176_Coffee cup_Mixing bowl_Tableware_Bowl_Plate_Platter_5.jpg", [0.8262499570846558, 0.1683329939842224, 0.9993749856948853, 0.8716670274734497], [[0.46375, 0.033333, 0.683125, 0.23999999999999996]]]
[159, [[" i want to eat a tomato.", "which one do you want?"], [" the one in the bowl.", "is it the one on the left?"], [" no, it's the one on the right.", ""]], "<bin_739> <bin_160> <bin_895> <bin_350>", "", "openimages_v1.2/imgs/3e8133a9ad12d6d6_Cucumber_Food_Tomato_Vegetable_Salad_7.jpg", [0.5899999737739563, 0.41999998688697815, 0.796875, 0.7124999761581421], [[0.74, 0.16, 0.89625, 0.350833]]]
[160, [[" i want to buy a pumpkin.", "which one do you want?"], [" i want the yellow one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" i want the one in the upper left corner.", ""]], "<bin_190> <bin_17> <bin_311> <bin_230>", "", "openimages_v1.2/imgs/521415a353e677ba_Food_Fruit_Plant_Pumpkin_Squash (Plant)_Vegetable_6.jpg", [0.0, 0.0, 0.24140599370002747, 0.4375], [[0.189844, 0.016667, 0.310937, 0.230208]]]
[161, [[" that flower pot is so beautiful!", "which one are you talking about?"], [" the black one.", "is it the one on the far left?"], [" no, it's the one on the far right.", ""]], "<bin_664> <bin_809> <bin_799> <bin_963>", "", "openimages_v1.2/imgs/0cb491551929a2ee_Stairs_Houseplant_Porch_Plant_Tree_Flower_Building_Flowerpot_4.jpg", [0.6321200132369995, 0.8225000500679016, 0.7002800107002258, 0.8837500810623169], [[0.664799, 0.81, 0.800187, 0.9637499999999999]]]
[162, [[" can you pass me that candle?", "which one are you talking about?"], [" the purple one.", "is it the one on the far right?"], [" no, it's the one on the far left.", ""]], "<bin_201> <bin_213> <bin_323> <bin_546>", "", "openimages_v1.2/imgs/98ab184afd61590c_Plant_Candle_Flower_4.jpg", [0.20156249403953552, 0.21294362843036652, 0.32343751192092896, 0.5469728708267212], [[0.2015625, 0.21294363256784968, 0.3234375, 0.5469728601252609]]]
[163, [[" can you give me a tomato?", "which one do you want?"], [" i want the red one.", "is it the one on the far right?"], [" no, it's the one on the far left.", "okay, here you go."], [" i want the top one.", ""]], "<bin_73> <bin_7> <bin_326> <bin_393>", "", "openimages_v1.2/imgs/987761ccf847341d_Tomato_21.jpg", [0.0, 0.31425899267196655, 0.24937501549720764, 0.6894930005073547], [[0.073125, 0.006567, 0.32625, 0.393058]]]
[164, [[" that bird is so cute!", "which one are you talking about?"], [" the one on the far left.", ""]], "<bin_65> <bin_344> <bin_195> <bin_683>", "", "openimages_v1.2/imgs/497aafdf9d1baa8c_Bird_Plant_Goose_15.jpg", [0.06484399735927582, 0.3446660041809082, 0.1953119933605194, 0.6834700107574463], [[0.064844, 0.344666, 0.195312, 0.68347]]]
[165, [[" can you pass me that flag?", "which one are you talking about?"], [" the one in the middle.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_211> <bin_166> <bin_501> <bin_626>", "", "openimages_v1.2/imgs/9a6c0155d21d04cd_Flag_4.jpg", [0.2109375, 0.1666666716337204, 0.5015624761581421, 0.6267605423927307], [[0.2109375, 0.16666666666666666, 0.5015625, 0.6267605633802817]]]
[166, [[" i want to eat a piece of pizza.", "which piece do you want?"], [" i want the top piece.", "is it the first piece from the left?"], [" yes.", ""]], "<bin_205> <bin_213> <bin_488> <bin_572>", "", "openimages_v1.2/imgs/2ae139dbd8ebc544_Fast food_Food_Tableware_Baked goods_Pizza_3.jpg", [0.19599999487400055, 0.20989498496055603, 0.4690000116825104, 0.5742130279541016], [[0.205, 0.212894, 0.488, 0.572714]]]
[167, [[" can you give me that flower pot?", "which one do you want?"], [" i want the one at the bottom of the stairs.", "ok, here you go."], [" i want the one with red flowers.", ""]], "<bin_663> <bin_681> <bin_801> <bin_838>", "", "openimages_v1.2/imgs/0edda38d34c6c368_House_Building_Houseplant_Plant_Flower_Flowerpot_3.jpg", [0.6640625, 0.6814988255500793, 0.801562488079071, 0.8384075164794922], [[0.6640625, 0.6814988290398126, 0.8015625, 0.8384074941451991]]]
[168, [[" that cow is so cute!", "which one are you talking about?"], [" the one in the back.", ""]], "<bin_267> <bin_22> <bin_631> <bin_155>", "", "openimages_v1.2/imgs/15b3e8fba9b3c275_Flower_Cattle_Plant_Animal_3.jpg", [0.26750001311302185, 0.0216669999063015, 0.6312500238418579, 0.1550000011920929], [[0.2675, 0.021667, 0.63125, 0.155]]]
[169, [[" i want to buy a toy.", "which one do you want?"], [" i want the blue one.", "is it the first one from the left?"], [" yes, that's it.", "ok, i see."], [" yes, that's it.", ""]], "<bin_262> <bin_321> <bin_432> <bin_805>", "", "openimages_v1.2/imgs/3aa7a287b3a4bb54_Toy_9.jpg", [0.0, 0.22745899856090546, 0.21187500655651093, 0.787909984588623], [[0.261875, 0.321721, 0.4325, 0.805328]]]
[170, [[" i want to buy a toy.", "which one do you want?"], [" i want the orange one.", "is it the first one from the left?"], [" yes, that's it.", "ok, i see."], [" yes, that's it.", ""]], "<bin_155> <bin_532> <bin_353> <bin_761>", "", "openimages_v1.2/imgs/7df0db3ed23b86fb_Toy_Plant_Animal_11.jpg", [0.1550000011920929, 0.5247430205345154, 0.36000001430511475, 0.7488330006599426], [[0.155, 0.532213, 0.353125, 0.761905]]]
[171, [[" i want to buy a doll.", "which one do you want?"], [" i want the blue one.", "is it the first one from the left?"], [" no, it's the second one from the right.", "ok, i see."], [" yes, that's the one.", ""]], "<bin_349> <bin_619> <bin_516> <bin_789>", "", "openimages_v1.2/imgs/7df0db3ed23b86fb_Toy_Plant_Animal_11.jpg", [0.46812495589256287, 0.21942099928855896, 0.8612500429153442, 0.8048549890518188], [[0.349375, 0.619981, 0.51625, 0.789916]]]
[172, [[" that cow is so cute!", "which one are you talking about?"], [" the brown one.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_460> <bin_405> <bin_615> <bin_610>", "", "openimages_v1.2/imgs/aab1c1431e1ad947_Cattle_Animal_5.jpg", [0.4609375, 0.4055555462837219, 0.6156250238418579, 0.6111111044883728], [[0.4609375, 0.40555555555555556, 0.615625, 0.6111111111111112]]]
[173, [[" this cat is so cute!", "which one?"], [" the one in front of us.", "is it the one on the far right?"], [" yes.", ""]], "<bin_1> <bin_156> <bin_866> <bin_997>", "", "openimages_v1.2/imgs/e31907a56ee2955e_Toy_Carnivore_Cat_Animal_2.jpg", [0.6481480002403259, 0.5405409932136536, 0.968518078327179, 0.952182948589325], [[0.0, 0.155925, 0.866667, 0.997921]]]
[174, [[" this chicken is so cute!", "which one are you talking about?"], [" the white one.", "is it the one on the far right?"], [" no, it's the one in front of us.", ""]], "<bin_262> <bin_328> <bin_634> <bin_812>", "", "openimages_v1.2/imgs/b855b48a33411d6e_Bird_Mammal_Chicken_Dog_Animal_3.jpg", [0.26249998807907104, 0.3283329904079437, 0.6343749761581421, 0.8125], [[0.2625, 0.328333, 0.634375, 0.8125]]]
[175, [[" can you pass me that bench?", "which one are you talking about?"], [" the one behind the girl in blue.", ""]], "<bin_1> <bin_35> <bin_452> <bin_190>", "", "openimages_v1.2/imgs/735a0b068f7e14c4_Balance beam_4.jpg", [0.0, 0.1153850108385086, 0.7718750238418579, 0.3996250033378601], [[0.000625, 0.034709, 0.4525, 0.190432]]]
[176, [[" can you pass me that pen?", "which one do you want?"], [" the one with the blue cap.", "is it the one on the far right?"], [" no, it's the one in the middle.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_386> <bin_537> <bin_599> <bin_662>", "", "openimages_v1.2/imgs/c2369ea55f28af17_Cosmetics_Office supplies_Lipstick_Pen_19.jpg", [0.768750011920929, 0.5761280059814453, 0.7993749976158142, 0.6184210181236267], [[0.386875, 0.537594, 0.599375, 0.662594]]]
[177, [[" can you help me get that bag?", "which one are you talking about?"], [" the blue one.", ""]], "<bin_103> <bin_173> <bin_528> <bin_890>", "", "openimages_v1.2/imgs/ec0837e0a98258f5_Plastic bag_3.jpg", [0.10312499850988388, 0.17333300411701202, 0.5287500023841858, 0.8911110162734985], [[0.103125, 0.173333, 0.52875, 0.891111]]]
[178, [[" this little meerkat is so cute!", "which one are you talking about?"], [" the one lying on its stomach.", ""]], "<bin_386> <bin_107> <bin_844> <bin_827>", "", "openimages_v1.2/imgs/921f182a889503b5_Carnivore_Animal_3.jpg", [0.38640400767326355, 0.10679600387811661, 0.8443650007247925, 0.8274829983711243], [[0.386404, 0.106796, 0.844365, 0.827483]]]
[179, [[" i want to buy a toy car.", "which one do you want?"], [" i want the one with two wheels.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" i want the second one from the left.", "ok, i see."], [" yes, this is the toy car.", ""]], "<bin_420> <bin_732> <bin_594> <bin_873>", "", "openimages_v1.2/imgs/f0b219c34af8afd6_Toy_Vehicle_Wheel_5.jpg", [0.11687500774860382, 0.8468040227890015, 0.13625000417232513, 0.9088350534439087], [[0.4206250000000001, 0.733083, 0.595, 0.87406]]]
[180, [[" i want to buy a toy.", "which one do you want?"], [" i want the red one.", "is it the first one from the right?"], [" no, it's the second one from the right.", "ok, i see."], [" yes, that's the one.", ""]], "<bin_632> <bin_77> <bin_848> <bin_321>", "", "openimages_v1.2/imgs/b00f6df690713fff_Toy_Land vehicle_Vehicle_Wheel_Tire_Car_4.jpg", [0.6328125, 0.07708333432674408, 0.848437488079071, 0.32083332538604736], [[0.6328125, 0.07708333333333334, 0.8484375, 0.32083333333333336]]]
[181, [[" can you pass me that book?", "which one do you want?"], [" the red one.", "is it the one on the far right?"], [" no, it's the first one from the left.", "ok, i see."], [" yes, that's the one.", ""]], "<bin_313> <bin_178> <bin_513> <bin_811>", "", "openimages_v1.2/imgs/f773a5b6a72d1e0a_Bookcase_Book_6.jpg", [0.031314998865127563, 0.17125000059604645, 0.3538619875907898, 0.8274999856948853], [[0.313152, 0.178125, 0.51357, 0.811875]]]
[182, [[" i want to buy a pumpkin.", "which one do you want?"], [" i want the biggest one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" yes, this is it.", ""]], "<bin_518> <bin_832> <bin_670> <bin_997>", "", "openimages_v1.2/imgs/80279bd79579d046_Food_Fruit_Pumpkin_Squash (Plant)_3.jpg", [0.41624999046325684, 0.13708899915218353, 0.5431249737739563, 0.3295769989490509], [[0.51875, 0.8333333333333334, 0.6703125, 0.9976525821596244]]]
[183, [[" can you pass me that candle?", "which one do you want?"], [" i want the middle one.", "ok, here you go."], [" i want the middle one in the middle row.", ""]], "<bin_227> <bin_454> <bin_592> <bin_567>", "", "openimages_v1.2/imgs/2f6a41b7480481ab_Candle_4.jpg", [0.16041299700737, 0.28812500834465027, 0.5037519931793213, 0.41999998688697815], [[0.22716627634660422, 0.4546875, 0.5925058548009368, 0.5671875]]]
[184, [[" i want to buy a book.", "which one do you want?"], [" i want the one on the right.", "ok, here you go."], [" i want the first one on the right.", ""]], "<bin_516> <bin_361> <bin_997> <bin_857>", "", "openimages_v1.2/imgs/919e0c111df06726_Box_3.jpg", [0.5163400173187256, 0.36111098527908325, 0.9983659982681274, 0.8578430414199829], [[0.51634, 0.361111, 0.9983660000000001, 0.857843]]]
[185, [[" can you pass me that vase?", "which one do you want?"], [" i want the one on the table.", "is it the one on the left?"], [" no, it's the one on the right.", "ok, here you go."], [" i want the one with the purple flowers.", "ok, here you go."], [" i want the one on the right.", ""]], "<bin_847> <bin_369> <bin_883> <bin_508>", "", "openimages_v1.2/imgs/9566207f81bfcd6d_Chair_Couch_Vase_Bookcase_Houseplant_Table_Plant_Book_Flower_Furniture_Flowerpot_Shelf_2.jpg", [0.8475000262260437, 0.3691670000553131, 0.8837500810623169, 0.5083330273628235], [[0.8474999999999999, 0.369167, 0.88375, 0.508333]]]
[186, [[" i want to buy a lego car.", "which one do you want?"], [" i want the white one.", "is it the one on the left?"], [" yes, this is it.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_99> <bin_550> <bin_569> <bin_943>", "", "openimages_v1.2/imgs/3a66ef4929893c84_Toy_Land vehicle_Tank_Vehicle_Weapon_Aircraft_5.jpg", [0.09937500208616257, 0.550832986831665, 0.5693749785423279, 0.9441670179367065], [[0.099375, 0.550833, 0.569375, 0.944167]]]
[187, [[" that cow is so cute!", "which one are you talking about?"], [" the big one.", "is it the one on the far right?"], [" no, it's the one in front.", ""]], "<bin_446> <bin_360> <bin_847> <bin_864>", "", "openimages_v1.2/imgs/d68b46aae5b8ddd7_Cattle_Bull_Animal_5.jpg", [0.42250004410743713, 0.35553500056266785, 0.7981250286102295, 0.8499060869216919], [[0.446875, 0.360225, 0.8474999999999999, 0.864916]]]
[188, [[" i want to eat an apple.", "which one do you want?"], [" i want the one in the lower left corner.", "ok, here you go."], [" i want the one on the left in the lower left corner.", ""]], "<bin_72> <bin_524> <bin_526> <bin_869>", "", "openimages_v1.2/imgs/8947839989306e28_Apple_Food_Fruit_4.jpg", [0.07196000218391418, 0.5249999761581421, 0.5260549783706665, 0.8700000047683716], [[0.07196, 0.525, 0.526055, 0.8699999999999999]]]
[189, [[" can you pass me that spoon?", "which one do you want?"], [" the small one.", "is it the one on the far right?"], [" yes.", ""]], "<bin_641> <bin_751> <bin_712> <bin_827>", "", "openimages_v1.2/imgs/ee784978eea58567_Drink_Food_Tableware_Spoon_3.jpg", [0.6418750286102295, 0.7516670227050781, 0.7131249904632568, 0.8274999856948853], [[0.641875, 0.751667, 0.713125, 0.8275]]]
[190, [[" i want to buy a train.", "which one do you want?"], [" i want the blue one.", "is it the first one from the left?"], [" no, it's the first one from the right.", "ok, i see."], [" yes, this is the one.", ""]], "<bin_562> <bin_553> <bin_997> <bin_997>", "", "openimages_v1.2/imgs/15e9619bfdd48882_Land vehicle_Train_Vehicle_6.jpg", [0.6473169922828674, 0.5233330130577087, 0.9923330545425415, 0.7333329916000366], [[0.5625, 0.5534441805225653, 0.9984375, 0.997624703087886]]]
[191, [[" this flower is so beautiful!", "which one?"], [" this is the one in front of us.", "is it the closest to us?"], [" yes.", ""]], "<bin_301> <bin_304> <bin_595> <bin_690>", "", "openimages_v1.2/imgs/e26ff9e7c08822f9_Plant_Rose_Flower_6.jpg", [0.27020201086997986, 0.5925930142402649, 0.4766409993171692, 0.8787879943847656], [[0.301136, 0.304714, 0.595328, 0.691077]]]
[192, [[" i want to rent a boat.", "which one do you want?"], [" i want the black one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" yes, that's the one.", ""]], "<bin_97> <bin_112> <bin_323> <bin_356>", "", "openimages_v1.2/imgs/65701da15ae14f1b_Boat_Barge_Watercraft_Vehicle_3.jpg", [0.09668000042438507, 0.11197899281978607, 0.3232420086860657, 0.35677099227905273], [[0.09668, 0.111979, 0.323242, 0.356771]]]
[193, [[" that lion is so cute!", "which one are you talking about?"], [" the one in the distance.", ""]], "<bin_814> <bin_58> <bin_998> <bin_349>", "", "openimages_v1.2/imgs/0329163582e9b628_Carnivore_Lion_5.jpg", [0.8149999380111694, 0.05816100165247917, 0.9987499117851257, 0.3489679992198944], [[0.815, 0.058161, 0.9987500000000001, 0.348968]]]
[194, [[" i want to eat a cake.", "which one do you want?"], [" i want the one on the left.", "ok, here you go."], [" i want the first one on the left.", ""]], "<bin_1> <bin_306> <bin_268> <bin_998>", "", "openimages_v1.2/imgs/3cce7c5e2578f61b_Muffin_Dessert_Food_Baked goods_Pastry_6.jpg", [0.0, 0.3067539930343628, 0.26875001192092896, 0.9990619421005249], [[0.0, 0.306754, 0.26875, 0.999062], [0.283125, 0.260788, 0.758125, 0.997186]]]
[195, [[" can you pass me that pumpkin?", "which one do you want?"], [" the orange one.", "is it the one on the right?"], [" no, it's the one on the left.", "ok, here you go."], [" yes, that's it.", ""]], "<bin_472> <bin_1> <bin_775> <bin_131>", "", "openimages_v1.2/imgs/002973443937b47f_Apple_Snack_Food_Fruit_Table_Tableware_Plate_Pumpkin_Platter_Squash (Plant)_Vegetable_2.jpg", [0.20374998450279236, 0.42833298444747925, 0.3218750059604645, 0.6000000238418579], [[0.4725, 0.0, 0.775625, 0.130833]]]
[196, [[" i want to buy a pair of shoes.", "which pair do you want?"], [" i want the green one.", "is it the first one from the right?"], [" yes, that's it.", ""]], "<bin_364> <bin_397> <bin_754> <bin_767>", "", "openimages_v1.2/imgs/0a333b1b95e9586b_Human foot_Human leg_Sandal_High heels_3.jpg", [0.7549020051956177, 0.3741829991340637, 0.9803919792175293, 0.5735290050506592], [[0.364379, 0.397059, 0.754902, 0.767974]]]
[197, [[" i want to buy a toy.", "which one do you want?"], [" i want the green one.", "is it the first one from the right?"], [" yes, that's it.", ""]], "<bin_775> <bin_231> <bin_970> <bin_664>", "", "openimages_v1.2/imgs/0caa16ca52db74ed_Bird_Toy_6.jpg", [0.7756249904632568, 0.2308330088853836, 0.9712499380111694, 0.6650000214576721], [[0.775625, 0.230833, 0.9712499999999998, 0.6650000000000001]]]
[198, [[" i want to rent a car.", "which one do you want to buy?"], [" i want to buy the white one.", "is it the one on the far right?"], [" no, it's the one on the far left.", "okay, here you go."], [" it's the second one from the left.", ""]], "<bin_50> <bin_381> <bin_234> <bin_531>", "", "openimages_v1.2/imgs/88af919148def218_Land vehicle_Truck_Vehicle_Wheel_Van_Car_2.jpg", [0.05000000074505806, 0.3812499940395355, 0.234375, 0.53125], [[0.05, 0.38125, 0.234375, 0.53125]]]
[199, [[" i want a balloon.", "which one do you want?"], [" i want the white one.", "is it the first one from the right?"], [" yes, this is it.", "ok, i see."], [" yes, this is it.", ""]], "<bin_701> <bin_569> <bin_842> <bin_786>", "", "openimages_v1.2/imgs/9bd9cd38c2097544_Balloon_3.jpg", [0.7012500166893005, 0.5699999928474426, 0.8431249856948853, 0.786666989326477], [[0.70125, 0.57, 0.843125, 0.786667]]]
[200, [[" i want to eat a strawberry.", "which one do you want?"], [" i want the one on the right.", "ok, here you go."], [" i want the one on the far right.", ""]], "<bin_682> <bin_66> <bin_998> <bin_312>", "", "openimages_v1.2/imgs/e7ed12194571f37f_Food_Fruit_Strawberry_4.jpg", [0.6825000047683716, 0.06624999642372131, 0.9991670846939087, 0.31187498569488525], [[0.6825, 0.06625, 0.999167, 0.311875]]]
[201, [[" i want to buy a sofa.", "which one do you want?"], [" i want the blue one.", "is it the one on the left?"], [" yes, this is it.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_50> <bin_379> <bin_517> <bin_718>", "", "openimages_v1.2/imgs/9b9532e49afe3269_Bottle_Person_Carnivore_Cat_Couch_Food_Fruit_House_Fireplace_Table_Furniture_Animal_1.jpg", [0.15215200185775757, 0.497996985912323, 0.4154149889945984, 0.7930570244789124], [[0.05, 0.37916666666666665, 0.5171875, 0.71875]]]
[202, [[" that duck is so cute!", "which one are you talking about?"], [" the white one.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_468> <bin_366> <bin_646> <bin_879>", "", "openimages_v1.2/imgs/6090786c386c9a6e_Bird_Goose_4.jpg", [0.46875, 0.3666670024394989, 0.6468750238418579, 0.8799999952316284], [[0.46875, 0.366667, 0.646875, 0.88]]]
[203, [[" that cow is so cute!", "which one are you talking about?"], [" the black one.", "is it the one on the far right?"], [" yes, it has a tag on its ear.", ""]], "<bin_717> <bin_372> <bin_998> <bin_712>", "", "openimages_v1.2/imgs/c4a9fafe93532b3b_Cattle_Animal_5.jpg", [0.7174999713897705, 0.37202900648117065, 0.9993749856948853, 0.7129799723625183], [[0.7175, 0.372029, 0.999375, 0.71298]]]
[204, [[" that duck is so cute!", "which one are you talking about?"], [" the one on the far left.", ""]], "<bin_104> <bin_732> <bin_164> <bin_801>", "", "openimages_v1.2/imgs/42adc6b44463317e_Bird_Duck_Building_Goose_Animal_8.jpg", [0.10375000536441803, 0.7330049872398376, 0.16374999284744263, 0.8019700050354004], [[0.10374999999999998, 0.733005, 0.16375, 0.80197]]]
[205, [[" that saxophone is so beautiful!", "which one are you talking about?"], [" the one in front of the guy in front of us.", ""]], "<bin_489> <bin_714> <bin_694> <bin_998>", "", "openimages_v1.2/imgs/0b633cd177ace160_Saxophone_3.jpg", [0.37812501192092896, 0.23076899349689484, 0.5637500286102295, 0.9090059995651245], [[0.489375, 0.714822, 0.694375, 0.999062]]]
[206, [[" i want to buy a bottle of wine.", "which bottle do you want?"], [" i want the bottle on the left.", "is it the first bottle from the left?"], [" yes, this is the bottle.", ""]], "<bin_236> <bin_1> <bin_513> <bin_992>", "", "openimages_v1.2/imgs/25b17c8df7b2400b_Beer_Drink_Bottle_Wine_5.jpg", [0.0, 0.0, 0.03828100115060806, 0.18958300352096558], [[0.23593800000000004, 0.0, 0.513281, 0.992708]]]
[207, [[" that flower is so beautiful!", "which one are you talking about?"], [" the yellow one.", "is it the one on the far right?"], [" no, it's on the far left.", ""]], "<bin_37> <bin_36> <bin_99> <bin_115>", "", "openimages_v1.2/imgs/76e97ac4f3bff9f3_Plant_Rose_Tree_Flower_4.jpg", [0.22062499821186066, 0.24583299458026886, 0.2918750047683716, 0.3283329904079437], [[0.0375, 0.035833, 0.099375, 0.115]]]
[208, [[" can you pass me that box?", "which one do you want?"], [" the orange one.", "is it the one on the far right?"], [" no, it's the one in the middle.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_416> <bin_60> <bin_746> <bin_961>", "", "openimages_v1.2/imgs/a20e5f23d0557a47_Box_5.jpg", [0.4168749749660492, 0.059999994933605194, 0.7462499737739563, 0.9621049761772156], [[0.416875, 0.060000000000000005, 0.74625, 0.962105]]]
[209, [[" i want a teapot.", "which one do you want?"], [" i want the yellow one.", "is it the first one from the left?"], [" yes, this is it.", ""]], "<bin_263> <bin_81> <bin_591> <bin_447>", "", "openimages_v1.2/imgs/76005037c3e150f7_Teapot_Snack_Dessert_Food_Baked goods_Pastry_6.jpg", [0.13593700528144836, 0.2739579975605011, 0.24687500298023224, 0.40104204416275024], [[0.263281, 0.08125, 0.591406, 0.447917]]]
[210, [[" can you pass me that plate?", "which one do you want?"], [" the empty one.", "is it the one on the left?"], [" yes.", ""]], "<bin_89> <bin_276> <bin_332> <bin_445>", "", "openimages_v1.2/imgs/0bab6ff2ae17a93c_Snack_Dessert_Drink_Dairy Product_Coffee cup_Food_Saucer_Table_Tableware_Plate_Tea_Platter_4.jpg", [0.013124999590218067, 0.17833299934864044, 0.21375000476837158, 0.31083300709724426], [[0.08875, 0.27583299999999994, 0.331875, 0.445833]]]
[211, [[" can you pass me that candle?", "which one do you want?"], [" i want the one on the left.", "ok, here you go."], [" i want the one on the far left.", ""]], "<bin_67> <bin_387> <bin_309> <bin_445>", "", "openimages_v1.2/imgs/ce0aa696dec671a8_Candle_Drink_3.jpg", [0.06718750298023224, 0.38749998807907104, 0.30937498807907104, 0.445833295583725], [[0.0671875, 0.3875, 0.309375, 0.44583333333333336]]]
[212, [[" i want a balloon.", "which one do you want?"], [" i want the blue one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" yes, this is the one.", ""]], "<bin_370> <bin_85> <bin_626> <bin_440>", "", "openimages_v1.2/imgs/bc0bef7cf6887ba0_Balloon_8.jpg", [0.21437498927116394, 0.1933329999446869, 0.43562498688697815, 0.4949999749660492], [[0.370625, 0.085, 0.626875, 0.44]]]
[213, [[" i want to buy a pumpkin.", "which one do you want?"], [" i want the one in the lower left corner.", "is it the one on the far left?"], [" no, it's the one in the middle.", "okay, here you go."], [" yes, this is it.", ""]], "<bin_295> <bin_709> <bin_414> <bin_901>", "", "openimages_v1.2/imgs/00ebd5f7d83b5f83_Food_Pumpkin_Squash (Plant)_Vegetable_25.jpg", [0.19374999403953552, 0.7025761008262634, 0.32343751192092896, 0.8548009991645813], [[0.2953125, 0.7096018735362998, 0.4140625, 0.9016393442622951]]]
[214, [[" can you pass me that bottle of wine?", "which bottle do you want?"], [" the bottle in the middle.", "ok, here you go."], [" the bottle with the white label.", ""]], "<bin_478> <bin_1> <bin_851> <bin_909>", "", "openimages_v1.2/imgs/01b631ec361104d6_Drink_Bottle_Wine_3.jpg", [0.4689269959926605, 0.0, 0.8502820134162903, 0.9112499952316284], [[0.4783430000000001, 0.0, 0.852166, 0.9099999999999999]]]
[215, [[" can you help me move that bucket?", "which one?"], [" the one on the ground.", "the one on the far left?"], [" no, the one on the right.", ""]], "<bin_488> <bin_643> <bin_653> <bin_881>", "", "openimages_v1.2/imgs/05a46b228c3eb20b_Building_Barrel_Window_3.jpg", [0.48875004053115845, 0.6432639956474304, 0.6537500023841858, 0.8823530077934265], [[0.48875, 0.643264, 0.65375, 0.882353]]]
[216, [[" i want to buy a loaf of bread.", "which one do you want?"], [" i want the one in the bag.", "is it the one on the left?"], [" no, it's the one in the middle.", "okay, here you go."], [" yes, this is the one.", ""]], "<bin_151> <bin_341> <bin_342> <bin_610>", "", "openimages_v1.2/imgs/0f917ecf54ea1a2e_Fast food_Cheese_Snack_Dessert_Dairy Product_Food_5.jpg", [0.1368750035762787, 0.25422099232673645, 0.27125000953674316, 0.4043149948120117], [[0.15125, 0.34146299999999996, 0.341875, 0.610694]]]
[217, [[" can you pass me that dice?", "which one?"], [" the small one.", "is it the one on the far right?"], [" yes.", ""]], "<bin_631> <bin_539> <bin_894> <bin_824>", "", "openimages_v1.2/imgs/b842afd99d0cf1f6_Dice_3.jpg", [0.6317689418792725, 0.5397111773490906, 0.8953068852424622, 0.8249098062515259], [[0.63176894, 0.5397112, 0.8953068999999999, 0.82490975]]]
[218, [[" that camel is so cute!", "which one are you talking about?"], [" the one behind the man.", ""]], "<bin_1> <bin_223> <bin_287> <bin_922>", "", "openimages_v1.2/imgs/1b459ce1f8a0e5d1_Camel_4.jpg", [0.47999995946884155, 0.3630389869213104, 0.6837499737739563, 0.634145975112915], [[0.0, 0.223265, 0.286875, 0.923077]]]
[219, [[" i want to eat a piece of cake.", "which piece do you want?"], [" i want the white one.", "is it the first piece from the left?"], [" yes, this is it.", ""]], "<bin_549> <bin_365> <bin_777> <bin_567>", "", "openimages_v1.2/imgs/10722200d89f40dd_Food_Tableware_Bowl_7.jpg", [0.07374999672174454, 0.36833301186561584, 0.3512499928474426, 0.6600000262260437], [[0.549375, 0.365, 0.7775, 0.5675]]]
[220, [[" i want a drink.", "which one do you want?"], [" i want the black one.", "is it the first one from the right?"], [" yes, that one.", "ok, i see."], [" yes, that's right.", ""]], "<bin_797> <bin_322> <bin_980> <bin_628>", "", "openimages_v1.2/imgs/ecf3cbe845d2b707_Juice_Drink_5.jpg", [0.7981250286102295, 0.32270199060440063, 0.981249988079071, 0.6285179853439331], [[0.798125, 0.322702, 0.98125, 0.628518]]]
[221, [[" that duck is so cute!", "which one are you talking about?"], [" the one swimming in the water.", "is it the one on the far right?"], [" no, it's the one on the far left.", ""]], "<bin_128> <bin_381> <bin_261> <bin_500>", "", "openimages_v1.2/imgs/aad4065b5f00502a_Bird_Duck_Goose_Animal_5.jpg", [0.1285180002450943, 0.38187500834465027, 0.2617259919643402, 0.5], [[0.128518, 0.381875, 0.261726, 0.5]]]
[222, [[" that zebra is so cute!", "which one are you talking about?"], [" the one in the middle.", ""]], "<bin_529> <bin_661> <bin_634> <bin_764>", "", "openimages_v1.2/imgs/134a9ea1af3d94fd_Plant_Tree_Zebra_Animal_4.jpg", [0.5296875238418579, 0.6611764430999756, 0.6343749761581421, 0.7647058963775635], [[0.5296875, 0.6611764705882353, 0.634375, 0.7647058823529411]]]
[223, [[" that car is blocking me, please move it.", "which one are you talking about?"], [" the red one.", "is it the first one from the right?"], [" no, it's the first one from the left.", ""]], "<bin_1> <bin_759> <bin_121> <bin_969>", "", "openimages_v1.2/imgs/84fa61d82ab235f9_Land vehicle_Truck_Vehicle_Auto part_Building_Van_Car_4.jpg", [0.0, 0.7600600123405457, 0.12125001102685928, 0.9701939821243286], [[0.0, 0.76006, 0.12125, 0.970194]]]
[224, [[" i want to buy a chicken.", "which one do you want?"], [" i want the brown one.", "is it the first one from the left?"], [" yes, that one.", "ok, i see."], [" yes, that's it.", ""]], "<bin_286> <bin_202> <bin_812> <bin_972>", "", "openimages_v1.2/imgs/4f297afaccfe9e65_Chicken_Animal_3.jpg", [0.2862499952316284, 0.2022469937801361, 0.8131250143051147, 0.9728459119796753], [[0.28625, 0.202247, 0.813125, 0.972846]]]
[225, [[" i want a bottle of beer.", "which bottle do you want?"], [" i want the bottle on the right.", "ok, here you go."], [" i want the bottle next to the orange box.", ""]], "<bin_636> <bin_28> <bin_817> <bin_778>", "", "openimages_v1.2/imgs/f75397411b150bfb_Drink_Bottle_4.jpg", [0.4925000071525574, 0.15916700661182404, 0.6012499928474426, 0.4000000059604645], [[0.63625, 0.028333, 0.818125, 0.779167]]]
[226, [[" i want to buy a phone.", "which one do you want?"], [" i want the one in my hand.", "is it the one with the black phone?"], [" yes, this is it.", ""]], "<bin_317> <bin_95> <bin_646> <bin_988>", "", "openimages_v1.2/imgs/0326b1ef4529097c_Mobile phone_3.jpg", [0.3171429932117462, 0.09462399780750275, 0.6471430063247681, 0.9892470240592957], [[0.317143, 0.09462400000000001, 0.647143, 0.989247]]]
[227, [[" that duck is so cute!", "which one are you talking about?"], [" the one on the far left.", ""]], "<bin_36> <bin_472> <bin_282> <bin_607>", "", "openimages_v1.2/imgs/8e9dabf8b73c9426_Bird_Duck_Animal_5.jpg", [0.035624999552965164, 0.4728209972381592, 0.28187501430511475, 0.607420027256012], [[0.035625, 0.47282100000000005, 0.281875, 0.60742]]]
[228, [[" that penguin is so cute!", "which one are you talking about?"], [" the biggest one.", ""]], "<bin_1> <bin_1> <bin_997> <bin_776>", "", "openimages_v1.2/imgs/31d3d97132fbb2f2_Bird_Penguin_Animal_3.jpg", [0.0, 0.0, 0.9980391263961792, 0.776562511920929], [[0.0, 0.0, 0.9980392156862745, 0.7765625]]]
[229, [[" this statue is so beautiful!", "which one?"], [" this is the one in front of us.", "is it the closest to us?"], [" yes.", ""]], "<bin_157> <bin_54> <bin_545> <bin_886>", "", "openimages_v1.2/imgs/18935844f4640142_Building_Bust_4.jpg", [0.1574999988079071, 0.05362199991941452, 0.5456249713897705, 0.887112021446228], [[0.1575, 0.053622, 0.545625, 0.887112]]]
[230, [[" that flag is so beautiful!", "which side are you talking about?"], [" the white side.", "is it the first side from the right?"], [" yes.", ""]], "<bin_766> <bin_814> <bin_868> <bin_923>", "", "openimages_v1.2/imgs/f1b4acbec6bbfc05_Flag_Sculpture_3.jpg", [0.7671639919281006, 0.8143750429153442, 0.8686569929122925, 0.9237500429153442], [[0.767164, 0.814375, 0.868657, 0.92375]]]
[231, [[" i want a glass of wine.", "which one do you want?"], [" i want the one next to the bottle.", "ok, here you go."], [" i want the one on the right.", ""]], "<bin_486> <bin_400> <bin_725> <bin_552>", "", "openimages_v1.2/imgs/74089d28a5de03ce_Drink_Tableware_Bottle_Wine_Wine glass_2.jpg", [0.48648601770401, 0.40051698684692383, 0.7258689999580383, 0.5529720187187195], [[0.486486, 0.40051699999999996, 0.7258689999999999, 0.552972]]]
[232, [[" i want to buy a doll.", "which one do you want?"], [" i want the biggest one.", "is it the first one from the left?"], [" no, it's the first one from the right.", "ok, i see."], [" yes, that's it.", ""]], "<bin_250> <bin_91> <bin_759> <bin_561>", "", "openimages_v1.2/imgs/40a88119b69472a3_Toy_Animal_5.jpg", [0.6034640073776245, 0.10625000298023224, 0.8851410150527954, 0.36937499046325684], [[0.250684, 0.090625, 0.759344, 0.56125]]]
[233, [[" that ostrich is so cute!", "which one are you talking about?"], [" the one with the head down.", "is it the one on the far right?"], [" yes.", ""]], "<bin_720> <bin_562> <bin_876> <bin_862>", "", "openimages_v1.2/imgs/0c2a9246f4ec37d1_Bird_Ostrich_8.jpg", [0.7203124761581421, 0.5625, 0.8765624761581421, 0.862500011920929], [[0.7203125, 0.5625, 0.8765625, 0.8625]]]
[234, [[" i want to eat an apple.", "which one do you want?"], [" i want the one next to the bird.", "is it the one on the far right?"], [" yes, this is it.", ""]], "<bin_528> <bin_373> <bin_782> <bin_654>", "", "openimages_v1.2/imgs/9218c9cbb81f1b78_Apple_Bird_Food_Fruit_Animal_2.jpg", [0.528124988079071, 0.3730680048465729, 0.7831249833106995, 0.6541569828987122], [[0.528125, 0.373068, 0.783125, 0.654157]]]
[235, [[" can you pass me that bowl?", "which one do you want?"], [" the one with the spoon in it.", "is it the one on the left?"], [" no, it's the one on the right.", "ok, here you go."], [" yes, this is the bowl.", ""]], "<bin_627> <bin_300> <bin_840> <bin_559>", "", "openimages_v1.2/imgs/b35d186018bbecee_Food_Egg (Food)_Mixing bowl_Tableware_Bowl_Platter_3.jpg", [0.43906301259994507, 0.21294400095939636, 0.6156250238418579, 0.44050103425979614], [[0.628125, 0.300626, 0.840625, 0.559499]]]
[236, [[" i want to eat an orange.", "which one do you want?"], [" i want the one hanging on the right.", "is it the one on the far right?"], [" yes, this is it.", ""]], "<bin_777> <bin_88> <bin_846> <bin_174>", "", "openimages_v1.2/imgs/189161d9cb3de9a8_Orange_15.jpg", [0.7777777910232544, 0.08806262165307999, 0.8466353416442871, 0.17416830360889435], [[0.7777777777777778, 0.08806262230919765, 0.8466353677621283, 0.17416829745596868]]]
[237, [[" that flower pot is so beautiful!", "which one are you talking about?"], [" the one on the far right.", ""]], "<bin_775> <bin_260> <bin_998> <bin_549>", "", "openimages_v1.2/imgs/f245ad4135957343_Tree_Flowerpot_5.jpg", [0.7762500047683716, 0.25999999046325684, 0.9993749856948853, 0.5491669774055481], [[0.77625, 0.26, 0.999375, 0.549167]]]
[238, [[" that penguin is so cute!", "which one are you talking about?"], [" the one in front of us.", "is it the one on the far right?"], [" no, it's the one in the middle.", ""]], "<bin_524> <bin_374> <bin_684> <bin_723>", "", "openimages_v1.2/imgs/a7709289ffc5326e_Bird_Penguin_Animal_5.jpg", [0.5249999761581421, 0.37470725178718567, 0.684374988079071, 0.7236533761024475], [[0.525, 0.3747072599531616, 0.684375, 0.7236533957845434]]]
[239, [[" i want to buy an ipod.", "which one do you want?"], [" i want the white one.", "is it the first one from the right?"], [" yes, this is it.", ""]], "<bin_308> <bin_368> <bin_674> <bin_758>", "", "openimages_v1.2/imgs/23ba8426a98e0afd_Ipod_3.jpg", [0.30812498927116394, 0.36833301186561584, 0.675000011920929, 0.7591670155525208], [[0.308125, 0.368333, 0.675, 0.759167]]]
[240, [[" this flower is so beautiful!", "which one?"], [" this is the one in front of us.", "is it the closest one to us?"], [" yes.", ""]], "<bin_357> <bin_277> <bin_631> <bin_755>", "", "openimages_v1.2/imgs/3b6c1e257a9ca1d6_Flower_Plant_Rose_4.jpg", [0.35707899928092957, 0.27770400047302246, 0.6313449740409851, 0.7556740045547485], [[0.357079, 0.277704, 0.631345, 0.755674]]]
[241, [[" i want to buy a bottle of wine.", "which bottle do you want?"], [" i want the green bottle.", "is it the first bottle from the left?"], [" yes, that bottle.", ""]], "<bin_195> <bin_50> <bin_498> <bin_212>", "", "openimages_v1.2/imgs/557690a370ee3578_Bottle_8.jpg", [0.25562500953674316, 0.7767350077629089, 0.515625, 0.9015009999275208], [[0.195, 0.049719, 0.49812500000000004, 0.212008]]]
[242, [[" can you give me a lemon?", "which one do you want?"], [" i want the one at the bottom.", "ok, here you go."], [" i want the one on the far left.", ""]], "<bin_175> <bin_840> <bin_341> <bin_972>", "", "openimages_v1.2/imgs/6641dc2beb04866a_Food_Fruit_Lemon_4.jpg", [0.17499999701976776, 0.8412500619888306, 0.3416669964790344, 0.9725000262260437], [[0.175, 0.8412500000000002, 0.341667, 0.9724999999999999]]]
[243, [[" i want to eat a strawberry.", "which one do you want?"], [" i want the biggest one.", "is it the first one from the left?"], [" no, it's the first one from the right.", "okay, i see."], [" i want the top one.", ""]], "<bin_365> <bin_4> <bin_672> <bin_363>", "", "openimages_v1.2/imgs/78f359a51b210fe4_Food_Fruit_Strawberry_4.jpg", [0.6168749928474426, 0.2728480100631714, 0.9199999570846558, 0.6251659989356995], [[0.365, 0.003974, 0.673125, 0.363576]]]