xj
[feat] add benchmark results and huge ckpt/logs
6f5c44e
["index", "text_input", "text_output", "text_gen", "image_path", "bbox_target", "all_target"]
[245, [[" can you give me that ball?", "which one do you want?"], [" i want the pink one.", "is it the one on the far right?"], [" yes, this is it.", "ok, here you go."], [" yes, this is it.", ""]], "<bin_434> <bin_420> <bin_576> <bin_634>", "", "openimages_v1.2/imgs/98f07cca2fddff43_Ball_Plant_Swimming pool_Flowerpot_3.jpg", [0.645312488079071, 0.3176470696926117, 0.800000011920929, 0.49176469445228577], [[0.434375, 0.419962, 0.57625, 0.634652]]]
[246, [[" that flower is so beautiful!", "which one are you talking about?"], [" the one in the lower right corner.", ""]], "<bin_839> <bin_611> <bin_939> <bin_697>", "", "openimages_v1.2/imgs/75e90295e269f683_Plant_Flower_Common sunflower_9.jpg", [0.8395870327949524, 0.6112499833106995, 0.9399629831314087, 0.6981250047683716], [[0.839587, 0.61125, 0.939963, 0.698125]]]
[247, [[" i want to buy a pair of shorts.", "which one do you want?"], [" i want the black one.", "is it the first one from the left?"], [" no, it's the first one from the right.", "ok, i see."], [" i want the first one from the left.", ""]], "<bin_339> <bin_786> <bin_490> <bin_906>", "", "openimages_v1.2/imgs/00531eacca42c7a7_Shorts_Sports equipment_5.jpg", [0.0, 0.8793455958366394, 0.04531250149011612, 0.9611452221870422], [[0.339193, 0.787053, 0.490885, 0.907155]]]
[248, [[" that baboon is so cute!", "which one?"], [" the big one.", "is it the one on the far right?"], [" yes.", ""]], "<bin_247> <bin_91> <bin_496> <bin_643>", "", "openimages_v1.2/imgs/9f45986e2299a6b3_Monkey_3.jpg", [0.24687500298023224, 0.09133489429950714, 0.49687498807907104, 0.6440281271934509], [[0.246875, 0.09133489461358314, 0.496875, 0.6440281030444965]]]
[249, [[" can you pass me that water bottle?", "which one do you want?"], [" i want the green one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" yes, this is it.", ""]], "<bin_1> <bin_44> <bin_240> <bin_998>", "", "openimages_v1.2/imgs/4e215fab991b8f63_Drink_Bottle_3.jpg", [0.0, 0.04408999904990196, 0.23999997973442078, 0.9990619421005249], [[0.0, 0.04409, 0.24, 0.999062]]]
[250, [[" i want to buy a toy.", "which one do you want?"], [" i want the green one.", "is it the first one from the right?"], [" no, it's the first one from the left.", "ok, i see."], [" yes, that's it.", ""]], "<bin_419> <bin_802> <bin_576> <bin_948>", "", "openimages_v1.2/imgs/2f31ed01d5381966_Toy_5.jpg", [0.4190030097961426, 0.8024999499320984, 0.5763239860534668, 0.9493749737739563], [[0.419003, 0.8025, 0.576324, 0.9493750000000001]]]
[251, [[" can you pass me that faucet?", "which one do you want?"], [" i want the one on the left.", "ok, here you go."], [" i want the middle faucet.", ""]], "<bin_23> <bin_591> <bin_159> <bin_701>", "", "openimages_v1.2/imgs/09221feb8b5ba74a_Sink_Bidet_Shower_Tap_Bathroom cabinet_2.jpg", [0.05937499925494194, 0.5858330130577087, 0.13625000417232513, 0.6741669774055481], [[0.0234375, 0.5916666666666667, 0.159375, 0.7020833333333333], [0.890625, 0.7104166666666667, 0.9609375, 0.7520833333333333]]]