onnx-community
/

Florence-2-base-ft

Image-Text-to-Text

Transformers.js

text2text-generation

text-generation

Model card Files Files and versions Community

Florence-2-base-ft / preprocessor_config.json

Xenova's picture

Xenova HF staff

Update preprocessor_config.json

c08885e verified 5 months ago

2.67 kB

	{
	"_valid_processor_keys": [
	"images",
	"do_resize",
	"size",
	"resample",
	"do_center_crop",
	"crop_size",
	"do_rescale",
	"rescale_factor",
	"do_normalize",
	"image_mean",
	"image_std",
	"do_convert_rgb",
	"return_tensors",
	"data_format",
	"input_data_format"
	],
	"crop_size": {
	"height": 768,
	"width": 768
	},
	"do_center_crop": false,
	"do_convert_rgb": null,
	"do_normalize": true,
	"do_rescale": true,
	"do_resize": true,
	"image_mean": [
	0.485,
	0.456,
	0.406
	],
	"image_processor_type": "CLIPImageProcessor",
	"image_seq_length": 577,
	"image_std": [
	0.229,
	0.224,
	0.225
	],
	"processor_class": "Florence2Processor",
	"resample": 3,
	"rescale_factor": 0.00392156862745098,
	"size": {
	"height": 768,
	"width": 768
	},
	"tasks_answer_post_processing_type": {
	"<OCR>": "pure_text",
	"<OCR_WITH_REGION>": "ocr",
	"<CAPTION>": "pure_text",
	"<DETAILED_CAPTION>": "pure_text",
	"<MORE_DETAILED_CAPTION>": "pure_text",
	"<OD>": "description_with_bboxes",
	"<DENSE_REGION_CAPTION>": "description_with_bboxes",
	"<CAPTION_TO_PHRASE_GROUNDING>": "phrase_grounding",
	"<REFERRING_EXPRESSION_SEGMENTATION>": "polygons",
	"<REGION_TO_SEGMENTATION>": "polygons",
	"<OPEN_VOCABULARY_DETECTION>": "description_with_bboxes_or_polygons",
	"<REGION_TO_CATEGORY>": "pure_text",
	"<REGION_TO_DESCRIPTION>": "pure_text",
	"<REGION_TO_OCR>": "pure_text",
	"<REGION_PROPOSAL>": "bboxes"
	},
	"task_prompts_without_inputs": {
	"<OCR>": "What is the text in the image?",
	"<OCR_WITH_REGION>": "What is the text in the image, with regions?",
	"<CAPTION>": "What does the image describe?",
	"<DETAILED_CAPTION>": "Describe in detail what is shown in the image.",
	"<MORE_DETAILED_CAPTION>": "Describe with a paragraph what is shown in the image.",
	"<OD>": "Locate the objects with category name in the image.",
	"<DENSE_REGION_CAPTION>": "Locate the objects in the image, with their descriptions.",
	"<REGION_PROPOSAL>": "Locate the region proposals in the image."
	},
	"task_prompts_with_input": {
	"<CAPTION_TO_PHRASE_GROUNDING>": "Locate the phrases in the caption: {input}",
	"<REFERRING_EXPRESSION_SEGMENTATION>": "Locate {input} in the image with mask",
	"<REGION_TO_SEGMENTATION>": "What is the polygon mask of region {input}",
	"<OPEN_VOCABULARY_DETECTION>": "Locate {input} in the image.",
	"<REGION_TO_CATEGORY>": "What is the region {input}?",
	"<REGION_TO_DESCRIPTION>": "What does the region {input} describe?",
	"<REGION_TO_OCR>": "What text is in the region {input}?"
	}
	}