# Demo script: run GroundingDINO text-prompted detection on a single image,
# optionally in half precision, and save the annotated result to disk.
from groundingdino.util.inference import load_model, load_image, predict, annotate, Model
import cv2

# Model definition and weights handed to load_model().
CONFIG_PATH = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
CHECKPOINT_PATH = "./groundingdino_swint_ogc.pth"
# Device used both to build the model and to run predict().
DEVICE = "cuda"

# Input image and the caption passed to predict().
IMAGE_PATH = "assets/demo7.jpg"
TEXT_PROMPT = "Horse. Clouds. Grasses. Sky. Hill."
# Forwarded to predict() as box_threshold / text_threshold.
# NOTE(review): presumably minimum confidences for keeping a box and for
# matching a phrase to it -- confirm against the predict() documentation.
BOX_TRESHOLD = 0.35
TEXT_TRESHOLD = 0.25

# When True, both the model and the input image are cast with .half()
# before inference.
# NOTE(review): half precision is normally only worthwhile on a CUDA device;
# confirm CPU support before combining FP16_INFERENCE=True with DEVICE="cpu".
FP16_INFERENCE = True

# Where the annotated frame is written.
OUTPUT_PATH = "annotated_image.jpg"

# load_image() returns two values: the first is used for drawing, the second
# is what gets fed to the model.
image_source, image = load_image(IMAGE_PATH)

# Pass DEVICE explicitly so the model is built for the same device predict()
# runs on, instead of silently relying on load_model()'s default.
model = load_model(CONFIG_PATH, CHECKPOINT_PATH, device=DEVICE)

if FP16_INFERENCE:
    # Input and model must share a dtype, so cast both together.
    image = image.half()
    model = model.half()

boxes, logits, phrases = predict(
    model=model,
    image=image,
    caption=TEXT_PROMPT,
    box_threshold=BOX_TRESHOLD,
    text_threshold=TEXT_TRESHOLD,
    device=DEVICE,
)

annotated_frame = annotate(
    image_source=image_source,
    boxes=boxes,
    logits=logits,
    phrases=phrases,
)

# cv2.imwrite() reports failure through its return value rather than raising,
# so check it to avoid finishing "successfully" without an output file.
if not cv2.imwrite(OUTPUT_PATH, annotated_frame):
    raise OSError(f"cv2.imwrite failed to write {OUTPUT_PATH!r}")