"""Gradio demo that runs a YOLOv10 detector on an uploaded image or video.

For images, the detected class ids are translated to drug names using the
``image_class.csv`` lookup table and shown next to the annotated picture.
"""

import tempfile

import cv2
import gradio as gr
import pandas as pd
from ultralytics import YOLOv10

# Weights used for every request. Alternative weights: './pills_yolov10.pt'.
MODEL_PATH = './drug_yolov10.pt'

# Frame rate used when the input container does not report one (reported as 0).
DEFAULT_FPS = 30.0

# Lookup table: one row per distinct (name, class) pair.
df = pd.read_csv('image_class.csv')
df = df[['name', 'class']].drop_duplicates()


def _class_name(class_id):
    """Return the name mapped to ``class_id``, or a placeholder if unmapped.

    When several rows share the same class id the first one is used, so a
    malformed lookup table cannot crash an inference request.
    """
    matches = df.loc[df['class'] == class_id, 'name']
    if matches.empty:
        return f'Unknown (class {class_id})'
    return str(matches.iloc[0])


def _annotate_video(model, video, image_size, conf_threshold, iou_threshold):
    """Run ``model`` on every frame of ``video`` and write an annotated copy.

    Args:
        model: Loaded detector exposing ``predict``.
        video: Path of the input video file.
        image_size: Value forwarded to ``predict`` as ``imgsz``.
        conf_threshold: Value forwarded to ``predict`` as ``conf``.
        iou_threshold: Value forwarded to ``predict`` as ``iou``.

    Returns:
        Path of the annotated ``.webm`` file (VP8 encoded).

    Raises:
        gr.Error: If the video cannot be opened.
    """
    cap = cv2.VideoCapture(video)
    if not cap.isOpened():
        cap.release()
        raise gr.Error('Could not open the uploaded video.')

    out = None
    try:
        # Some containers report 0 FPS; a writer created with 0 FPS is unusable.
        fps = cap.get(cv2.CAP_PROP_FPS) or DEFAULT_FPS
        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )

        # Reserve the output name atomically (mktemp is race-prone). The file
        # is closed right away so the writer can reopen it on any platform,
        # and kept on disk (delete=False) because Gradio serves it afterwards.
        with tempfile.NamedTemporaryFile(suffix='.webm', delete=False) as tmp:
            output_video_path = tmp.name

        out = cv2.VideoWriter(
            output_video_path,
            cv2.VideoWriter_fourcc(*'vp80'),
            fps,
            frame_size,
        )

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            results = model.predict(
                source=frame,
                imgsz=image_size,
                conf=conf_threshold,
                iou=iou_threshold,
            )
            out.write(results[0].plot())
    finally:
        # Release the handles even when inference fails part-way through.
        cap.release()
        if out is not None:
            out.release()

    return output_video_path


def yolov10_inference(image, video, image_size, conf_threshold, iou_threshold):
    """Detect objects in ``image`` if given, otherwise in ``video``.

    Args:
        image: Image to analyse (a PIL image when called from the UI), or
            ``None`` to process ``video`` instead.
        video: Path of a video file; only used when ``image`` is ``None``.
        image_size: Inference size forwarded to the model as ``imgsz``.
        conf_threshold: Minimum confidence forwarded as ``conf``.
        iou_threshold: IoU threshold forwarded as ``iou``.

    Returns:
        A 4-tuple ``(annotated_image, annotated_video_path, classes, names)``
        matching the four UI outputs. For images the video slot is ``None``;
        for videos the image slot is ``None`` and both text slots are empty
        strings.

    Raises:
        gr.Error: If neither input is provided, or the video is unreadable.
    """
    if image is None and video is None:
        raise gr.Error('Please provide an image or a video first.')

    # A fresh instance per request means no model state is shared between
    # concurrently running Gradio workers.
    model = YOLOv10(MODEL_PATH)

    if image is not None:
        results = model.predict(
            source=image,
            imgsz=image_size,
            conf=conf_threshold,
            iou=iou_threshold,
        )
        annotated_image = results[0].plot()

        boxes = results[0].boxes
        class_ids = [int(c) for c in boxes.cls.tolist()]
        scores = boxes.conf.tolist()

        # Format with a fixed precision: a bare ``round(x, 2) * 100`` prints
        # float artefacts such as 56.99999999999999.
        clcf = '\n'.join(
            f'Class:{class_id} , Confidence:{round(score, 2) * 100:.1f}%'
            for class_id, score in zip(class_ids, scores)
        )
        name = '\n'.join(_class_name(class_id) for class_id in class_ids)

        # Reverse the channel axis (BGR -> RGB) before handing it to Gradio.
        return annotated_image[:, :, ::-1], None, clcf, name

    output_video_path = _annotate_video(
        model, video, image_size, conf_threshold, iou_threshold
    )
    # Four values are required because the click handler has four outputs.
    return None, output_video_path, '', ''


def yolov10_inference_for_examples(image, image_size, conf_threshold, iou_threshold):
    """Run image inference for ``gr.Examples`` and return only the image."""
    annotated_image, *_ = yolov10_inference(
        image, None, image_size, conf_threshold, iou_threshold
    )
    return annotated_image


def app():
    """Build the detection UI: inputs on the left, results on the right."""
    with gr.Blocks():
        with gr.Row():
            with gr.Column():
                image = gr.Image(type="pil", label="Image", visible=True)
                video = gr.Video(label="Video", visible=False)
                input_type = gr.Radio(
                    choices=["Image", "Video"],
                    value="Image",
                    label="Input Type",
                )
                # NOTE(review): minimum=0 permits an image size of 0 —
                # confirm the model tolerates it or raise the minimum.
                image_size = gr.Slider(
                    label="Image Size",
                    minimum=0,
                    maximum=1280,
                    step=10,
                    value=640,
                )
                conf_threshold = gr.Slider(
                    label="Confidence Threshold",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.25,
                )
                iou_threshold = gr.Slider(
                    label="IOU Threshold",
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=0.6,
                )
                yolov10_infer = gr.Button(value="Detect Objects")

            with gr.Column():
                output_image = gr.Image(type="numpy", label="Annotated Image", visible=True)
                output_video = gr.Video(label="Annotated Video", visible=False)
                output_name = gr.Textbox(label='Predicted Drug Name')
                output_class = gr.Textbox(label='Predicted Class')

        def update_visibility(input_type):
            """Show the image widgets or the video widgets, never both."""
            is_image = input_type == "Image"
            return (
                gr.update(visible=is_image),      # image input
                gr.update(visible=not is_image),  # video input
                gr.update(visible=is_image),      # annotated image
                gr.update(visible=not is_image),  # annotated video
            )

        input_type.change(
            fn=update_visibility,
            inputs=[input_type],
            outputs=[image, video, output_image, output_video],
        )

        def run_inference(image, video, image_size, conf_threshold, iou_threshold, input_type):
            """Forward only the input that matches the selected input type."""
            if input_type == "Image":
                return yolov10_inference(image, None, image_size, conf_threshold, iou_threshold)
            return yolov10_inference(None, video, image_size, conf_threshold, iou_threshold)

        yolov10_infer.click(
            fn=run_inference,
            inputs=[image, video, image_size, conf_threshold, iou_threshold, input_type],
            outputs=[output_image, output_video, output_class, output_name],
        )

        gr.Examples(
            examples=[
                ['./RXBASE-600_00071-1014-68_NLMIMAGE10_5715ABFD.jpg', 280, 0.2, 0.6],
                ['./RXNAV-600_13668-0095-90_RXNAVIMAGE10_D145E8EF.jpg', 640, 0.2, 0.7],
                ['./RXBASE-600_00074-7126-13_NLMIMAGE10_C003606B.jpg', 640, 0.2, 0.8],
            ],
            fn=yolov10_inference_for_examples,
            inputs=[
                image,
                image_size,
                conf_threshold,
                iou_threshold,
            ],
            outputs=[output_image],
            cache_examples='lazy',
        )


gradio_app = gr.Blocks()
with gradio_app:
    # NOTE(review): both banners below hold plain text only (no heading or
    # link markup); add the intended HTML tags if formatting is wanted.
    gr.HTML(
        """

YOLOv10: Real-Time End-to-End Object Detection

""")
    gr.HTML(
        """

arXiv | github

""")
    with gr.Row():
        with gr.Column():
            app()

if __name__ == '__main__':
    gradio_app.launch()