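"""Gradio Space comparing a base and a tuned SSD MobileNet V2 320x320 detector,
both custom trained to find the characters Luffy and Chopper. The Images tab
runs both models on an uploaded image at a chosen confidence threshold; the
Video tab shows pre-inferenced comparison videos."""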
import os
from io import BytesIO

import numpy as np
import tensorflow as tf
import gradio as gr
from PIL import Image
from huggingface_hub import snapshot_download
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
import cv2  # only referenced by the (currently disabled) detect_video helper below

# Label map for the custom classes (Luffy, Chopper)
PATH_TO_LABELS = 'data/label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

def pil_image_as_numpy_array(pilimg):
    # Convert a PIL image into a batched numpy array with shape [1, H, W, 3].
    img_array = tf.keras.utils.img_to_array(pilimg)
    img_array = np.expand_dims(img_array, axis=0)
    return img_array
    
def load_image_into_numpy_array(path):
    # Read an image file from disk into a batched numpy array.
    image_data = tf.io.gfile.GFile(path, 'rb').read()
    image = Image.open(BytesIO(image_data))
    return pil_image_as_numpy_array(image)

def load_model(model_repo_id):
    # Download the exported SavedModel from the Hugging Face Hub and load it.
    download_dir = snapshot_download(model_repo_id)
    saved_model_dir = os.path.join(download_dir, "saved_model")
    detection_model = tf.saved_model.load(saved_model_dir)
    return detection_model


def predict(pilimg, Threshold):
    # Run both models on the uploaded image; fall back to the default threshold
    # when the slider has not been set.
    image_np = pil_image_as_numpy_array(pilimg)

    if Threshold is None or Threshold == 0:
        Threshold = threshold_d
    else:
        Threshold = float(Threshold)

    return predict2(image_np, Threshold), predict3(image_np, Threshold), Threshold

def predict2(image_np, Threshold):
    # Run the base detection model and draw boxes above the given threshold.
    results = detection_model(image_np)

    # different object detection models have additional results
    result = {key: value.numpy() for key, value in results.items()}

    label_id_offset = 0
    image_np_with_detections = image_np.copy()

    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections[0],
        result['detection_boxes'][0],
        (result['detection_classes'][0] + label_id_offset).astype(int),
        result['detection_scores'][0],
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=20,
        min_score_thresh=Threshold,
        agnostic_mode=False,
        line_thickness=2)

    result_pil_img2 = tf.keras.utils.array_to_img(image_np_with_detections[0])

    return result_pil_img2


def predict3(image_np, Threshold):
    # Run the tuned detection model and draw boxes above the given threshold.
    results = detection_model2(image_np)

    # different object detection models have additional results
    result = {key: value.numpy() for key, value in results.items()}

    label_id_offset = 0
    image_np_with_detections = image_np.copy()

    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections[0],
        result['detection_boxes'][0],
        (result['detection_classes'][0] + label_id_offset).astype(int),
        result['detection_scores'][0],
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=20,
        min_score_thresh=Threshold,
        agnostic_mode=False,
        line_thickness=2)

    result_pil_img4 = tf.keras.utils.array_to_img(image_np_with_detections[0])

    return result_pil_img4

# def detect_video(video):
#     # Create a video capture object
#     cap = cv2.VideoCapture(video)

#     # Process frames in a loop
#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret:
#             break

#         # Expand dimensions since model expects images to have shape: [1, None, None, 3]
#         image_np_expanded = np.expand_dims(frame, axis=0)

#         # Run inference
#         output_dict = model(image_np_expanded)

#         # Extract detections
#         boxes = output_dict['detection_boxes'][0].numpy()
#         scores = output_dict['detection_scores'][0].numpy()
#         classes = output_dict['detection_classes'][0].numpy().astype(np.int64)

#         # Draw bounding boxes and labels
#         image_np_with_detections = viz_utils.visualize_boxes_and_labels_on_image_array(
#             frame,
#             boxes,
#             classes,
#             scores,
#             category_index,
#             use_normalized_coordinates=True,
#             max_boxes_to_draw=20,
#             min_score_thresh=.5,
#             agnostic_mode=False)

#         # Yield the processed frame
#         yield image_np_with_detections

#     # Release resources
#     cap.release()

# Pre-inferenced comparison videos bundled with the Space (shown in the Video tab)
a = os.path.join(os.path.dirname(__file__), "data/c_base_detected.mp4")
b = os.path.join(os.path.dirname(__file__), "data/c_tuned_detected.mp4")

# def video_demo(video1, video2):
#     return [video1, video2]

label_id_offset = 0
threshold_d = 0.38  # default prediction confidence threshold
REPO_ID = "apailang/mytfodmodel"        # base model
detection_model = load_model(REPO_ID)
REPO_ID2 = "apailang/mytfodmodeltuned"  # tuned model
detection_model2 = load_model(REPO_ID2)

samples_folder = 'data'
# pil_image = Image.open(image_path)
# image_arr = pil_image_as_numpy_array(pil_image)

# predicted_img = predict(image_arr)
# predicted_img.save('predicted.jpg')

test1 = os.path.join(os.path.dirname(__file__), "data/test1.jpeg")  
test2 = os.path.join(os.path.dirname(__file__), "data/test2.jpeg")  
test3 = os.path.join(os.path.dirname(__file__), "data/test3.jpeg")  
test4 = os.path.join(os.path.dirname(__file__), "data/test4.jpeg")
test5 = os.path.join(os.path.dirname(__file__), "data/test5.jpeg")
test6 = os.path.join(os.path.dirname(__file__), "data/test6.jpeg")
test7 = os.path.join(os.path.dirname(__file__), "data/test7.jpeg")
test8 = os.path.join(os.path.dirname(__file__), "data/test8.jpeg")
test9 = os.path.join(os.path.dirname(__file__), "data/test9.jpeg")
test10 = os.path.join(os.path.dirname(__file__), "data/test10.jpeg")
test11 = os.path.join(os.path.dirname(__file__), "data/test11.jpeg")
test12 = os.path.join(os.path.dirname(__file__), "data/test12.jpeg")

base_image = gr.Interface(
    fn=predict,
    inputs=[gr.Image(type="pil"),
            gr.Slider(minimum=0.05, maximum=1, step=0.05, value=threshold_d,
                      label="Prediction confidence threshold (default 0.38, range 0.05 to 1)",
                      info="Select any image below to start; you may adjust the threshold after the first inference")],
    outputs=[gr.Image(type="pil", label="Base Model Inference"),
             gr.Image(type="pil", label="Tuned Model Inference"),
             gr.Textbox(label="Threshold used for both inferences")],
    title="Luffy and Chopper head detection. SSD MobileNet V2 320x320 trained on animated characters only",
    description="Upload an image for prediction or click one of the examples below. The prediction confidence threshold defaults to >38%; you may adjust it after the first inference.",
    examples=[[test1], [test2], [test3], [test4], [test5], [test6], [test7], [test8], [test9], [test10], [test11], [test12]],
    cache_examples=True,  # example outputs are pre-computed so the gallery loads without re-running inference
    examples_per_page=12
    )

# tuned_image = gr.Interface(
#     fn=predict3,
#     inputs=gr.Image(type="pil"),
#     outputs=gr.Image(type="pil"),
#     title="Luffy and Chopper face detection on images. Result comparison of base vs tuned SSD mobile net V2 320x320",
#     description="Upload a Image for prediction or click on below examples. Mobile net tuned with data Augmentation. Prediction confident >38%",
#     examples=[[test1],[test2],[test3],[test4],[test5],[test6],[test7],[test8],[test9],[test10],[test11],[test12],],
#     cache_examples=True
#     )#.launch(share=True)

# a = os.path.join(os.path.dirname(__file__), "data/a.mp4")  # Video
# b = os.path.join(os.path.dirname(__file__), "data/b.mp4")  # Video
# c = os.path.join(os.path.dirname(__file__), "data/c.mp4")  # Video

# video_out_file = os.path.join(samples_folder,'detected' + '.mp4')

# stt_demo = gr.Interface(
#     fn=display_two_videos,
#     inputs=gr.Video(),
#     outputs=gr.Video(type="mp4",label="Detected Video"),
#     examples=[
#         [a],
#         [b],
#         [c],
#     ],
#     cache_examples=False
# )


video = gr.Interface(
    fn=lambda x, y: [x, y],  # simply passes the two pre-inferenced videos through
    inputs=[gr.Video(label="Base Model Video", interactive=False), gr.Video(label="Tuned Model Video", interactive=False)],
    outputs=[gr.Video(label="Base Model Inferenced Video"), gr.Video(label="Tuned Model Inferenced Video")],
    examples=[
        [a, b]
    ],
    title="Luffy and Chopper face detection on video. Result comparison of base vs tuned SSD MobileNet V2 320x320",
    description="The model has been custom trained to detect the characters Luffy and Chopper with prediction confidence >10%. Videos are pre-inferenced to reduce load time. (Zoom your browser out to view the right column: top is the base model inference, bottom is the tuned model inference.)"
)

# Combine the image and video demos into one tabbed app
demo = gr.TabbedInterface([base_image, video], ["Images", "Video"])


if __name__ == "__main__":
    demo.launch()
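    # Note: demo.launch(share=True) would also create a temporary public link
    # when running outside Hugging Face Spaces (as in the commented-out
    # interface above).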