import gdown
import gradio as gr
import logging
import os

import cv2
import numpy as np
import tensorflow as tf

from ai.detection import detect
from laeo_per_frame.interaction_per_frame_uncertainty import LAEO_computation
from utils.hpe import hpe, project_ypr_in2d
from utils.img_util import resize_preserving_ar, percentage_to_pixel, draw_key_points_pose, \
    visualize_vector, draw_axis, draw_axis_3d, draw_cones

# <a href="https://malga.unige.it/" target="_blank"><nobr>Lab MaLGa UniGe</nobr></a>

WEBSITE = """
<div class="embed_hidden">
<h1 style='text-align: center'>Head Pose Estimation and LAEO computation</h1>
<h2 style='text-align: center'>
<a target="_blank" href="https://github.com/Malga-Vision/LAEO_demo"><nobr>Code for LAEO</nobr></a>
<br>
<a target="_blank" href="https://github.com/Malga-Vision/HHP-Net/tree/master"><nobr>Code for HPE</nobr></a>
</h2>
<h2 style='text-align: center'>
<a href="https://github.com/Malga-Vision" target="_blank"><nobr>MaLGa Vision GitHub</nobr></a>
</h2>
<h3 style="text-align:center;">
<a href="https://fede1995.github.io/" target="_blank"><nobr>Federico FT</nobr></a>
</h3>

<h2> Description </h2>
<p>
This space illustrates a method for Head Pose Estimation (HPE) and LAEO (Looking At Each Other) detection.
The code is based on experiments and research carried out at the University of Genoa (Italy) in the MaLGa Laboratory.
This demo has been set up by Federico Figari Tomenotti.
DISCLAIMER: the demo does not work properly on smartphones, and sometimes misbehaves on the Safari web browser.
</p>

<h2> Usage </h2>
<p>
The flags let the user choose what to display on the result image and change the sensitivity of the person detection algorithm.
The head pose orientation can be displayed either as a single vector (arrow) or as a triplet of angles (yaw, pitch and roll) projected onto the image plane.
The uncertainty value is the mean of the uncertainties computed on the three angles.
The RUN button re-runs the demo on the current image after the flag settings have been changed.
For a detailed explanation of the algorithms, refer to the paper, which will be out soon.
</p>
</div>
"""

WEBSITE_citation = """
<h2 style='text-align: center'>
Citation
</h2>
If you find this code useful for your research, please use the following BibTeX entry.

```
@inproceedings{cantarini2022hhp,
  title={HHP-Net: A light Heteroscedastic neural network for Head Pose estimation with uncertainty},
  author={Cantarini, Giorgio and Tomenotti, Federico Figari and Noceti, Nicoletta and Odone, Francesca},
  booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
  pages={3521--3530},
  year={2022}
}
```"""
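
# Illustrative note: the "Uncertainty" value shown in the UI is, for each detected person, the
# mean of the three per-angle uncertainties estimated by the head pose network, i.e.
# (yaw_u + pitch_u + roll_u) / 3 (see uncertainty_mean in demo_play below); for example,
# per-angle uncertainties of 0.3, 0.6 and 0.9 are reported as 0.6.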


def load_image(camera):
    """Read a single frame from an already-opened cv2.VideoCapture object."""
    # Capture the video frame by frame
    try:
        ret, frame = camera.read()
        return ret, frame  # ret is False when no frame could be read
    except Exception:
        logging.error('Error reading frame')
        return False, None


def demo_play(img, laeo=True, rgb=False, show_keypoints=True, only_face=False,
              Head_Pose_representation='Vector', detection_threshold=0.45):
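    """Run the full pipeline (person detection, head pose estimation, optional LAEO) on one image.

    The keyword arguments mirror the flags exposed in the Gradio UI below. Returns the annotated
    image and a string with one mean head-pose uncertainty value per detected person.
    """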
    # webcam in use
    # gpus = tf.config.list_physical_devices('GPU')

    # img = np.array(frame)
    img_resized, new_old_shape = resize_preserving_ar(img, input_shape_od_model)

    if not rgb:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grey scale
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)  # still grey scale, but back to 3 channels so coloured points and lines can be drawn on it
    else:  # if RGB
        # img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        pass
    print('inference centernet')
    detections, elapsed_time = detect(model, img_resized, detection_threshold,
                                      new_old_shape)  # detection classes, boxes and scores

    # probably to draw on resized
    # img_with_detections = draw_detections(img_resized, detections, max_boxes_to_draw, None, None, None)
    # cv2.imshow("aa", img_with_detections)
    det, kpt = percentage_to_pixel(img.shape, detections['detection_boxes'], detections['detection_scores'],
                                   detections['detection_keypoints'], detections['detection_keypoint_scores'])

    # center_xy, yaw, pitch, roll = head_pose_estimation(kpt, 'centernet', gaze_model=gaze_model)

    # _________ extract hpe and print to img
    people_list = []
    print('inference hpe')
    for j, kpt_person in enumerate(kpt):
        yaw, pitch, roll, tdx, tdy = hpe(gaze_model, kpt_person, detector='centernet')

        # img = draw_axis_3d(yaw[0].numpy()[0], pitch[0].numpy()[0], roll[0].numpy()[0], image=img, tdx=tdx, tdy=tdy,
        #                    size=50)
        people_list.append({'yaw': yaw[0].numpy()[0],
                            'yaw_u': yaw[0].numpy()[1],
                            'pitch': pitch[0].numpy()[0],
                            'pitch_u': pitch[0].numpy()[1],
                            'roll': roll[0].numpy()[0],
                            'roll_u': roll[0].numpy()[1],
                            'center_xy': [tdx, tdy]
                            })

    if show_keypoints:
        for i in range(len(det)):
            img = draw_key_points_pose(img, kpt[i], only_face=only_face)
    # call LAEO
    clip_uncertainty = 0.5
    binarize_uncertainty = False
    if laeo:
        interaction_matrix = LAEO_computation(people_list, clipping_value=clip_uncertainty,
                                              clip=binarize_uncertainty)
    else:
        interaction_matrix = np.zeros((len(people_list), len(people_list)))
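    # Illustrative note: interaction_matrix[i, j] holds the LAEO score between person i and
    # person j (presumably in [0, 1], since it is later scaled by 255 to colour the arrows);
    # with laeo=False it stays all zeros, so every arrow below is drawn in red.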

    # coloured arrow print per person
    print(f'Head pose representation: {Head_Pose_representation}')

    def visualise_hpe(yaw, pitch, roll, image=None, tdx=None, tdy=None, size=50, yaw_uncertainty=-1,
                      pitch_uncertainty=-1, roll_uncertainty=-1, openpose=False, title="", color=(255, 0, 0)):
        if str(Head_Pose_representation).lower() == 'vector':
            vector = project_ypr_in2d(yaw, pitch, roll)
            image = visualize_vector(image, [tdx, tdy], vector, title=title, color=color)
            return image
        elif str(Head_Pose_representation).lower() == 'axis':
            image = draw_axis_3d(yaw, pitch, roll, image=image, tdx=tdx, tdy=tdy, size=size)
            return image
        elif str(Head_Pose_representation).lower() == 'cone':
            _, image = draw_cones(yaw, pitch, roll, unc_yaw=yaw_uncertainty, unc_pitch=pitch_uncertainty,
                                  unc_roll=roll_uncertainty, image=image, tdx=tdx, tdy=tdy, size=size)
            return image
        else:
            return image
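
    # Note: the 'cone' representation above is implemented (draw_cones) but not offered in the
    # Gradio Radio below, which only exposes 'Vector', 'Axis' and 'None'.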
    for index, person in enumerate(people_list):
        # the arrow is greener the higher the person's best LAEO score; (almost) no LAEO -> red
        green = round((max(interaction_matrix[index, :])) * 255)
        colour = (0, green, 0)
        if green < 40:
            colour = (255, 0, 0)
        img = visualise_hpe(person['yaw'], person['pitch'], person['roll'], image=img,
                            tdx=person['center_xy'][0], tdy=person['center_xy'][1], size=50,
                            yaw_uncertainty=person['yaw_u'], pitch_uncertainty=person['pitch_u'],
                            roll_uncertainty=person['roll_u'], title="", color=colour)
        # vector = project_ypr_in2d(person['yaw'], person['pitch'], person['roll'])
        # img = visualize_vector(img, person['center_xy'], vector, title="", color=colour)

    # mean of the three per-angle uncertainties, one value per person (see the Usage text above)
    uncertainty_mean = [(i['yaw_u'] + i['pitch_u'] + i['roll_u']) / 3 for i in people_list]
    uncertainty_mean_str = ' '.join(str(round(i, 2)) for i in uncertainty_mean)
    return img, uncertainty_mean_str
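

# Minimal usage sketch (illustrative; assumes the globals gaze_model, model and
# input_shape_od_model have already been loaded as in the __main__ block below, and that the
# example image exists):
#   frame = cv2.imread('LAEO_demo_data/examples/1.jpg')
#   out_img, uncertainties = demo_play(frame, laeo=True, Head_Pose_representation='Axis')
#   cv2.imwrite('out.jpg', out_img)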


if __name__ == '__main__':
    if not os.path.exists("LAEO_demo_data"):
        gdown.download_folder("https://drive.google.com/drive/folders/1nQ1Cb_tBEhWxy183t-mIcVH7AhAfa6NO?usp=drive_link",
                              use_cookies=False)

    # Get the list of all files and directories
    path = "LAEO_demo_data/examples"
    dir_list = os.listdir(path)
    print("Files and directories in '", path, "' :")
    # prints all files
    print(dir_list)

    gaze_model_path = 'LAEO_demo_data/head_pose_estimation'
    gaze_model = tf.keras.models.load_model(gaze_model_path, custom_objects={"tf": tf})

    path_to_model = 'LAEO_demo_data/keypoint_detector/centernet_hg104_512x512_kpts_coco17_tpu-32'
    model = tf.saved_model.load(os.path.join(path_to_model, 'saved_model'))
    input_shape_od_model = (512, 512)

    # params
    min_score_thresh, max_boxes_to_draw, min_distance = .25, 50, 1.5

    print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

    function_to_call = demo_play
    # outputs = gr.Image(shape=(512, 512))
    live = True
    title = "Head Pose Estimation and LAEO"
    print(os.getcwd())

    with gr.Blocks() as demo:
        gr.Markdown(WEBSITE)

        with gr.Tab("demo_webcam"):
            with gr.Row():
                with gr.Column():
                    input_img = gr.Image(label="Input Image", source="webcam")
                    button = gr.Button(label="RUN", type="default")
                    laeo = gr.Checkbox(value=True, label="LAEO", info="Compute and display LAEO")
                    rgb = gr.Checkbox(value=False, label="rgb",
                                      info="Untick to display the output on a black-and-white image")
                    show_keypoints = gr.Checkbox(value=True, label="show_keypoints",
                                                 info="Display keypoints on image")
                    show_keypoints_only_face = gr.Checkbox(value=True, label="show_keypoints_only_face",
                                                           info="Display only face keypoints on image")
                    Head_Pose_representation = gr.Radio(["Vector", "Axis", "None"], label="Head_Pose_representation",
                                                        info="Which representation to show", value="Vector")
                    detection_threshold = gr.Slider(0.01, 1, value=0.45, step=0.01, interactive=True,
                                                    label="detection_threshold", info="Choose in [0, 1]")
                with gr.Column():
                    outputs = gr.Image(label="Output Image", shape=(512, 512))
                    uncert = gr.Label(label="Uncertainty", value="0.0")

            input_img.change(function_to_call, inputs=[input_img, laeo, rgb, show_keypoints, show_keypoints_only_face,
                                                       Head_Pose_representation, detection_threshold],
                             outputs=[outputs, uncert])
            button.click(function_to_call, inputs=[input_img, laeo, rgb, show_keypoints, show_keypoints_only_face,
                                                   Head_Pose_representation, detection_threshold],
                         outputs=[outputs, uncert])

        with gr.Tab("demo_upload"):
            with gr.Row():
                with gr.Column():
                    input_img = gr.Image(label="Input Image", source="upload")
                    button = gr.Button(label="RUN", type="default")
                    laeo = gr.Checkbox(value=True, label="LAEO", info="Compute and display LAEO")
                    rgb = gr.Checkbox(value=False, label="rgb",
                                      info="Untick to display the output on a black-and-white image")
                    show_keypoints = gr.Checkbox(value=True, label="show_keypoints",
                                                 info="Display keypoints on image")
                    show_keypoints_only_face = gr.Checkbox(value=True, label="show_keypoints_only_face",
                                                           info="Display only face keypoints on image")
                    Head_Pose_representation = gr.Radio(["Vector", "Axis", "None"],
                                                        label="Head_Pose_representation",
                                                        info="Which representation to show", value="Vector")
                    detection_threshold = gr.Slider(0.01, 1, value=0.45, step=0.01, interactive=True,
                                                    label="detection_threshold", info="Choose in [0, 1]")
                with gr.Column():
                    outputs = gr.Image(height=238, width=585, label="Output Image")
                    uncert = gr.Label(label="Uncertainty", value="0.0")

            examples_text = gr.Markdown("## Image Examples")
            examples = gr.Examples([["LAEO_demo_data/examples/1.jpg"], ["LAEO_demo_data/examples/300wlp_0.png"],
                                    ["LAEO_demo_data/examples/AWFL_2.jpg"],
                                    ["LAEO_demo_data/examples/BIWI_3.png"]],
                                   inputs=input_img)  # add all other flags

            input_img.change(function_to_call, inputs=[input_img, laeo, rgb, show_keypoints, show_keypoints_only_face,
                                                       Head_Pose_representation, detection_threshold],
                             outputs=[outputs, uncert])
            button.click(function_to_call, inputs=[input_img, laeo, rgb, show_keypoints, show_keypoints_only_face,
                                                   Head_Pose_representation, detection_threshold],
                         outputs=[outputs, uncert])

        gr.Markdown(WEBSITE_citation)

    demo.launch()