|
import streamlit as st |
|
import tensorflow as tf |
|
import numpy as np |
|
|
|
|
|
# Seed TensorFlow's global random generator so the random ops below
# (e.g. the sampling noise in render_flat_rays) are repeatable.
tf.random.set_seed(42)

# Configuration values. AUTO and EPOCHS are not referenced by the code
# visible in this file; they are kept in case another module relies on them.
AUTO = tf.data.AUTOTUNE
BATCH_SIZE = 1  # Leading dimension assumed when reshaping model predictions.
NUM_SAMPLES = 32  # Number of sample points taken along each ray.
POS_ENCODE_DIMS = 16  # Number of sin/cos frequency pairs in encode_position.
EPOCHS = 20

# Rendered image size (height, width) and the focal value handed to get_rays.
H = W = 100
focal = 138.88
|
|
|
def encode_position(x):
    """Builds the Fourier-feature encoding of a coordinate tensor.

    Args:
        x: The input coordinate.

    Returns:
        A tensor that concatenates, along the last axis, `x` itself followed
        by `sin(2**k * x)` and `cos(2**k * x)` for every `k` in
        `range(POS_ENCODE_DIMS)`.
    """
    # Frequency-major ordering: for each power of two, sin comes before cos.
    fourier_terms = [
        wave(2.0 ** k * x)
        for k in range(POS_ENCODE_DIMS)
        for wave in (tf.sin, tf.cos)
    ]
    return tf.concat([x, *fourier_terms], axis=-1)
|
|
|
|
|
def get_rays(height, width, focal, pose):
    """Computes the origin point and direction vector of every pixel's ray.

    Args:
        height: Height of the image.
        width: Width of the image.
        focal: The focal length between the images and the camera.
        pose: The pose matrix of the camera.

    Returns:
        Tuple `(ray_origins, ray_directions)`; both tensors share the same
        shape because the origin is broadcast to match the directions.
    """
    # Pixel-coordinate grids over the image plane ("xy" indexing).
    columns = tf.range(width, dtype=tf.float32)
    rows = tf.range(height, dtype=tf.float32)
    grid_x, grid_y = tf.meshgrid(columns, rows, indexing="xy")

    # Shift the grid so it is centred on the image, then scale by the
    # focal length.
    cam_x = (grid_x - width * 0.5) / focal
    cam_y = (grid_y - height * 0.5) / focal

    # Per-pixel direction in camera space: y is negated and z is fixed at -1.
    directions = tf.stack([cam_x, -cam_y, -tf.ones_like(grid_x)], axis=-1)

    # Upper-left 3x3 block of the pose and the first three entries of its
    # last column.
    pose_block = pose[:3, :3]
    pose_last_column = pose[:3, -1]

    # Broadcast multiply followed by a sum over the last axis applies the
    # 3x3 block to every direction vector.
    ray_directions = tf.reduce_sum(directions[..., None, :] * pose_block, axis=-1)

    # Every ray starts from the same point, repeated for each pixel.
    ray_origins = tf.broadcast_to(pose_last_column, tf.shape(ray_directions))

    return (ray_origins, ray_directions)
|
|
|
|
|
def render_flat_rays(ray_origins, ray_directions, near, far, num_samples, rand=False):
    """Samples points along each ray, flattens them and position-encodes them.

    Args:
        ray_origins: The origin points for rays.
        ray_directions: The direction vectors for the rays.
        near: The near bound of the volumetric scene.
        far: The far bound of the volumetric scene.
        num_samples: Number of sample points in a ray.
        rand: Choice for randomising the sampling strategy.

    Returns:
        Tuple of the encoded, flattened sample points and the sample
        positions (`t` values) used along the rays. With `rand=False` the
        `t` values are the plain `tf.linspace` output; with `rand=True`
        they carry one noise value per ray and per sample.
    """
    # Evenly spaced sample positions between the near and far bounds.
    sample_ts = tf.linspace(near, far, num_samples)

    if rand:
        # Add uniform noise of at most one bin width to every sample of
        # every ray.
        jitter_shape = [*ray_origins.shape[:-1], num_samples]
        bin_width = (far - near) / num_samples
        sample_ts = sample_ts + tf.random.uniform(shape=jitter_shape) * bin_width

    # Point on a ray: r(t) = o + t * d, evaluated for every sample position.
    scaled_directions = ray_directions[..., None, :] * sample_ts[..., None]
    sample_points = ray_origins[..., None, :] + scaled_directions

    # Collapse all leading axes into a list of 3-D points, then encode.
    encoded_points = encode_position(tf.reshape(sample_points, [-1, 3]))
    return (encoded_points, sample_ts)
|
|
|
|
|
def map_fn(pose):
    """Maps an individual pose to flattened rays and sample points.

    Rays are generated for an `H` x `W` image with the module-level `focal`
    value, sampled `NUM_SAMPLES` times between depths 2.0 and 6.0 with
    randomised sampling enabled.

    Args:
        pose: The pose matrix of the camera.

    Returns:
        Tuple of flattened rays and sample points corresponding to the
        camera pose.
    """
    origins, directions = get_rays(height=H, width=W, focal=focal, pose=pose)
    return render_flat_rays(
        ray_origins=origins,
        ray_directions=directions,
        near=2.0,
        far=6.0,
        num_samples=NUM_SAMPLES,
        rand=True,
    )
|
|
|
|
|
def render_rgb_depth(model, rays_flat, t_vals, rand=True, train=True):
    """Generates the RGB image and depth map from the model prediction.

    Args:
        model: The MLP model that is trained to predict the rgb and
            volume density of the volumetric scene.
        rays_flat: The flattened rays that serve as the input to
            the NeRF model.
        t_vals: The sample points for the rays.
        rand: Choice to randomise the sampling strategy. It selects how
            `t_vals` is broadcast against the predictions: used as-is when
            true, expanded with two extra axes when false.
        train: Whether the model is in the training or testing phase.
            Training calls the model directly; otherwise `model.predict`
            is used.

    Returns:
        Tuple of rgb image and depth map.
    """
    raw = model(rays_flat) if train else model.predict(rays_flat)
    raw = tf.reshape(raw, shape=(BATCH_SIZE, H, W, NUM_SAMPLES, 4))

    # The last channel is the density, the remaining channels the colour.
    colour = tf.sigmoid(raw[..., :-1])
    density = tf.nn.relu(raw[..., -1])

    # Distance between neighbouring samples; the final sample gets a very
    # large distance (1e10) appended.
    gaps = t_vals[..., 1:] - t_vals[..., :-1]
    if rand:
        tail = tf.broadcast_to([1e10], shape=(BATCH_SIZE, H, W, 1))
        gaps = tf.concat([gaps, tail], axis=-1)
        depth_positions = t_vals
    else:
        tail = tf.broadcast_to([1e10], shape=(BATCH_SIZE, 1))
        # Insert two axes so the per-batch values broadcast over H and W.
        gaps = tf.concat([gaps, tail], axis=-1)[:, None, None, :]
        depth_positions = t_vals[:, None, None]

    alpha = 1.0 - tf.exp(-density * gaps)

    # Exclusive cumulative product of (1 - alpha): the fraction of light
    # still travelling when it reaches each sample. The small constant
    # keeps the product away from exact zero.
    epsilon = 1e-10
    transmittance = tf.math.cumprod(
        (1.0 - alpha) + epsilon, axis=-1, exclusive=True
    )
    weights = alpha * transmittance

    # Weighted sums over the sample axis give the pixel colour and depth.
    rgb = tf.reduce_sum(weights[..., None] * colour, axis=-2)
    depth_map = tf.reduce_sum(weights * depth_positions, axis=-1)
    return (rgb, depth_map)
|
|
|
|
|
def get_translation_t(t):
    """Get the translation matrix for movement in t.

    Args:
        t: Offset placed in the third row of the last column.

    Returns:
        A 4x4 float32 tensor: the identity matrix with `t` at row 2,
        column 3.
    """
    rows = [
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, t],
        [0, 0, 0, 1],
    ]
    return tf.convert_to_tensor(rows, dtype=tf.float32)
|
|
|
|
|
def get_rotation_phi(phi):
    """Get the rotation matrix for movement in phi.

    Args:
        phi: Rotation angle, passed straight to `tf.cos` / `tf.sin`.

    Returns:
        A 4x4 float32 tensor whose middle 2x2 block (rows/columns 1-2)
        holds the rotation terms; the first and last rows and columns are
        those of the identity.
    """
    cos_phi = tf.cos(phi)
    sin_phi = tf.sin(phi)
    rows = [
        [1, 0, 0, 0],
        [0, cos_phi, -sin_phi, 0],
        [0, sin_phi, cos_phi, 0],
        [0, 0, 0, 1],
    ]
    return tf.convert_to_tensor(rows, dtype=tf.float32)
|
|
|
|
|
def get_rotation_theta(theta):
    """Get the rotation matrix for movement in theta.

    Args:
        theta: Rotation angle, passed straight to `tf.cos` / `tf.sin`.

    Returns:
        A 4x4 float32 tensor with the rotation terms in rows/columns 0
        and 2; row and column 1 and the last row and column are those of
        the identity.
    """
    cos_theta = tf.cos(theta)
    sin_theta = tf.sin(theta)
    rows = [
        [cos_theta, 0, -sin_theta, 0],
        [0, 1, 0, 0],
        [sin_theta, 0, cos_theta, 0],
        [0, 0, 0, 1],
    ]
    return tf.convert_to_tensor(rows, dtype=tf.float32)
|
|
|
|
|
def pose_spherical(theta, phi, t):
    """Get the camera to world matrix for the given theta, phi and t.

    Args:
        theta: Angle in degrees, fed to `get_rotation_theta`.
        phi: Angle in degrees, fed to `get_rotation_phi`.
        t: Offset fed to `get_translation_t`.

    Returns:
        The 4x4 camera-to-world matrix.
    """
    phi_radians = phi / 180.0 * np.pi
    theta_radians = theta / 180.0 * np.pi

    # Compose right-to-left: translate first, then rotate by phi, then by
    # theta.
    camera_to_world = get_translation_t(t)
    camera_to_world = get_rotation_phi(phi_radians) @ camera_to_world
    camera_to_world = get_rotation_theta(theta_radians) @ camera_to_world

    # Final fixed transform: negates the first row and swaps the second and
    # third rows of the composed matrix.
    axis_swap = np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]])
    return axis_swap @ camera_to_world
|
|
|
|
|
def show_rendered_image(r,theta,phi):
    """Renders one view with the loaded NeRF model from a spherical pose.

    Relies on the module-level `nerf_loaded` model, which must be defined
    before this function is called.

    Args:
        r: Offset passed as `t` to `pose_spherical`.
        theta: Angle in degrees passed to `pose_spherical`.
        phi: Angle in degrees passed to `pose_spherical`.

    Returns:
        Tuple of the rgb image and the depth map for the first (only)
        batch entry.
    """
    camera_pose = pose_spherical(theta, phi, r)

    # Build the rays for this pose and sample them without random noise.
    origins, directions = get_rays(H, W, focal, camera_pose)
    flat_rays, sample_ts = render_flat_rays(
        origins, directions, near=2.0, far=6.0, num_samples=NUM_SAMPLES, rand=False
    )

    # Add a leading batch axis before running the model in inference mode.
    rgb_batch, depth_batch = render_rgb_depth(
        nerf_loaded, flat_rays[None, ...], sample_ts[None, ...], rand=False, train=False
    )
    return (rgb_batch[0], depth_batch[0])
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Streamlit page: header, interactive render, then reference links.
# ---------------------------------------------------------------------------
st.title('NeRF:3D volumetric rendering with NeRF')

# NOTE(review): the first author's surname may be misspelled here
# ("Gosthipathy") -- confirm against the authors' own spelling.
for header_line in (
    "Authors: [Aritra Roy Gosthipathy](https://twitter.com/ariG23498) and [Ritwik Raha](https://twitter.com/ritwik_raha)",
    "## Description",
    "[NeRF](https://arxiv.org/abs/2003.08934) proposes an ingenious way to synthesize novel views of a scene by modelling the volumetric scene function through a neural network.",
    "## Interactive Demo",
):
    st.markdown(header_line)

# Load the saved model from the "nerf" path; show_rendered_image reads it
# through this module-level name.
# NOTE(review): this statement runs every time the script executes;
# consider Streamlit's resource caching if reloads prove slow.
nerf_loaded = tf.keras.models.load_model("nerf", compile=False)

# The radius and phi are fixed; only theta is chosen by the user.
r = 4.0
theta = st.slider("Enter a value for Θ:", min_value=0.0, max_value=360.0)
phi = -30.0
color, depth = show_rendered_image(r, theta, phi)

# Show the colour render and the depth map side by side.
col1, col2 = st.columns(2)

with col1:
    color = tf.keras.utils.array_to_img(color)
    st.image(color, caption="Color Image", clamp=True, width=300)

with col2:
    # array_to_img is given a trailing channel axis for the depth map.
    depth = tf.keras.utils.array_to_img(depth[..., None])
    st.image(depth, caption="Depth Map", clamp=True, width=300)

for tutorial_line in (
    "## Tutorials",
    "- [Keras](https://keras.io/examples/vision/nerf/)",
    "- [PyImageSearch NeRF 1](https://www.pyimagesearch.com/2021/11/10/computer-graphics-and-deep-learning-with-nerf-using-tensorflow-and-keras-part-1/)",
    "- [PyImageSearch NeRF 2](https://www.pyimagesearch.com/2021/11/17/computer-graphics-and-deep-learning-with-nerf-using-tensorflow-and-keras-part-2/)",
    "- [PyImageSearch NeRF 3](https://www.pyimagesearch.com/2021/11/24/computer-graphics-and-deep-learning-with-nerf-using-tensorflow-and-keras-part-3/)",
):
    st.markdown(tutorial_line)

for credit_line in (
    "## Credits",
    "- [PyImageSearch](https://www.pyimagesearch.com/)",
    "- [JarvisLabs.ai GPU credits](https://jarvislabs.ai/)",
):
    st.markdown(credit_line)
|
|