NIRVANALAN
committed on
Commit
•
87c126b
1
Parent(s):
c1d9c6a
release file
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- LICENSE +10 -0
- README.md +255 -4
- assets/ffhq_eval_pose.pt +0 -0
- assets/objv_eval_pose.pt +0 -0
- assets/shapenet_eval_pose.pt +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000.json +47 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000_albedo.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000_hdr.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000_mr.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000_nd.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000_ng.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001.json +47 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001_albedo.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001_hdr.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001_mr.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001_nd.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001_ng.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002.json +47 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002_albedo.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002_hdr.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002_mr.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002_nd.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002_ng.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003.json +47 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003_albedo.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003_hdr.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003_mr.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003_nd.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003_ng.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004.json +47 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004_albedo.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004_hdr.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004_mr.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004_nd.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004_ng.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005.json +47 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005_albedo.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005_hdr.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005_mr.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005_nd.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005_ng.exr +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00006/00006.json +47 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00006/00006.png +0 -0
- assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00006/00006_albedo.png +0 -0
LICENSE
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
S-Lab License 1.0
|
2 |
+
|
3 |
+
Copyright 2023 S-Lab
|
4 |
+
|
5 |
+
Redistribution and use for non-commercial purpose in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
6 |
+
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
7 |
+
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
8 |
+
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
9 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
10 |
+
4. In the event that redistribution and/or use for commercial purpose in source or binary forms, with or without modification is required, please contact the contributor(s) of the work.
|
README.md
CHANGED
@@ -1,5 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<div align="center">
|
2 |
+
|
3 |
+
<h1>
|
4 |
+
LN3Diff: Scalable Latent Neural Fields Diffusion for Speedy 3D Generation
|
5 |
+
</h1>
|
6 |
+
|
7 |
+
<div>
|
8 |
+
<a href='https://github.com/NIRVANALAN' target='_blank'>Yushi Lan</a><sup>1</sup> 
|
9 |
+
<a href='https://hongfz16.github.io' target='_blank'>Fangzhou Hong</a><sup>1</sup> 
|
10 |
+
<a href='https://williamyang1991.github.io/' target='_blank'>Shuai Yang</a><sup>2</sup> 
|
11 |
+
<a href='https://shangchenzhou.com/' target='_blank'>Shangchen Zhou</a><sup>1</sup> 
|
12 |
+
<a href='https://sg.linkedin.com/in/xuyi-meng-673779208' target='_blank'>Xuyi Meng</a><sup>1</sup> 
|
13 |
+
<br>
|
14 |
+
<a href='https://xingangpan.github.io/' target='_blank'>Xingang Pan</a>
|
15 |
+
<sup>1</sup>
|
16 |
+
<a href='https://daibo.info/' target='_blank'>Bo Dai</a>
|
17 |
+
<sup>3</sup>
|
18 |
+
<a href='https://www.mmlab-ntu.com/person/ccloy/' target='_blank'>Chen Change Loy</a>
|
19 |
+
<sup>1</sup>  
|
20 |
+
</div>
|
21 |
+
<div>
|
22 |
+
S-Lab, Nanyang Technological University<sup>1</sup>;
|
23 |
+
<!--   -->
|
24 |
+
<br>
|
25 |
+
Wangxuan Institute of Computer Technology, Peking University<sup>2</sup>;
|
26 |
+
<br>
|
27 |
+
<!--   -->
|
28 |
+
Shanghai Artificial Intelligence Laboratory <sup>3</sup>
|
29 |
+
<!-- <br>
|
30 |
+
<sup>*</sup>corresponding author -->
|
31 |
+
</div>
|
32 |
+
|
33 |
+
<div>
|
34 |
+
<!-- <a target="_blank" href="https://colab.research.google.com/github/nirvanalan/E3DGE/blob/main/notebook/CVPR23_E3DGE_Demo.ipynb">
|
35 |
+
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
36 |
+
</a> -->
|
37 |
+
<a href="https://hits.seeyoufarm.com"><img src="https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2FNIRVANALAN%2FLN3Diff&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false"/></a>
|
38 |
+
</div>
|
39 |
+
<br>
|
40 |
+
<!-- <h4> -->
|
41 |
+
<strong>
|
42 |
+
LN3Diff is a feedforward 3D diffusion model that creates a high-quality 3D object mesh from text within 8 V100-SECONDS.
|
43 |
+
</strong>
|
44 |
+
<!-- </h4> -->
|
45 |
+
|
46 |
+
<table>
|
47 |
+
<tr></tr>
|
48 |
+
<tr>
|
49 |
+
<td>
|
50 |
+
<img src="assets/t23d/standing-hund.gif">
|
51 |
+
</td>
|
52 |
+
<td>
|
53 |
+
<img src="assets/t23d/ufo.gif">
|
54 |
+
</td>
|
55 |
+
<td>
|
56 |
+
<img src="assets/t23d/mast.gif">
|
57 |
+
</td>
|
58 |
+
<td>
|
59 |
+
<img src="assets/t23d/cannon.gif">
|
60 |
+
</td>
|
61 |
+
<td>
|
62 |
+
<img src="assets/t23d/blue-plastic-chair.gif">
|
63 |
+
</td>
|
64 |
+
</tr>
|
65 |
+
|
66 |
+
|
67 |
+
<tr>
|
68 |
+
<td align='center' width='20%'>A standing hund.</td>
|
69 |
+
<td align='center' width='20%'>An UFO space aircraft.</td>
|
70 |
+
<td align='center' width='20%'>A sailboat with mast.</td>
|
71 |
+
<td align='center' width='20%'>An 18th century cannon.</td>
|
72 |
+
<td align='center' width='20%'>A blue plastic chair.</td>
|
73 |
+
</tr>
|
74 |
+
<tr></tr>
|
75 |
+
</table>
|
76 |
+
|
77 |
+
<!-- <br> -->
|
78 |
+
|
79 |
+
For more visual results, check out our <a href="https://nirvanalan.github.io/projects/ln3diff/" target="_blank">project page</a> :page_with_curl:
|
80 |
+
|
81 |
+
<strike>
|
82 |
+
Codes coming soon :facepunch:
|
83 |
+
</strike>
|
84 |
+
|
85 |
+
This repository contains the official implementation of LN3Diff:
|
86 |
+
Scalable Latent Neural Fields Diffusion for Speedy 3D Generation
|
87 |
+
|
88 |
+
</div>
|
89 |
+
|
90 |
---
|
91 |
+
|
92 |
+
<h4 align="center">
|
93 |
+
<a href="https://nirvanalan.github.io/projects/ln3diff/" target='_blank'>[Project Page]</a>
|
94 |
+
•
|
95 |
+
<a href="https://arxiv.org/pdf/2403.12019.pdf" target='_blank'>[arXiv]</a>
|
96 |
+
</h4>
|
97 |
+
|
98 |
+
|
99 |
+
## :mega: Updates
|
100 |
+
|
101 |
+
[03/2024] Initial release.
|
102 |
+
|
103 |
+
[04/2024] Inference and training code for Objaverse, ShapeNet and FFHQ is released, including pre-trained models and training datasets.
|
104 |
+
|
105 |
+
|
106 |
+
## :dromedary_camel: TODO
|
107 |
+
|
108 |
+
- [x] Release the inference and training code.
|
109 |
+
- [x] Release the pre-trained checkpoints of ShapeNet and FFHQ.
|
110 |
+
- [x] Release the pre-trained checkpoints of T23D Objaverse model trained with 30K+ instances dataset.
|
111 |
+
- [x] Release the stage-1 VAE of Objaverse trained with 80K+ instances dataset.
|
112 |
+
- [ ] Add Gradio demo.
|
113 |
+
- [ ] Polish the dataset preparation and training doc.
|
114 |
+
- [ ] Add metrics evaluation scripts and samples.
|
115 |
+
- [ ] Lint the code.
|
116 |
+
- [ ] Release the new T23D Objaverse model trained with 80K+ instances dataset.
|
117 |
+
|
118 |
+
|
119 |
+
|
120 |
+
## :handshake: Citation
|
121 |
+
If you find our work useful for your research, please consider citing the paper:
|
122 |
+
```
|
123 |
+
@misc{lan2024ln3diff,
|
124 |
+
title={LN3Diff: Scalable Latent Neural Fields Diffusion for Speedy 3D Generation},
|
125 |
+
author={Yushi Lan and Fangzhou Hong and Shuai Yang and Shangchen Zhou and Xuyi Meng and Bo Dai and Xingang Pan and Chen Change Loy},
|
126 |
+
year={2024},
|
127 |
+
eprint={2403.12019},
|
128 |
+
archivePrefix={arXiv},
|
129 |
+
primaryClass={cs.CV}
|
130 |
+
}
|
131 |
+
```
|
132 |
+
|
133 |
+
## :desktop_computer: Requirements
|
134 |
+
|
135 |
+
NVIDIA GPUs are required for this project.
|
136 |
+
We conduct all the training on NVIDIA V100-32GiB (ShapeNet, FFHQ) and NVIDIA A100-80GiB (Objaverse).
|
137 |
+
We have tested the inference code on NVIDIA V100.
|
138 |
+
We recommend using anaconda to manage the python environments.
|
139 |
+
|
140 |
+
The environment can be created via ```conda env create -f environment_ln3diff.yml```, and activated via ```conda activate ln3diff```.
|
141 |
+
If you want to reuse your own PyTorch environment, install the following packages in your environment:
|
142 |
+
|
143 |
+
```
|
144 |
+
# first, check whether you have installed pytorch (>=2.0) and xformers.
|
145 |
+
conda install -c conda-forge openexr-python git
|
146 |
+
pip install openexr lpips imageio kornia opencv-python tensorboard tqdm timm ffmpeg einops beartype imageio[ffmpeg] blobfile ninja lmdb webdataset opencv-python click torchdiffeq transformers
|
147 |
+
pip install git+https://github.com/nupurkmr9/vision-aided-gan
|
148 |
+
```
|
149 |
+
|
150 |
+
## :running_woman: Inference
|
151 |
+
|
152 |
+
### Download Models
|
153 |
+
|
154 |
+
The pretrained stage-1 VAE and stage-2 LDM can be downloaded via [OneDrive](https://entuedu-my.sharepoint.com/:f:/g/personal/yushi001_e_ntu_edu_sg/ErdRV9hCYvlBioObT1v_LZ4Bnwye3sv6p5qiVZPNhI9coQ?e=nJgp8t).
|
155 |
+
|
156 |
+
Put the downloaded checkpoints under the ```checkpoints``` folder for inference. The checkpoints directory layout should be
|
157 |
+
|
158 |
+
checkpoints
|
159 |
+
├── ffhq
|
160 |
+
│ └── model_joint_denoise_rec_model1580000.pt
|
161 |
+
├── objaverse
|
162 |
+
│ ├── model_rec1680000.pt
|
163 |
+
│ └── model_joint_denoise_rec_model2310000.pt
|
164 |
+
├── shapenet
|
165 |
+
│ └── car
|
166 |
+
│ └── model_joint_denoise_rec_model1580000.pt
|
167 |
+
│ └── chair
|
168 |
+
│ └── model_joint_denoise_rec_model2030000.pt
|
169 |
+
│ └── plane
|
170 |
+
│ └── model_joint_denoise_rec_model770000.pt
|
171 |
+
└── ...
|
172 |
+
|
173 |
+
|
174 |
+
|
175 |
+
### Inference Commands
|
176 |
+
|
177 |
+
<strong>Note that to extract the mesh, 24GiB VRAM is required.</strong>
|
178 |
+
|
179 |
+
#### Stage-1 VAE 3D reconstruction
|
180 |
+
|
181 |
+
To run (Objaverse) stage-1 VAE 3D reconstruction and extract the VAE latents for diffusion learning, please run
|
182 |
+
|
183 |
+
```bash
|
184 |
+
bash shell_scripts/final_release/inference/sample_obajverse.sh
|
185 |
+
```
|
186 |
+
|
187 |
+
which shall give the following result:
|
188 |
+
|
189 |
+
|
190 |
+
The marching-cube extracted mesh can be visualized with Blender/MeshLab:
|
191 |
+
|
192 |
+
<img title="a title" alt="Mesh Visualization" src="./assets/stage1_vae_reconstruction/reconstruction_result/mesh-visualization.png">
|
193 |
+
|
194 |
+
**We have uploaded the pre-extracted VAE latents [here](https://entuedu-my.sharepoint.com/:f:/g/personal/yushi001_e_ntu_edu_sg/EnXixldDrKhDtrcuPM4vjQYBv06uY58F1mF7f7KVdZ19lQ?e=nXQNdm), which contain the corresponding VAE latents (with shape 32x32x12) of 76K G-buffer Objaverse objects. Feel free to use them in your own tasks.**
|
195 |
+
|
196 |
+
For more G-buffer Objaverse examples, download the [demo data](https://entuedu-my.sharepoint.com/:f:/g/personal/yushi001_e_ntu_edu_sg/EoyzVJbMyBhLoKFJbbsq6bYBi1paLwQxIDjTkO1KjI4b1g?e=sJc3rQ).
|
197 |
+
|
198 |
+
|
199 |
+
#### Stage-2 Text-to-3D
|
200 |
+
|
201 |
+
We train a 3D latent diffusion model on top of the stage-1 extracted latents.
|
202 |
+
For the following bash inference file, to extract mesh from the generated tri-plane, set ```--export_mesh True```. To change the text prompt, set the ```prompt``` variable. For unconditional sampling, set the cfg guidance ```unconditional_guidance_scale=0```. Feel free to tune the cfg guidance scale to trade off diversity and fidelity.
|
203 |
+
|
204 |
+
Note that the diffusion sampling batch size is set to ```4```, which costs around 16GiB VRAM. The mesh extraction of a single instance costs 24GiB VRAM.
|
205 |
+
|
206 |
+
For text-to-3D on Objaverse, run
|
207 |
+
|
208 |
+
```bash
|
209 |
+
bash shell_scripts/final_release/inference/sample_obajverse.sh
|
210 |
+
```
|
211 |
+
|
212 |
+
For text-to-3D on ShapeNet, run one of the following commands (which conduct T23D on car, chair and plane, respectively):
|
213 |
+
```bash
|
214 |
+
bash shell_scripts/final_release/inference/sample_shapenet_car_t23d.sh
|
215 |
+
```
|
216 |
+
|
217 |
+
```bash
|
218 |
+
bash shell_scripts/final_release/inference/sample_shapenet_chair_t23d.sh
|
219 |
+
```
|
220 |
+
|
221 |
+
```bash
|
222 |
+
bash shell_scripts/final_release/inference/sample_shapenet_plane_t23d.sh
|
223 |
+
```
|
224 |
+
|
225 |
+
For text-to-3D on FFHQ, run
|
226 |
+
|
227 |
+
```bash
|
228 |
+
bash shell_scripts/final_release/inference/sample_ffhq_t23d.sh
|
229 |
+
```
|
230 |
+
|
231 |
+
|
232 |
+
## :running_woman: Training
|
233 |
+
|
234 |
+
### Dataset
|
235 |
+
|
236 |
+
For Objaverse, we use the renderings provided by [G-buffer Objaverse](https://aigc3d.github.io/gobjaverse/). A demo subset for stage-1 VAE reconstruction can be downloaded from [here](https://entuedu-my.sharepoint.com/:u:/g/personal/yushi001_e_ntu_edu_sg/Eb6LX2x-EgJLpiHbhRxsN9ABnEaSyjG-tsVBcUr_dQ5dnQ?e=JXWQo1). Note that for Objaverse training, we pre-process the raw data into [wds-dataset](https://github.com/webdataset/webdataset) shards for fast and flexible loading. The sample shard data can be found [here](https://entuedu-my.sharepoint.com/:f:/g/personal/yushi001_e_ntu_edu_sg/ErtZQgnEH5ZItDqdUaiVbJgBe4nhZveJemQRqDW6Xwp7Zg?e=Zqt6Ss).
|
237 |
+
|
238 |
+
For ShapeNet, we render our own data with foreground mask for training, which can be downloaded from [here](https://entuedu-my.sharepoint.com/:f:/g/personal/yushi001_e_ntu_edu_sg/EijBXIC_bUNOo0L3wnJKRqoBCqVnhhT_BReYRc1tc_0lrA?e=VQwWOZ). For training, we convert the raw data to LMDB for faster data loading. The pre-processed LMDB file can be downloaded from [here](https://entuedu-my.sharepoint.com/:f:/g/personal/yushi001_e_ntu_edu_sg/Ev7L8Als8K9JtLtj1G23Cc0BTNDbhCQPadxNLLVS7mV2FQ?e=C5woyE).
|
239 |
+
|
240 |
+
|
241 |
+
For FFHQ, we use the pre-processed dataset from [EG3D](https://github.com/NVlabs/eg3d) and compress it into LMDB, which can also be found in the OneDrive link above.
|
242 |
+
|
243 |
+
|
244 |
+
### Training Commands
|
245 |
+
|
246 |
+
Coming soon.
|
247 |
+
|
248 |
+
|
249 |
+
## :newspaper_roll: License
|
250 |
+
|
251 |
+
Distributed under the S-Lab License. See `LICENSE` for more information.
|
252 |
+
|
253 |
+
|
254 |
+
## Contact
|
255 |
+
|
256 |
+
If you have any questions, please feel free to contact us via `[email protected]` or GitHub issues.
|
assets/ffhq_eval_pose.pt
ADDED
Binary file (4.35 kB). View file
|
|
assets/objv_eval_pose.pt
ADDED
Binary file (4.72 kB). View file
|
|
assets/shapenet_eval_pose.pt
ADDED
Binary file (10.8 kB). View file
|
|
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_depth": 5.0,
|
3 |
+
"bbox": [
|
4 |
+
[
|
5 |
+
-0.330194056,
|
6 |
+
-0.449999958,
|
7 |
+
-0.263895959
|
8 |
+
],
|
9 |
+
[
|
10 |
+
0.330194056,
|
11 |
+
0.450000018,
|
12 |
+
0.263895959
|
13 |
+
]
|
14 |
+
],
|
15 |
+
"origin": [
|
16 |
+
1.64323258,
|
17 |
+
0.0,
|
18 |
+
0.315478027
|
19 |
+
],
|
20 |
+
"x_fov": 0.691150367,
|
21 |
+
"y_fov": 0.691150367,
|
22 |
+
"x": [
|
23 |
+
1.2196297E-07,
|
24 |
+
1.00000012,
|
25 |
+
0.0
|
26 |
+
],
|
27 |
+
"y": [
|
28 |
+
0.188542932,
|
29 |
+
0.0,
|
30 |
+
-0.982064962
|
31 |
+
],
|
32 |
+
"z": [
|
33 |
+
-0.9820651,
|
34 |
+
1.2196297E-07,
|
35 |
+
-0.188542932
|
36 |
+
],
|
37 |
+
"scale": [
|
38 |
+
0.0023696092,
|
39 |
+
0.0023696092,
|
40 |
+
0.0023696092
|
41 |
+
],
|
42 |
+
"offset": [
|
43 |
+
0.0,
|
44 |
+
-0.4037283,
|
45 |
+
-0.06950388
|
46 |
+
]
|
47 |
+
}
|
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000_albedo.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000_hdr.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000_mr.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000_nd.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00000/00000_ng.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_depth": 5.0,
|
3 |
+
"bbox": [
|
4 |
+
[
|
5 |
+
-0.330194056,
|
6 |
+
-0.449999958,
|
7 |
+
-0.263895959
|
8 |
+
],
|
9 |
+
[
|
10 |
+
0.330194056,
|
11 |
+
0.450000018,
|
12 |
+
0.263895959
|
13 |
+
]
|
14 |
+
],
|
15 |
+
"origin": [
|
16 |
+
1.58724082,
|
17 |
+
0.425299883,
|
18 |
+
0.315478027
|
19 |
+
],
|
20 |
+
"x_fov": 0.691150367,
|
21 |
+
"y_fov": 0.691150367,
|
22 |
+
"x": [
|
23 |
+
-0.258818865,
|
24 |
+
0.9659259,
|
25 |
+
2.14746585E-08
|
26 |
+
],
|
27 |
+
"y": [
|
28 |
+
0.18211852,
|
29 |
+
0.0487984978,
|
30 |
+
-0.982064962
|
31 |
+
],
|
32 |
+
"z": [
|
33 |
+
-0.948601961,
|
34 |
+
-0.2541769,
|
35 |
+
-0.188542962
|
36 |
+
],
|
37 |
+
"scale": [
|
38 |
+
0.0023696092,
|
39 |
+
0.0023696092,
|
40 |
+
0.0023696092
|
41 |
+
],
|
42 |
+
"offset": [
|
43 |
+
0.0,
|
44 |
+
-0.4037283,
|
45 |
+
-0.06950388
|
46 |
+
]
|
47 |
+
}
|
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001_albedo.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001_hdr.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001_mr.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001_nd.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00001/00001_ng.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_depth": 5.0,
|
3 |
+
"bbox": [
|
4 |
+
[
|
5 |
+
-0.330194056,
|
6 |
+
-0.449999958,
|
7 |
+
-0.263895959
|
8 |
+
],
|
9 |
+
[
|
10 |
+
0.330194056,
|
11 |
+
0.450000018,
|
12 |
+
0.263895959
|
13 |
+
]
|
14 |
+
],
|
15 |
+
"origin": [
|
16 |
+
1.42308116,
|
17 |
+
0.8216163,
|
18 |
+
0.315478027
|
19 |
+
],
|
20 |
+
"x_fov": 0.691150367,
|
21 |
+
"y_fov": 0.691150367,
|
22 |
+
"x": [
|
23 |
+
-0.50000006,
|
24 |
+
0.8660254,
|
25 |
+
-6.586047E-09
|
26 |
+
],
|
27 |
+
"y": [
|
28 |
+
0.163282961,
|
29 |
+
0.0942714661,
|
30 |
+
-0.982064962
|
31 |
+
],
|
32 |
+
"z": [
|
33 |
+
-0.8504932,
|
34 |
+
-0.4910325,
|
35 |
+
-0.188542932
|
36 |
+
],
|
37 |
+
"scale": [
|
38 |
+
0.0023696092,
|
39 |
+
0.0023696092,
|
40 |
+
0.0023696092
|
41 |
+
],
|
42 |
+
"offset": [
|
43 |
+
0.0,
|
44 |
+
-0.4037283,
|
45 |
+
-0.06950388
|
46 |
+
]
|
47 |
+
}
|
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002_albedo.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002_hdr.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002_mr.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002_nd.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00002/00002_ng.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_depth": 5.0,
|
3 |
+
"bbox": [
|
4 |
+
[
|
5 |
+
-0.330194056,
|
6 |
+
-0.449999958,
|
7 |
+
-0.263895959
|
8 |
+
],
|
9 |
+
[
|
10 |
+
0.330194056,
|
11 |
+
0.450000018,
|
12 |
+
0.263895959
|
13 |
+
]
|
14 |
+
],
|
15 |
+
"origin": [
|
16 |
+
1.16194093,
|
17 |
+
1.16194093,
|
18 |
+
0.315478027
|
19 |
+
],
|
20 |
+
"x_fov": 0.691150367,
|
21 |
+
"y_fov": 0.691150367,
|
22 |
+
"x": [
|
23 |
+
-0.707106769,
|
24 |
+
0.707106769,
|
25 |
+
-5.59717162E-10
|
26 |
+
],
|
27 |
+
"y": [
|
28 |
+
0.13332,
|
29 |
+
0.13332,
|
30 |
+
-0.982064962
|
31 |
+
],
|
32 |
+
"z": [
|
33 |
+
-0.6944248,
|
34 |
+
-0.694424748,
|
35 |
+
-0.188542962
|
36 |
+
],
|
37 |
+
"scale": [
|
38 |
+
0.0023696092,
|
39 |
+
0.0023696092,
|
40 |
+
0.0023696092
|
41 |
+
],
|
42 |
+
"offset": [
|
43 |
+
0.0,
|
44 |
+
-0.4037283,
|
45 |
+
-0.06950388
|
46 |
+
]
|
47 |
+
}
|
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003_albedo.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003_hdr.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003_mr.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003_nd.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00003/00003_ng.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_depth": 5.0,
|
3 |
+
"bbox": [
|
4 |
+
[
|
5 |
+
-0.330194056,
|
6 |
+
-0.449999958,
|
7 |
+
-0.263895959
|
8 |
+
],
|
9 |
+
[
|
10 |
+
0.330194056,
|
11 |
+
0.450000018,
|
12 |
+
0.263895959
|
13 |
+
]
|
14 |
+
],
|
15 |
+
"origin": [
|
16 |
+
0.821616232,
|
17 |
+
1.42308116,
|
18 |
+
0.315478027
|
19 |
+
],
|
20 |
+
"x_fov": 0.691150367,
|
21 |
+
"y_fov": 0.691150367,
|
22 |
+
"x": [
|
23 |
+
-0.866025448,
|
24 |
+
0.49999994,
|
25 |
+
-8.742944E-09
|
26 |
+
],
|
27 |
+
"y": [
|
28 |
+
0.09427146,
|
29 |
+
0.163282961,
|
30 |
+
-0.982064962
|
31 |
+
],
|
32 |
+
"z": [
|
33 |
+
-0.491032422,
|
34 |
+
-0.850493252,
|
35 |
+
-0.188542917
|
36 |
+
],
|
37 |
+
"scale": [
|
38 |
+
0.0023696092,
|
39 |
+
0.0023696092,
|
40 |
+
0.0023696092
|
41 |
+
],
|
42 |
+
"offset": [
|
43 |
+
0.0,
|
44 |
+
-0.4037283,
|
45 |
+
-0.06950388
|
46 |
+
]
|
47 |
+
}
|
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004_albedo.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004_hdr.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004_mr.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004_nd.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00004/00004_ng.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_depth": 5.0,
|
3 |
+
"bbox": [
|
4 |
+
[
|
5 |
+
-0.330194056,
|
6 |
+
-0.449999958,
|
7 |
+
-0.263895959
|
8 |
+
],
|
9 |
+
[
|
10 |
+
0.330194056,
|
11 |
+
0.450000018,
|
12 |
+
0.263895959
|
13 |
+
]
|
14 |
+
],
|
15 |
+
"origin": [
|
16 |
+
0.425299734,
|
17 |
+
1.58724082,
|
18 |
+
0.315478027
|
19 |
+
],
|
20 |
+
"x_fov": 0.691150367,
|
21 |
+
"y_fov": 0.691150367,
|
22 |
+
"x": [
|
23 |
+
-0.9659259,
|
24 |
+
0.258818924,
|
25 |
+
-4.48933068E-09
|
26 |
+
],
|
27 |
+
"y": [
|
28 |
+
0.0487984866,
|
29 |
+
0.18211849,
|
30 |
+
-0.982064962
|
31 |
+
],
|
32 |
+
"z": [
|
33 |
+
-0.254177,
|
34 |
+
-0.9486019,
|
35 |
+
-0.188542932
|
36 |
+
],
|
37 |
+
"scale": [
|
38 |
+
0.0023696092,
|
39 |
+
0.0023696092,
|
40 |
+
0.0023696092
|
41 |
+
],
|
42 |
+
"offset": [
|
43 |
+
0.0,
|
44 |
+
-0.4037283,
|
45 |
+
-0.06950388
|
46 |
+
]
|
47 |
+
}
|
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005_albedo.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005_hdr.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005_mr.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005_nd.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00005/00005_ng.exr
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00006/00006.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_depth": 5.0,
|
3 |
+
"bbox": [
|
4 |
+
[
|
5 |
+
-0.330194056,
|
6 |
+
-0.449999958,
|
7 |
+
-0.263895959
|
8 |
+
],
|
9 |
+
[
|
10 |
+
0.330194056,
|
11 |
+
0.450000018,
|
12 |
+
0.263895959
|
13 |
+
]
|
14 |
+
],
|
15 |
+
"origin": [
|
16 |
+
-7.182798E-08,
|
17 |
+
1.64323258,
|
18 |
+
0.315478027
|
19 |
+
],
|
20 |
+
"x_fov": 0.691150367,
|
21 |
+
"y_fov": 0.691150367,
|
22 |
+
"x": [
|
23 |
+
-1.0,
|
24 |
+
-4.37113847E-08,
|
25 |
+
1.07384328E-15
|
26 |
+
],
|
27 |
+
"y": [
|
28 |
+
-8.241472E-09,
|
29 |
+
0.1885429,
|
30 |
+
-0.982064962
|
31 |
+
],
|
32 |
+
"z": [
|
33 |
+
4.29274181E-08,
|
34 |
+
-0.982064962,
|
35 |
+
-0.1885429
|
36 |
+
],
|
37 |
+
"scale": [
|
38 |
+
0.0023696092,
|
39 |
+
0.0023696092,
|
40 |
+
0.0023696092
|
41 |
+
],
|
42 |
+
"offset": [
|
43 |
+
0.0,
|
44 |
+
-0.4037283,
|
45 |
+
-0.06950388
|
46 |
+
]
|
47 |
+
}
|
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00006/00006.png
ADDED
assets/stage1_vae_reconstruction/Objaverse/Animals/0/10120/campos_512_v4/00006/00006_albedo.png
ADDED