Delete useless files
Browse files
configs/deploy/clip_aslp_3df+3dc+abo+gso+toy+t10k+obj+sp+pk=256_01_4096_8_ckpt_250000_udt=110M_finetune_500000_deploy.yaml
DELETED
@@ -1,181 +0,0 @@
|
|
1 |
-
name: "0630_clip_aslp_3df+3dc+abo+gso+toy+t10k+obj+sp+pk=256_01_4096_8_ckpt_250000_udt=110M_finetune_500000"
|
2 |
-
#wandb:
|
3 |
-
# project: "image_diffuser"
|
4 |
-
# offline: false
|
5 |
-
|
6 |
-
|
7 |
-
training:
|
8 |
-
steps: 500000
|
9 |
-
use_amp: true
|
10 |
-
ckpt_path: ""
|
11 |
-
base_lr: 1.e-4
|
12 |
-
gradient_clip_val: 5.0
|
13 |
-
gradient_clip_algorithm: "norm"
|
14 |
-
every_n_train_steps: 5000
|
15 |
-
val_check_interval: 1024
|
16 |
-
limit_val_batches: 16
|
17 |
-
|
18 |
-
dataset:
|
19 |
-
target: michelangelo.data.asl_webdataset.MultiAlignedShapeLatentModule
|
20 |
-
params:
|
21 |
-
batch_size: 38
|
22 |
-
num_workers: 4
|
23 |
-
val_num_workers: 4
|
24 |
-
buffer_size: 256
|
25 |
-
return_normal: true
|
26 |
-
random_crop: false
|
27 |
-
surface_sampling: true
|
28 |
-
pc_size: &pc_size 4096
|
29 |
-
image_size: 384
|
30 |
-
mean: &mean [0.5, 0.5, 0.5]
|
31 |
-
std: &std [0.5, 0.5, 0.5]
|
32 |
-
cond_stage_key: "image"
|
33 |
-
|
34 |
-
meta_info:
|
35 |
-
3D-FUTURE:
|
36 |
-
render_folder: "/root/workspace/cq_workspace/datasets/3D-FUTURE/renders"
|
37 |
-
tar_folder: "/root/workspace/datasets/make_tars/3D-FUTURE"
|
38 |
-
|
39 |
-
ABO:
|
40 |
-
render_folder: "/root/workspace/cq_workspace/datasets/ABO/renders"
|
41 |
-
tar_folder: "/root/workspace/datasets/make_tars/ABO"
|
42 |
-
|
43 |
-
GSO:
|
44 |
-
render_folder: "/root/workspace/cq_workspace/datasets/GSO/renders"
|
45 |
-
tar_folder: "/root/workspace/datasets/make_tars/GSO"
|
46 |
-
|
47 |
-
TOYS4K:
|
48 |
-
render_folder: "/root/workspace/cq_workspace/datasets/TOYS4K/TOYS4K/renders"
|
49 |
-
tar_folder: "/root/workspace/datasets/make_tars/TOYS4K"
|
50 |
-
|
51 |
-
3DCaricShop:
|
52 |
-
render_folder: "/root/workspace/cq_workspace/datasets/3DCaricShop/renders"
|
53 |
-
tar_folder: "/root/workspace/datasets/make_tars/3DCaricShop"
|
54 |
-
|
55 |
-
Thingi10K:
|
56 |
-
render_folder: "/root/workspace/cq_workspace/datasets/Thingi10K/renders"
|
57 |
-
tar_folder: "/root/workspace/datasets/make_tars/Thingi10K"
|
58 |
-
|
59 |
-
shapenet:
|
60 |
-
render_folder: "/root/workspace/cq_workspace/datasets/shapenet/renders"
|
61 |
-
tar_folder: "/root/workspace/datasets/make_tars/shapenet"
|
62 |
-
|
63 |
-
pokemon:
|
64 |
-
render_folder: "/root/workspace/cq_workspace/datasets/pokemon/renders"
|
65 |
-
tar_folder: "/root/workspace/datasets/make_tars/pokemon"
|
66 |
-
|
67 |
-
objaverse:
|
68 |
-
render_folder: "/root/workspace/cq_workspace/datasets/objaverse/renders"
|
69 |
-
tar_folder: "/root/workspace/datasets/make_tars/objaverse"
|
70 |
-
|
71 |
-
model:
|
72 |
-
target: michelangelo.models.asl_diffusion.clip_asl_diffuser_pl_module.ClipASLDiffuser
|
73 |
-
params:
|
74 |
-
first_stage_config:
|
75 |
-
target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
|
76 |
-
params:
|
77 |
-
shape_module_cfg:
|
78 |
-
target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
|
79 |
-
params:
|
80 |
-
num_latents: &num_latents 256
|
81 |
-
embed_dim: &embed_dim 64
|
82 |
-
point_feats: 3 # normal
|
83 |
-
num_freqs: 8
|
84 |
-
include_pi: false
|
85 |
-
heads: 12
|
86 |
-
width: 768
|
87 |
-
num_encoder_layers: 8
|
88 |
-
num_decoder_layers: 16
|
89 |
-
use_ln_post: true
|
90 |
-
init_scale: 0.25
|
91 |
-
qkv_bias: false
|
92 |
-
use_checkpoint: false
|
93 |
-
aligned_module_cfg:
|
94 |
-
target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
|
95 |
-
params:
|
96 |
-
clip_model_version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
|
97 |
-
# clip_model_version: "/root/workspace/checkpoints/clip/clip-vit-large-patch14"
|
98 |
-
|
99 |
-
loss_cfg:
|
100 |
-
target: torch.nn.Identity
|
101 |
-
|
102 |
-
cond_stage_config:
|
103 |
-
target: michelangelo.models.conditional_encoders.encoder_factory.FrozenCLIPImageGridEmbedder
|
104 |
-
params:
|
105 |
-
version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
|
106 |
-
# version: "/root/workspace/checkpoints/clip/clip-vit-large-patch14"
|
107 |
-
zero_embedding_radio: 0.1
|
108 |
-
|
109 |
-
first_stage_key: "surface"
|
110 |
-
cond_stage_key: "image"
|
111 |
-
scale_by_std: false
|
112 |
-
|
113 |
-
denoiser_cfg:
|
114 |
-
target: michelangelo.models.asl_diffusion.asl_udt.ConditionalASLUDTDenoiser
|
115 |
-
params:
|
116 |
-
input_channels: *embed_dim
|
117 |
-
output_channels: *embed_dim
|
118 |
-
n_ctx: *num_latents
|
119 |
-
width: 768
|
120 |
-
layers: 6 # 2 * 6 + 1 = 13
|
121 |
-
heads: 12
|
122 |
-
context_dim: 1024
|
123 |
-
init_scale: 1.0
|
124 |
-
skip_ln: true
|
125 |
-
use_checkpoint: true
|
126 |
-
|
127 |
-
scheduler_cfg:
|
128 |
-
guidance_scale: 7.5
|
129 |
-
num_inference_steps: 50
|
130 |
-
eta: 0.0
|
131 |
-
|
132 |
-
noise:
|
133 |
-
target: diffusers.schedulers.DDPMScheduler
|
134 |
-
params:
|
135 |
-
num_train_timesteps: 1000
|
136 |
-
beta_start: 0.00085
|
137 |
-
beta_end: 0.012
|
138 |
-
beta_schedule: "scaled_linear"
|
139 |
-
variance_type: "fixed_small"
|
140 |
-
clip_sample: false
|
141 |
-
denoise:
|
142 |
-
target: diffusers.schedulers.DDIMScheduler
|
143 |
-
params:
|
144 |
-
num_train_timesteps: 1000
|
145 |
-
beta_start: 0.00085
|
146 |
-
beta_end: 0.012
|
147 |
-
beta_schedule: "scaled_linear"
|
148 |
-
clip_sample: false # clip sample to -1~1
|
149 |
-
set_alpha_to_one: false
|
150 |
-
steps_offset: 1
|
151 |
-
|
152 |
-
optimizer_cfg:
|
153 |
-
optimizer:
|
154 |
-
target: torch.optim.AdamW
|
155 |
-
params:
|
156 |
-
betas: [0.9, 0.99]
|
157 |
-
eps: 1.e-6
|
158 |
-
weight_decay: 1.e-2
|
159 |
-
|
160 |
-
scheduler:
|
161 |
-
target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
|
162 |
-
params:
|
163 |
-
warm_up_steps: 5000
|
164 |
-
f_start: 1.e-6
|
165 |
-
f_min: 1.e-3
|
166 |
-
f_max: 1.0
|
167 |
-
|
168 |
-
loss_cfg:
|
169 |
-
loss_type: "mse"
|
170 |
-
|
171 |
-
logger:
|
172 |
-
target: michelangelo.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger
|
173 |
-
params:
|
174 |
-
step_frequency: 2000
|
175 |
-
num_samples: 4
|
176 |
-
sample_times: 4
|
177 |
-
mean: *mean
|
178 |
-
std: *std
|
179 |
-
bounds: [-1.1, -1.1, -1.1, 1.1, 1.1, 1.1]
|
180 |
-
octree_depth: 7
|
181 |
-
num_chunks: 10000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/deploy/clip_sp+pk_aslperceiver=256_01_4096_8_udt=03.yaml
DELETED
@@ -1,180 +0,0 @@
|
|
1 |
-
name: "0428_clip_subsp+pk_sal_perceiver=256_01_4096_8_udt=03"
|
2 |
-
#wandb:
|
3 |
-
# project: "image_diffuser"
|
4 |
-
# offline: false
|
5 |
-
|
6 |
-
training:
|
7 |
-
steps: 500000
|
8 |
-
use_amp: true
|
9 |
-
ckpt_path: ""
|
10 |
-
base_lr: 1.e-4
|
11 |
-
gradient_clip_val: 5.0
|
12 |
-
gradient_clip_algorithm: "norm"
|
13 |
-
every_n_train_steps: 5000
|
14 |
-
val_check_interval: 1024
|
15 |
-
limit_val_batches: 16
|
16 |
-
|
17 |
-
# dataset
|
18 |
-
dataset:
|
19 |
-
target: michelangelo.data.asl_torch_dataset.MultiAlignedShapeImageTextModule
|
20 |
-
params:
|
21 |
-
batch_size: 38
|
22 |
-
num_workers: 4
|
23 |
-
val_num_workers: 4
|
24 |
-
buffer_size: 256
|
25 |
-
return_normal: true
|
26 |
-
random_crop: false
|
27 |
-
surface_sampling: true
|
28 |
-
pc_size: &pc_size 4096
|
29 |
-
image_size: 384
|
30 |
-
mean: &mean [0.5, 0.5, 0.5]
|
31 |
-
std: &std [0.5, 0.5, 0.5]
|
32 |
-
|
33 |
-
cond_stage_key: "text"
|
34 |
-
|
35 |
-
meta_info:
|
36 |
-
3D-FUTURE:
|
37 |
-
render_folder: "/root/workspace/cq_workspace/datasets/3D-FUTURE/renders"
|
38 |
-
tar_folder: "/root/workspace/datasets/make_tars/3D-FUTURE"
|
39 |
-
|
40 |
-
ABO:
|
41 |
-
render_folder: "/root/workspace/cq_workspace/datasets/ABO/renders"
|
42 |
-
tar_folder: "/root/workspace/datasets/make_tars/ABO"
|
43 |
-
|
44 |
-
GSO:
|
45 |
-
render_folder: "/root/workspace/cq_workspace/datasets/GSO/renders"
|
46 |
-
tar_folder: "/root/workspace/datasets/make_tars/GSO"
|
47 |
-
|
48 |
-
TOYS4K:
|
49 |
-
render_folder: "/root/workspace/cq_workspace/datasets/TOYS4K/TOYS4K/renders"
|
50 |
-
tar_folder: "/root/workspace/datasets/make_tars/TOYS4K"
|
51 |
-
|
52 |
-
3DCaricShop:
|
53 |
-
render_folder: "/root/workspace/cq_workspace/datasets/3DCaricShop/renders"
|
54 |
-
tar_folder: "/root/workspace/datasets/make_tars/3DCaricShop"
|
55 |
-
|
56 |
-
Thingi10K:
|
57 |
-
render_folder: "/root/workspace/cq_workspace/datasets/Thingi10K/renders"
|
58 |
-
tar_folder: "/root/workspace/datasets/make_tars/Thingi10K"
|
59 |
-
|
60 |
-
shapenet:
|
61 |
-
render_folder: "/root/workspace/cq_workspace/datasets/shapenet/renders"
|
62 |
-
tar_folder: "/root/workspace/datasets/make_tars/shapenet"
|
63 |
-
|
64 |
-
pokemon:
|
65 |
-
render_folder: "/root/workspace/cq_workspace/datasets/pokemon/renders"
|
66 |
-
tar_folder: "/root/workspace/datasets/make_tars/pokemon"
|
67 |
-
|
68 |
-
objaverse:
|
69 |
-
render_folder: "/root/workspace/cq_workspace/datasets/objaverse/renders"
|
70 |
-
tar_folder: "/root/workspace/datasets/make_tars/objaverse"
|
71 |
-
|
72 |
-
model:
|
73 |
-
target: michelangelo.models.asl_diffusion.clip_asl_diffuser_pl_module.ClipASLDiffuser
|
74 |
-
params:
|
75 |
-
first_stage_config:
|
76 |
-
target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
|
77 |
-
params:
|
78 |
-
# ckpt_path: "/root/workspace/cq_workspace/michelangelo/experiments/aligned_shape_latents/clip_aslperceiver_sp+pk_01_01/ckpt/ckpt-step=00230000.ckpt"
|
79 |
-
shape_module_cfg:
|
80 |
-
target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
|
81 |
-
params:
|
82 |
-
num_latents: &num_latents 256
|
83 |
-
embed_dim: &embed_dim 64
|
84 |
-
point_feats: 3 # normal
|
85 |
-
num_freqs: 8
|
86 |
-
include_pi: false
|
87 |
-
heads: 12
|
88 |
-
width: 768
|
89 |
-
num_encoder_layers: 8
|
90 |
-
num_decoder_layers: 16
|
91 |
-
use_ln_post: true
|
92 |
-
init_scale: 0.25
|
93 |
-
qkv_bias: false
|
94 |
-
use_checkpoint: true
|
95 |
-
aligned_module_cfg:
|
96 |
-
target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
|
97 |
-
params:
|
98 |
-
clip_model_version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
|
99 |
-
|
100 |
-
loss_cfg:
|
101 |
-
target: torch.nn.Identity
|
102 |
-
|
103 |
-
cond_stage_config:
|
104 |
-
target: michelangelo.models.conditional_encoders.encoder_factory.FrozenAlignedCLIPTextEmbedder
|
105 |
-
params:
|
106 |
-
version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
|
107 |
-
zero_embedding_radio: 0.1
|
108 |
-
max_length: 77
|
109 |
-
|
110 |
-
first_stage_key: "surface"
|
111 |
-
cond_stage_key: "text"
|
112 |
-
scale_by_std: false
|
113 |
-
|
114 |
-
denoiser_cfg:
|
115 |
-
target: michelangelo.models.asl_diffusion.asl_udt.ConditionalASLUDTDenoiser
|
116 |
-
params:
|
117 |
-
input_channels: *embed_dim
|
118 |
-
output_channels: *embed_dim
|
119 |
-
n_ctx: *num_latents
|
120 |
-
width: 768
|
121 |
-
layers: 8 # 2 * 8 + 1 = 17
|
122 |
-
heads: 12
|
123 |
-
context_dim: 768
|
124 |
-
init_scale: 1.0
|
125 |
-
skip_ln: true
|
126 |
-
use_checkpoint: true
|
127 |
-
|
128 |
-
scheduler_cfg:
|
129 |
-
guidance_scale: 7.5
|
130 |
-
num_inference_steps: 50
|
131 |
-
eta: 0.0
|
132 |
-
|
133 |
-
noise:
|
134 |
-
target: diffusers.schedulers.DDPMScheduler
|
135 |
-
params:
|
136 |
-
num_train_timesteps: 1000
|
137 |
-
beta_start: 0.00085
|
138 |
-
beta_end: 0.012
|
139 |
-
beta_schedule: "scaled_linear"
|
140 |
-
variance_type: "fixed_small"
|
141 |
-
clip_sample: false
|
142 |
-
denoise:
|
143 |
-
target: diffusers.schedulers.DDIMScheduler
|
144 |
-
params:
|
145 |
-
num_train_timesteps: 1000
|
146 |
-
beta_start: 0.00085
|
147 |
-
beta_end: 0.012
|
148 |
-
beta_schedule: "scaled_linear"
|
149 |
-
clip_sample: false # clip sample to -1~1
|
150 |
-
set_alpha_to_one: false
|
151 |
-
steps_offset: 1
|
152 |
-
|
153 |
-
optimizer_cfg:
|
154 |
-
optimizer:
|
155 |
-
target: torch.optim.AdamW
|
156 |
-
params:
|
157 |
-
betas: [0.9, 0.99]
|
158 |
-
eps: 1.e-6
|
159 |
-
weight_decay: 1.e-2
|
160 |
-
|
161 |
-
scheduler:
|
162 |
-
target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
|
163 |
-
params:
|
164 |
-
warm_up_steps: 5000
|
165 |
-
f_start: 1.e-6
|
166 |
-
f_min: 1.e-3
|
167 |
-
f_max: 1.0
|
168 |
-
|
169 |
-
loss_cfg:
|
170 |
-
loss_type: "mse"
|
171 |
-
|
172 |
-
logger:
|
173 |
-
target: michelangelo.utils.trainings.mesh_log_callback.TextConditionalASLDiffuserLogger
|
174 |
-
params:
|
175 |
-
step_frequency: 1000
|
176 |
-
num_samples: 4
|
177 |
-
sample_times: 4
|
178 |
-
bounds: [-1.1, -1.1, -1.1, 1.1, 1.1, 1.1]
|
179 |
-
octree_depth: 7
|
180 |
-
num_chunks: 10000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|