patrickvonplaten committed
Commit • c9c864b
Parent(s): 0b1fa07
[Celeba-256] Upload first model
Browse files
- generated_image.png +0 -0
- model_index.json +16 -0
- run.py +51 -0
- scheduler/scheduler_config.json +11 -0
- unet/config.json +35 -0
- unet/diffusion_model.pt +3 -0
- vqvae/config.json +24 -0
- vqvae/diffusion_model.pt +3 -0
generated_image.png
ADDED
model_index.json
ADDED
@@ -0,0 +1,16 @@
+{
+  "_class_name": "LatentDiffusionUncondPipeline",
+  "_diffusers_version": "0.0.4",
+  "scheduler": [
+    "diffusers",
+    "DDIMScheduler"
+  ],
+  "unet": [
+    "diffusers",
+    "UNetUnconditionalModel"
+  ],
+  "vqvae": [
+    "diffusers",
+    "VQModel"
+  ]
+}
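Note: model_index.json tells diffusers which class implements each pipeline component, so the three parts can also be loaded in one call instead of the manual loading in run.py below. A minimal sketch, assuming the generic DiffusionPipeline loader resolves "_class_name" from this file the way current diffusers versions do:

    from diffusers import DiffusionPipeline

    # resolves "_class_name" from model_index.json and loads the
    # scheduler, unet, and vqvae subfolders listed above
    pipeline = DiffusionPipeline.from_pretrained("./")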
run.py
ADDED
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+from diffusers import UNetUnconditionalModel, DDIMScheduler, VQModel
+import torch
+import PIL.Image
+import numpy as np
+import tqdm
+
+# load all models
+unet = UNetUnconditionalModel.from_pretrained("./", subfolder="unet")
+vqvae = VQModel.from_pretrained("./", subfolder="vqvae")
+scheduler = DDIMScheduler.from_config("./", subfolder="scheduler")
+
+# set to cuda
+torch_device = "cuda" if torch.cuda.is_available() else "cpu"
+
+unet.to(torch_device)
+vqvae.to(torch_device)
+
+# generate gaussian noise to be decoded
+generator = torch.manual_seed(0)
+noise = torch.randn(
+    (1, unet.in_channels, unet.image_size, unet.image_size),
+    generator=generator,
+).to(torch_device)
+
+# set inference steps for DDIM
+scheduler.set_timesteps(num_inference_steps=50)
+
+image = noise
+for t in tqdm.tqdm(scheduler.timesteps):
+    # predict the noise residual of the current image
+    with torch.no_grad():
+        residual = unet(image, t)["sample"]
+
+    # compute the previous image x_t-1 according to the DDIM formula
+    prev_image = scheduler.step(residual, t, image, eta=0.0)["prev_sample"]
+
+    # x_t -> x_t-1
+    image = prev_image
+
+# decode the latent image with the vqvae
+with torch.no_grad():
+    image = vqvae.decode(image)
+
+# map the [-1, 1] decoder output to a uint8 image
+image_processed = image.cpu().permute(0, 2, 3, 1)
+image_processed = (image_processed + 1.0) * 127.5
+image_processed = image_processed.numpy().astype(np.uint8)
+image_pil = PIL.Image.fromarray(image_processed[0])
+
+image_pil.save("generated_image.png")
scheduler/scheduler_config.json
ADDED
@@ -0,0 +1,11 @@
+{
+  "_class_name": "DDIMScheduler",
+  "_diffusers_version": "0.0.4",
+  "beta_end": 0.0195,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.0015,
+  "clip_sample": false,
+  "timestep_values": null,
+  "timesteps": 1000,
+  "trained_betas": null
+}
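The "scaled_linear" schedule interpolates between sqrt(beta_start) and sqrt(beta_end) and squares the result, rather than interpolating the betas directly. A sketch of how the schedule is built from these config values (this mirrors the DDIMScheduler implementation in later diffusers releases; the exact 0.0.4 behaviour is assumed):

    import torch

    beta_start, beta_end, timesteps = 0.0015, 0.0195, 1000
    # linear in sqrt(beta)-space, then squared
    betas = torch.linspace(beta_start**0.5, beta_end**0.5, timesteps) ** 2
    alphas_cumprod = torch.cumprod(1.0 - betas, dim=0)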
unet/config.json
ADDED
@@ -0,0 +1,35 @@
+{
+  "_class_name": "UNetUnconditionalModel",
+  "_diffusers_version": "0.0.4",
+  "attention_resolutions": [
+    8,
+    4,
+    2
+  ],
+  "down_blocks": ["UNetResDownBlock2D", "UNetResAttnDownBlock2D", "UNetResAttnDownBlock2D", "UNetResAttnDownBlock2D"],
+  "up_blocks": ["UNetResAttnUpBlock2D", "UNetResAttnUpBlock2D", "UNetResAttnUpBlock2D", "UNetResUpBlock2D"],
+  "down_block_input_channels": [224, 224, 448, 672],
+  "down_block_output_channels": [224, 448, 672, 896],
+  "context_dim": null,
+  "conv_resample": true,
+  "dims": 2,
+  "dropout": 0,
+  "image_size": 64,
+  "in_channels": 3,
+  "legacy": true,
+  "n_embed": null,
+  "num_classes": null,
+  "num_head_channels": 32,
+  "num_heads": -1,
+  "num_heads_upsample": -1,
+  "num_res_blocks": 2,
+  "out_channels": 3,
+  "resblock_updown": false,
+  "transformer_depth": 1,
+  "use_checkpoint": false,
+  "use_fp16": false,
+  "use_new_attention_order": false,
+  "use_scale_shift_norm": false,
+  "use_spatial_transformer": false,
+  "ldm": true
+}
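With "in_channels": 3 and "image_size": 64 from this config, the noise tensor drawn in run.py comes out as (1, 3, 64, 64); the UNet denoises in this 64x64 latent space, not at the final 256x256 pixel resolution. A minimal shape check:

    import torch

    # (batch, unet.in_channels, unet.image_size, unet.image_size)
    noise = torch.randn(1, 3, 64, 64)
    print(noise.shape)  # torch.Size([1, 3, 64, 64])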
unet/diffusion_model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b655ee0d741c2de23be13d7031c8365b7c17f61b5921a42d1173e1e20d48067
+size 1096382177
vqvae/config.json
ADDED
@@ -0,0 +1,24 @@
+{
+  "_class_name": "VQModel",
+  "_diffusers_version": "0.0.4",
+  "attn_resolutions": [],
+  "ch": 128,
+  "ch_mult": [
+    1,
+    2,
+    4
+  ],
+  "double_z": false,
+  "dropout": 0.0,
+  "embed_dim": 3,
+  "give_pre_end": false,
+  "in_channels": 3,
+  "n_embed": 8192,
+  "num_res_blocks": 2,
+  "out_ch": 3,
+  "remap": null,
+  "resamp_with_conv": true,
+  "resolution": 256,
+  "sane_index_shape": false,
+  "z_channels": 3
+}
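The VQ-VAE is what bridges the 64x64 latents and the 256x256 "resolution" above: assuming the usual VQGAN convention where each "ch_mult" level after the first adds one 2x up- or downsampling step, the three levels give a 4x spatial factor. A quick sanity check:

    ch_mult = [1, 2, 4]                  # from the config above
    latent_size = 64                     # the unet "image_size"
    upscale = 2 ** (len(ch_mult) - 1)    # one 2x step per level after the first
    assert latent_size * upscale == 256  # matches "resolution"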
vqvae/diffusion_model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e383b55bf3faeffafffb49286ae11c41611557c6c2b0dfbf09a0d3ea94590ae8
+size 221364711