import torch
from safetensors.torch import load_file, save_file

# Overwrites the transformer tensors of a checkpoint that uses
# "model.diffusion_model.*" key names with tensors taken from a second file
# that uses "transformer_blocks.*"-style key names, then saves the result.
# Keys in the first file that are not assigned below are left untouched.

# Number of joint transformer blocks handled (indices 0..23).
NUM_BLOCKS = 24

# Passed as ``dim`` to swap_scale_shift, which ignores it; kept only so the
# call sites read the same as before.
MODULATION_DIM = 1536

# Key prefix of every tensor written into the target state dict.
TARGET_PREFIX = "model.diffusion_model."

# (target module, source module) pairs that are copied unchanged for both
# their ".bias" and ".weight" tensors.
DIRECT_MODULES = (
    ("context_embedder", "context_embedder"),
    ("x_embedder.proj", "pos_embed.proj"),
    ("t_embedder.mlp.0", "time_text_embed.timestep_embedder.linear_1"),
    ("t_embedder.mlp.2", "time_text_embed.timestep_embedder.linear_2"),
    ("y_embedder.mlp.0", "time_text_embed.text_embedder.linear_1"),
    ("y_embedder.mlp.2", "time_text_embed.text_embedder.linear_2"),
    ("final_layer.linear", "proj_out"),
)

SUFFIXES = ("bias", "weight")


def swap_scale_shift(weight, dim):
    """Return ``weight`` with its two halves along axis 0 exchanged.

    The tensor is split into two equal chunks along axis 0 and concatenated
    again with the second chunk first.

    Args:
        weight: Tensor to reorder; it is chunked in two along axis 0.
        dim: Ignored. Accepted only so existing call sites keep working; the
            split always happens along axis 0.

    Returns:
        A new tensor holding the second chunk followed by the first.
    """
    shift, scale = weight.chunk(2, dim=0)
    return torch.cat([scale, shift], dim=0)


def _fused_qkv(state, block_index, proj_names, suffix):
    """Concatenate three separate projection tensors into one along axis 0.

    Args:
        state: Source state dict with "transformer_blocks.N.attn.*" keys.
        block_index: Index N of the transformer block.
        proj_names: The three projection names, in q, k, v order.
        suffix: "bias" or "weight".

    Returns:
        The three tensors joined along axis 0 in the order given.
    """
    parts = [
        state[f"transformer_blocks.{block_index}.attn.{name}.{suffix}"]
        for name in proj_names
    ]
    return torch.cat(parts, dim=0)


file_path = "./sd3_medium.safetensors"
file_path_diff = "./diffusion_pytorch_model.safetensors"

loaded = load_file(file_path)
diffload = load_file(file_path_diff)

# --- Embedders and output projection: straight renames ----------------------
for target_module, source_module in DIRECT_MODULES:
    for suffix in SUFFIXES:
        loaded[f"{TARGET_PREFIX}{target_module}.{suffix}"] = diffload[
            f"{source_module}.{suffix}"
        ]

loaded["model.diffusion_model.pos_embed"] = diffload["pos_embed.pos_embed"]

# The final modulation layer is stored with its two halves in the opposite
# order in the source file, so they are swapped on the way in.
for suffix in SUFFIXES:
    loaded[f"{TARGET_PREFIX}final_layer.adaLN_modulation.1.{suffix}"] = (
        swap_scale_shift(
            diffload[f"norm_out.linear.{suffix}"], dim=MODULATION_DIM
        )
    )

# --- Joint transformer blocks ------------------------------------------------
for index in range(NUM_BLOCKS):
    src = f"transformer_blocks.{index}"
    dst = f"model.diffusion_model.joint_blocks.{index}"
    # The last block is treated specially on the context side: its modulation
    # halves are swapped, and no context attn.proj / context MLP is copied.
    # NOTE(review): presumably the last context block has no such layers in
    # the source file -- confirm against the model definition.
    is_last = index == NUM_BLOCKS - 1

    for suffix in SUFFIXES:
        # Context stream.
        context_modulation = diffload[f"{src}.norm1_context.linear.{suffix}"]
        if is_last:
            context_modulation = swap_scale_shift(
                context_modulation, dim=MODULATION_DIM
            )
        loaded[f"{dst}.context_block.adaLN_modulation.1.{suffix}"] = (
            context_modulation
        )
        loaded[f"{dst}.context_block.attn.qkv.{suffix}"] = _fused_qkv(
            diffload, index, ("add_q_proj", "add_k_proj", "add_v_proj"), suffix
        )
        if not is_last:
            loaded[f"{dst}.context_block.attn.proj.{suffix}"] = diffload[
                f"{src}.attn.to_add_out.{suffix}"
            ]
            loaded[f"{dst}.context_block.mlp.fc1.{suffix}"] = diffload[
                f"{src}.ff_context.net.0.proj.{suffix}"
            ]
            loaded[f"{dst}.context_block.mlp.fc2.{suffix}"] = diffload[
                f"{src}.ff_context.net.2.{suffix}"
            ]

        # Image (x) stream.
        loaded[f"{dst}.x_block.adaLN_modulation.1.{suffix}"] = diffload[
            f"{src}.norm1.linear.{suffix}"
        ]
        loaded[f"{dst}.x_block.attn.proj.{suffix}"] = diffload[
            f"{src}.attn.to_out.0.{suffix}"
        ]
        loaded[f"{dst}.x_block.attn.qkv.{suffix}"] = _fused_qkv(
            diffload, index, ("to_q", "to_k", "to_v"), suffix
        )
        # Fix: the x-stream MLP was never copied before, so the saved file
        # kept the base checkpoint's x_block MLP weights while every other
        # transformer tensor was replaced. Copy it like the context MLP.
        loaded[f"{dst}.x_block.mlp.fc1.{suffix}"] = diffload[
            f"{src}.ff.net.0.proj.{suffix}"
        ]
        loaded[f"{dst}.x_block.mlp.fc2.{suffix}"] = diffload[
            f"{src}.ff.net.2.{suffix}"
        ]

save_file(loaded, "sd3-reality-mix.safetensors")  # manual surgery