yrr committed
Commit
2ff7a7b
1 Parent(s): c818730

update inference code

Files changed (1): app.py (+26 -4)
app.py CHANGED
@@ -10,9 +10,9 @@ pipe = OmniGenPipeline.from_pretrained(
     "Shitao/OmniGen-v1"
 )
 
-@spaces.GPU(duration=300)
+@spaces.GPU(duration=160)
 def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer, offload_model,
-                   use_input_image_size_as_output):
+                   use_input_image_size_as_output, max_input_image_size):
     input_images = [img1, img2, img3]
     # Delete None
     input_images = [img for img in input_images if img is not None]
@@ -33,6 +33,7 @@ def generate_image(text, img1, img2, img3, height, width, guidance_scale, img_gu
         offload_model=offload_model,
         use_input_image_size_as_output=use_input_image_size_as_output,
         seed=seed,
+        max_input_image_size=max_input_image_size,
     )
     img = output[0]
     return img
@@ -55,6 +56,7 @@ def get_example():
             True,
             False,
             False,
+            1024,
         ],
         [
             "The woman in <img><|image_1|></img> waves her hand happily in the crowd",
@@ -70,6 +72,7 @@ def get_example():
             True,
             False,
             False,
+            1024,
         ],
         [
             "A man in a black shirt is reading a book. The man is the right man in <img><|image_1|></img>.",
@@ -85,6 +88,7 @@ def get_example():
             True,
             False,
             False,
+            1024,
         ],
         [
             "Two woman are raising fried chicken legs in a bar. A woman is <img><|image_1|></img>. The other woman is <img><|image_2|></img>.",
@@ -100,6 +104,7 @@ def get_example():
             True,
             False,
             False,
+            768,
         ],
         [
             "A man and a short-haired woman with a wrinkled face are standing in front of a bookshelf in a library. The man is the man in the middle of <img><|image_1|></img>, and the woman is oldest woman in <img><|image_2|></img>",
@@ -115,6 +120,7 @@ def get_example():
             True,
             False,
             False,
+            768,
         ],
         [
             "A man and a woman are sitting at a classroom desk. The man is the man with yellow hair in <img><|image_1|></img>. The woman is the woman on the left of <img><|image_2|></img>",
@@ -130,6 +136,7 @@ def get_example():
             True,
             False,
             False,
+            768,
         ],
         [
             "The flower <img><|image_1|><\/img> is placed in the vase which is in the middle of <img><|image_2|><\/img> on a wooden table of a living room",
@@ -145,6 +152,7 @@ def get_example():
             True,
             False,
             False,
+            768,
         ],
         [
             "<img><|image_1|><img>\n Remove the woman's earrings. Replace the mug with a clear glass filled with sparkling iced cola.",
@@ -160,6 +168,7 @@ def get_example():
             True,
             False,
             True,
+            1024,
         ],
         [
             "Detect the skeleton of human in this image: <img><|image_1|></img>.",
@@ -175,6 +184,7 @@ def get_example():
             True,
             False,
             True,
+            1024,
         ],
         [
             "Generate a new photo using the following picture and text as conditions: <img><|image_1|><img>\n A young boy is sitting on a sofa in the library, holding a book. His hair is neatly combed, and a faint smile plays on his lips, with a few freckles scattered across his cheeks. The library is quiet, with rows of shelves filled with books stretching out behind him.",
@@ -190,6 +200,7 @@ def get_example():
             True,
             False,
             True,
+            1024,
         ],
         [
             "Following the pose of this image <img><|image_1|><img>, generate a new photo: A young boy is sitting on a sofa in the library, holding a book. His hair is neatly combed, and a faint smile plays on his lips, with a few freckles scattered across his cheeks. The library is quiet, with rows of shelves filled with books stretching out behind him.",
@@ -205,6 +216,7 @@ def get_example():
             True,
             False,
             True,
+            1024,
         ],
         [
             "Following the depth mapping of this image <img><|image_1|><img>, generate a new photo: A young girl is sitting on a sofa in the library, holding a book. His hair is neatly combed, and a faint smile plays on his lips, with a few freckles scattered across his cheeks. The library is quiet, with rows of shelves filled with books stretching out behind him.",
@@ -220,6 +232,7 @@ def get_example():
             True,
             False,
             True,
+            1024,
         ],
         [
             "<img><|image_1|><\/img> What item can be used to see the current time? Please remove it.",
@@ -235,6 +248,7 @@ def get_example():
             True,
             False,
             True,
+            1024,
         ],
         [
             "According to the following examples, generate an output for the input.\nInput: <img><|image_1|></img>\nOutput: <img><|image_2|></img>\n\nInput: <img><|image_3|></img>\nOutput: ",
@@ -250,14 +264,15 @@ def get_example():
             True,
             False,
             False,
+            768,
         ],
     ]
     return case
 
 def run_for_examples(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer, offload_model,
-                     use_input_image_size_as_output):
+                     use_input_image_size_as_output, max_input_image_size):
     return generate_image(text, img1, img2, img3, height, width, guidance_scale, img_guidance_scale, inference_steps, seed, separate_cfg_infer, offload_model,
-                          use_input_image_size_as_output)
+                          use_input_image_size_as_output, max_input_image_size)
 
 description = """
 OmniGen is a unified image generation model that you can use to perform various tasks, including but not limited to text-to-image generation, subject-driven generation, Identity-Preserving Generation, and image-conditioned generation.
@@ -267,6 +282,7 @@ prompt = "A woman holds a bouquet of flowers and faces the camera. Thw woman is
 
 Tips:
 - For out of memory or time cost, you can refer to [./docs/inference.md#requiremented-resources](https://github.com/VectorSpaceLab/OmniGen/blob/main/docs/inference.md#requiremented-resources) to select a appropriate setting.
+- If time cost is too long, please try to reduce the `max_input_image_size`.
 - Oversaturated: If the image appears oversaturated, please reduce the `guidance_scale`.
 - Not match the prompt: If the image does not match the prompt, please try to increase the `guidance_scale`.
 - Low-quality: More detailed prompt will lead to better results.
@@ -338,6 +354,10 @@ with gr.Blocks() as demo:
                 label="Seed", minimum=0, maximum=2147483647, value=42, step=1
             )
 
+            max_input_image_size = gr.Slider(
+                label="max_input_image_size", minimum=128, maximum=2048, value=1024, step=16
+            )
+
             separate_cfg_infer = gr.Checkbox(
                 label="separate_cfg_infer", info="Whether to use separate inference process for different guidance. This will reduce the memory cost.", value=True,
             )
@@ -373,6 +393,7 @@ with gr.Blocks() as demo:
                 separate_cfg_infer,
                 offload_model,
                 use_input_image_size_as_output,
+                max_input_image_size,
             ],
             outputs=output_image,
         )
@@ -394,6 +415,7 @@ with gr.Blocks() as demo:
                 separate_cfg_infer,
                 offload_model,
                 use_input_image_size_as_output,
+                max_input_image_size,
             ],
             outputs=output_image,
         )
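
For reference, below is a minimal sketch of calling the updated `generate_image` directly, outside the Gradio UI, with the new trailing `max_input_image_size` argument. It is not part of the commit: the import, the image path, and the numeric values are illustrative assumptions; only the parameter names and their order come from the diff above.

```python
# Sketch only, not part of this commit: invoke app.py's generate_image()
# with the new max_input_image_size argument added in 2ff7a7b.
# Assumptions: app.py is importable (it loads the model at import time),
# the @spaces.GPU decorator is usable in your environment (ZeroGPU Space),
# image inputs are file paths, and all numeric values are illustrative.
from app import generate_image

img = generate_image(
    text="The woman in <img><|image_1|></img> waves her hand happily in the crowd",
    img1="./imgs/example_person.png",  # hypothetical path
    img2=None,
    img3=None,
    height=1024,
    width=1024,
    guidance_scale=2.5,          # illustrative value
    img_guidance_scale=1.6,      # illustrative value
    inference_steps=50,          # illustrative value
    seed=42,
    separate_cfg_infer=True,     # reduces memory cost, per the checkbox info text
    offload_model=False,
    use_input_image_size_as_output=False,
    max_input_image_size=1024,   # new argument introduced by this commit
)
# The app returns output[0] to a gr.Image component, so a PIL-style image is expected.
img.save("output.png")
```

Lowering `max_input_image_size` (e.g. to 768, as several of the updated examples do) shrinks large input images before inference, trading input resolution for lower time and memory cost, which matches the new tip added to the description.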