Spaces:
Runtime error
Runtime error
""" Image Generation Module for AutoGPT.""" | |
import io | |
import os.path | |
import uuid | |
from base64 import b64decode | |
import openai | |
import requests | |
from PIL import Image | |
from autogpt.config import Config | |
from autogpt.workspace import path_in_workspace | |
CFG = Config() | |
def generate_image(prompt: str, size: int = 256) -> str: | |
"""Generate an image from a prompt. | |
Args: | |
prompt (str): The prompt to use | |
size (int, optional): The size of the image. Defaults to 256. (Not supported by HuggingFace) | |
Returns: | |
str: The filename of the image | |
""" | |
filename = f"{str(uuid.uuid4())}.jpg" | |
# DALL-E | |
if CFG.image_provider == "dalle": | |
return generate_image_with_dalle(prompt, filename, size) | |
# HuggingFace | |
elif CFG.image_provider == "huggingface": | |
return generate_image_with_hf(prompt, filename) | |
# SD WebUI | |
elif CFG.image_provider == "sdwebui": | |
return generate_image_with_sd_webui(prompt, filename, size) | |
return "No Image Provider Set" | |
def generate_image_with_hf(prompt: str, filename: str) -> str: | |
"""Generate an image with HuggingFace's API. | |
Args: | |
prompt (str): The prompt to use | |
filename (str): The filename to save the image to | |
Returns: | |
str: The filename of the image | |
""" | |
API_URL = ( | |
f"https://api-inference.huggingface.co/models/{CFG.huggingface_image_model}" | |
) | |
if CFG.huggingface_api_token is None: | |
raise ValueError( | |
"You need to set your Hugging Face API token in the config file." | |
) | |
headers = { | |
"Authorization": f"Bearer {CFG.huggingface_api_token}", | |
"X-Use-Cache": "false", | |
} | |
response = requests.post( | |
API_URL, | |
headers=headers, | |
json={ | |
"inputs": prompt, | |
}, | |
) | |
image = Image.open(io.BytesIO(response.content)) | |
print(f"Image Generated for prompt:{prompt}") | |
image.save(path_in_workspace(filename)) | |
return f"Saved to disk:{filename}" | |
def generate_image_with_dalle(prompt: str, filename: str) -> str: | |
"""Generate an image with DALL-E. | |
Args: | |
prompt (str): The prompt to use | |
filename (str): The filename to save the image to | |
Returns: | |
str: The filename of the image | |
""" | |
openai.api_key = CFG.openai_api_key | |
# Check for supported image sizes | |
if size not in [256, 512, 1024]: | |
closest = min([256, 512, 1024], key=lambda x: abs(x - size)) | |
print( | |
f"DALL-E only supports image sizes of 256x256, 512x512, or 1024x1024. Setting to {closest}, was {size}." | |
) | |
size = closest | |
response = openai.Image.create( | |
prompt=prompt, | |
n=1, | |
size=f"{size}x{size}", | |
response_format="b64_json", | |
) | |
print(f"Image Generated for prompt:{prompt}") | |
image_data = b64decode(response["data"][0]["b64_json"]) | |
with open(path_in_workspace(filename), mode="wb") as png: | |
png.write(image_data) | |
return f"Saved to disk:{filename}" | |
def generate_image_with_sd_webui( | |
prompt: str, | |
filename: str, | |
size: int = 512, | |
negative_prompt: str = "", | |
extra: dict = {}, | |
) -> str: | |
"""Generate an image with Stable Diffusion webui. | |
Args: | |
prompt (str): The prompt to use | |
filename (str): The filename to save the image to | |
size (int, optional): The size of the image. Defaults to 256. | |
negative_prompt (str, optional): The negative prompt to use. Defaults to "". | |
extra (dict, optional): Extra parameters to pass to the API. Defaults to {}. | |
Returns: | |
str: The filename of the image | |
""" | |
# Create a session and set the basic auth if needed | |
s = requests.Session() | |
if CFG.sd_webui_auth: | |
username, password = CFG.sd_webui_auth.split(":") | |
s.auth = (username, password or "") | |
# Generate the images | |
response = requests.post( | |
f"{CFG.sd_webui_url}/sdapi/v1/txt2img", | |
json={ | |
"prompt": prompt, | |
"negative_prompt": negative_prompt, | |
"sampler_index": "DDIM", | |
"steps": 20, | |
"cfg_scale": 7.0, | |
"width": size, | |
"height": size, | |
"n_iter": 1, | |
**extra, | |
}, | |
) | |
print(f"Image Generated for prompt:{prompt}") | |
# Save the image to disk | |
response = response.json() | |
b64 = b64decode(response["images"][0].split(",", 1)[0]) | |
image = Image.open(io.BytesIO(b64)) | |
image.save(path_in_workspace(filename)) | |
return f"Saved to disk:{filename}" | |