import os

import torch
from transformers import CLIPProcessor, CLIPModel

# Get the directory of the script
script_directory = os.path.dirname(os.path.realpath(__file__))

# Store the Hugging Face cache in a "cache" folder next to the script
cache_directory = os.path.join(script_directory, "cache")

# Create the cache directory if it doesn't exist
os.makedirs(cache_directory, exist_ok=True)

# Load the CLIP processor and model
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=cache_directory)
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=cache_directory)

# Text description to encode
text = "a cat sitting on a table"

# Tokenize the text and compute its embedding.
# Note: text inputs go through get_text_features(); get_image_features()
# expects pixel values, not token ids, so it cannot be called on text.
inputs = clip_processor(text=text, return_tensors="pt", padding=True)
with torch.no_grad():
    text_features = clip_model.get_text_features(**inputs)

print("Text embedding shape:", tuple(text_features.shape))

# CLIP is a contrastive text/image encoder, not a generative model:
# there is no CLIPModel.generate_images() in transformers, so the text
# embedding alone cannot be turned into a picture. An actual
# text-to-image model is needed for that step (see the sketch below).
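
# --- Sketch: the image-generation step the original script attempted ---
# A minimal sketch, assuming the `diffusers` library is installed and the
# Stable Diffusion v1.5 checkpoint ("runwayml/stable-diffusion-v1-5" is an
# assumption, not part of the original script) can be downloaded. Stable
# Diffusion uses a CLIP text encoder internally, but the image synthesis
# itself is done by a diffusion model, not by CLIPModel.
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    cache_dir=cache_directory,
    torch_dtype=torch.float16,
)
# A CUDA GPU is assumed here; for CPU inference, drop both the .to("cuda")
# call and the torch_dtype argument (generation will be much slower).
pipe = pipe.to("cuda")

# The pipeline returns PIL images directly, so no Image.fromarray() is needed
generated_image = pipe(text).images[0]

# Save the generated image
output_image_path = "generated_image.png"
generated_image.save(output_image_path)
print("Image generated and saved as:", output_image_path)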