"""Caption every JPEG image in a folder via an OpenAI-compatible chat endpoint.

Usage::

    python <this script> <image_dir>

For each ``*.jpg`` / ``*.jpeg`` file found directly in ``<image_dir>`` the
script reads the matching tag file ``./txt/<image name>.txt``, sends the tags
together with the base64-encoded image to the chat-completions endpoint, and
writes the returned caption to ``./maintxt/<image name>.txt``.
"""

import base64
import os
import sys
import time

import requests  # not used in this script; kept because it was imported originally
from openai import OpenAI
from tqdm import tqdm

# Connection settings for the OpenAI-compatible server.
OPENAI_API_KEY = "EMPTY"
OPENAI_API_BASE = "http://localhost:8000/v1"

# Model name sent with every completion request.
# NOTE(review): the id reported by the server is printed at start-up but is
# not used for requests; confirm this hard-coded name matches the served model.
MODEL_NAME = "./phi3_v14_800-merged"

# Directory with the tag captions (one .txt per image, produced by the tagger).
TXT_DIR = './txt/'
# Directory that receives the generated captions.
MAINTXT_DIR = './maintxt/'

IMAGE_EXTENSIONS = ('.jpg', '.jpeg')

# Extra sampling parameters forwarded verbatim in the request body.
EXTRA_BODY = {
    'repetition_penalty': 1.05,
    'top_k': -1,
    'top_p': 1,
    'temperature': 0,
    'use_beam_search': True,
    'best_of': 5,
}

BAR_FORMAT = (
    '{l_bar}{bar}| {n_fmt}/{total_fmt} '
    '[{elapsed}<{remaining}, {rate_fmt}{postfix}]'
)


def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 ASCII string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def _format_hms(seconds):
    """Format a non-negative duration in seconds as ``HH:MM:SS``."""
    minutes, secs = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    return f'{hours:02d}:{minutes:02d}:{secs:02d}'


def _request_caption(client, tags, base64_image):
    """Send one image plus its tags to the chat endpoint and return the caption.

    The returned text has leading whitespace removed. A response without text
    content yields an empty string instead of raising.
    """
    chat_response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"Make a caption that describe this image. \nHere is the tags for this image: {tags}",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    },
                },
            ],
        }],
        extra_body=EXTRA_BODY,
    )
    content = chat_response.choices[0].message.content
    # ``content`` is optional in the client's response model; guard against None.
    return (content or "").lstrip()


def main():
    """Caption all JPEG images in the folder given as the first CLI argument."""
    # Check that the command-line argument is present.
    if len(sys.argv) < 2:
        print("Please, provide the path to image folder.")
        sys.exit(1)

    # Get the path to the image dir from the command line.
    image_dir = sys.argv[1]
    if not os.path.isdir(image_dir):
        # Fail before contacting the server rather than inside os.listdir().
        print(f"Image folder not found: {image_dir}")
        sys.exit(1)

    client = OpenAI(
        api_key=OPENAI_API_KEY,
        base_url=OPENAI_API_BASE,
    )

    model_type = client.models.list().data[0].id
    print(f'model_type: {model_type}')

    # Ensure the output directory exists.
    os.makedirs(MAINTXT_DIR, exist_ok=True)

    # JPEG images directly inside image_dir; sorted so runs are reproducible
    # (os.listdir returns entries in arbitrary order).
    image_files = sorted(
        f for f in os.listdir(image_dir)
        if f.lower().endswith(IMAGE_EXTENSIONS)
    )
    total_files = len(image_files)

    start_time = time.time()
    total_elapsed_time = 0.0  # time spent in completion requests only
    processed_files = 0
    skipped_files = 0

    # The context manager closes the bar even if a request raises.
    with tqdm(total=total_files, unit='file', bar_format=BAR_FORMAT) as progress_bar:
        for index, image_file in enumerate(image_files):
            stem = os.path.splitext(image_file)[0]
            image_path = os.path.join(image_dir, image_file)
            txt_file = os.path.join(TXT_DIR, stem + '.txt')
            output_file = os.path.join(MAINTXT_DIR, stem + '.txt')

            # Read tags from the corresponding txt file. One missing tag file
            # must not abort the whole batch, so report it and move on.
            try:
                with open(txt_file, 'r', encoding='utf-8') as f:
                    tags = f.read().strip()
            except FileNotFoundError:
                print(f"\n\nFile {image_file}\nTags file not found, skipping: {txt_file}")
                skipped_files += 1
                progress_bar.update(1)
                continue

            base64_image = encode_image(image_path)

            step_start_time = time.perf_counter()
            content = _request_caption(client, tags, base64_image)
            step_time = time.perf_counter() - step_start_time

            total_elapsed_time += step_time
            processed_files += 1

            # Estimate: mean request time so far times the files still queued.
            files_left = total_files - index - 1
            remaining_time = (total_elapsed_time / processed_files) * files_left

            # Write the caption to the output file.
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(content)

            print(f"\n\nFile {image_file}\nProcessing time: {step_time:.2f} seconds\n{content}")
            print(f"Response saved to file: {output_file}")

            progress_bar.update(1)
            progress_bar.set_postfix(remaining=_format_hms(remaining_time), refresh=True)

    print("All images processed.")
    if skipped_files:
        print(f"Skipped {skipped_files} image(s) without a tags file.")
    print(f"Total time: {time.time() - start_time:.2f} seconds")


if __name__ == "__main__":
    main()