# File size: 3,762 Bytes
# bf4ab95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import base64
import requests
import os
from openai import OpenAI
from tqdm import tqdm
import time
import sys

# Check that the command-line argument (path to the image folder) was given.
if len(sys.argv) < 2:
    # sys.exit() with a string writes it to stderr and exits with status 1,
    # so the usage error does not get mixed into regular stdout output.
    sys.exit("Please, provide the path to image folder.")

# Get the path to image dir from command line.
image_dir = sys.argv[1]
# Fail early, before contacting the server, if the path is not a directory;
# otherwise the script would only crash later when listing the folder.
if not os.path.isdir(image_dir):
    sys.exit(f"Image folder not found or not a directory: {image_dir}")

# Client for an OpenAI-compatible endpoint on localhost; the key is a
# placeholder value.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

# Report the id of the first model the server lists.
model_type = client.models.list().data[0].id
print(f'model_type: {model_type}')

# Helper that converts an image file into base64 text
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is opened in binary mode, its bytes are base64-encoded, and the
    encoded bytes are decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

# Directories
# Directory with the tag captions produced by the WD tagger
txt_dir = './txt/'
# Directory where the resulting captions are written
maintxt_dir = './maintxt/'
# Placeholder; reassigned for every image inside the loop below
image_path = ''

# Ensure the output directory exists
os.makedirs(maintxt_dir, exist_ok=True)

# Get list of all JPEG images in the directory. Sorted because os.listdir()
# returns entries in arbitrary order and a stable order makes runs repeatable.
image_files = sorted(
    f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg'))
)

total_files = len(image_files)
start_time = time.time()

total_elapsed_time = 0  # seconds spent waiting for caption requests
processed_files = 0     # images that were captioned and saved
skipped_files = 0       # images skipped because their tag file is missing

# Process all images in the image directory. The context manager closes the
# progress bar even when a request raises.
with tqdm(
    total=total_files,
    unit='file',
    bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]',
) as progress_bar:
    for index, image_file in enumerate(image_files):
        image_path = os.path.join(image_dir, image_file)
        base_name = os.path.splitext(image_file)[0]
        txt_file = os.path.join(txt_dir, base_name + '.txt')
        output_file = os.path.join(maintxt_dir, base_name + '.txt')

        # Read tags from the corresponding txt file. The encoding is explicit
        # so it matches the UTF-8 output instead of the platform default.
        try:
            with open(txt_file, 'r', encoding='utf-8') as f:
                tags = f.read().strip()
        except FileNotFoundError:
            # A single missing tag file should not abort the whole batch.
            print(f"\n\nWarning: tag file {txt_file} not found, skipping {image_file}")
            skipped_files += 1
            progress_bar.update(1)
            continue

        base64_image = encode_image(image_path)

        step_start_time = time.time()

        # NOTE(review): the model name is hard-coded here, while the id
        # reported by the server (model_type) is only printed -- confirm that
        # this name matches what the server actually serves.
        # extra_body carries sampling options that are not standard OpenAI
        # parameters (presumably understood by the local server -- confirm).
        chat_response = client.chat.completions.create(
            model="./phi3_v14_800-merged",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": f"Make a caption that describe this image. Here is the tags for this image: {tags}"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        },
                    },
                ],
            }],
            extra_body={'repetition_penalty': 1.05, 'top_k': -1,'top_p': 1,'temperature': 0, 'use_beam_search': True, 'best_of':5},
        )

        step_time = time.time() - step_start_time
        total_elapsed_time += step_time
        processed_files += 1

        # Estimate the time left: average request time so far multiplied by
        # the number of files that still follow this one.
        remaining_time = (total_elapsed_time / processed_files) * (total_files - index - 1)

        # Convert remaining time to hours, minutes and seconds
        remaining_hours, leftover = divmod(int(remaining_time), 3600)
        remaining_minutes, remaining_seconds = divmod(leftover, 60)

        # Extract the content from the response; it may be None, in which
        # case an empty caption is written instead of crashing on .lstrip().
        content = (chat_response.choices[0].message.content or "").lstrip()

        # Write the content to the output file
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(content)

        print(f"\n\nFile {image_file}\nProcessing time: {step_time:.2f} seconds\n{content}")
        print(f"Response saved to file: {output_file}")

        progress_bar.update(1)
        progress_bar.set_postfix(remaining=f'{remaining_hours:02d}:{remaining_minutes:02d}:{remaining_seconds:02d}', refresh=True)

print("All images processed.")
if skipped_files:
    print(f"Skipped {skipped_files} image(s) without a tag file.")
print(f"Total time: {time.time() - start_time:.2f} seconds")