File size: 1,585 Bytes
c4edf1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
import json

# Dataset locations. The trailing separators are kept so that joined paths
# come out exactly as they did before.
folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_done\\cleaned\\"
base_folder = "d:\\Dropbox\\YandexDisk\\Dataset\\"
tags_folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_basetxt\\"


def _read_text(path, encoding):
    """Return the full text of *path*, or ``None`` if the file does not exist."""
    try:
        with open(path, "r", encoding=encoding) as f:
            return f.read()
    except FileNotFoundError:
        return None


def build_dataset(image_dir, tags_dir, encoding="utf-8"):
    """Build the list of conversation records for captioned images.

    Every ``*.jpg`` file in *image_dir* that has a sibling ``<name>.txt``
    caption file yields one record. If *tags_dir* contains ``<name>.txt`` too,
    its stripped content is appended to the user prompt.

    Args:
        image_dir: Directory holding the ``.jpg`` images and caption files.
        tags_dir: Directory holding the optional per-image tag files.
        encoding: Text encoding used to read caption and tag files. It is
            explicit so results do not depend on the platform's locale codec.

    Returns:
        A list of dicts with the keys ``id`` (a running counter as a string),
        ``image`` (a one-element list with the image path) and
        ``conversations`` (the user prompt followed by the caption).

    Raises:
        FileNotFoundError: If *image_dir* does not exist.
        UnicodeDecodeError: If a caption or tag file is not valid *encoding*.
    """
    records = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # id assigned to each image stable across runs and machines.
    for filename in sorted(os.listdir(image_dir)):
        if not filename.endswith(".jpg"):
            continue

        image_name = os.path.splitext(filename)[0]
        caption = _read_text(
            os.path.join(image_dir, f"{image_name}.txt"), encoding
        )
        if caption is None:
            # Images without a caption file are not part of the dataset.
            continue

        tags = _read_text(os.path.join(tags_dir, f"{image_name}.txt"), encoding)
        if tags is None:
            prompt = "<ImageHere> Make a caption that describe this image"
        else:
            tags_content = tags.strip()
            prompt = f"<ImageHere> Make a caption that describe this image. Here is the tags for this image: {tags_content}"

        records.append(
            {
                # Ids count only the images that are actually emitted.
                "id": str(len(records)),
                "image": [os.path.join(image_dir, filename)],
                "conversations": [
                    {"from": "user", "value": prompt},
                    {"from": "assistant", "value": caption},
                ],
            }
        )
    return records


def main():
    """Build the dataset from the configured folders and write ``output.json``."""
    json_data = build_dataset(folder_path, tags_folder_path)
    output_path = os.path.join(base_folder, "output.json")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=4)


if __name__ == "__main__":
    main()