"""Build a conversation-style JSON dataset from captioned images.

Every ``.jpg`` in ``folder_path`` that has a same-named ``.txt`` caption next
to it becomes one record.  If a same-named ``.txt`` also exists in
``tags_folder_path``, its stripped content is appended to the user prompt.
The resulting list is written to ``output.json`` inside ``base_folder``.
"""
import json
import os

folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_done\\cleaned\\"
base_folder = "d:\\Dropbox\\YandexDisk\\Dataset\\"
tags_folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_basetxt\\"


def read_text(path):
    """Return the full content of the text file at *path*.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's locale code page.
    """
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


def build_dataset(images_dir, tags_dir):
    """Collect one conversation record per captioned ``.jpg`` in *images_dir*.

    Args:
        images_dir: Directory holding ``<name>.jpg`` images and their
            ``<name>.txt`` captions.
        tags_dir: Directory that may hold a ``<name>.txt`` tag file per image.

    Returns:
        A list of dicts with keys ``"id"`` (sequential, as a string),
        ``"image"`` (one-element list with the image path) and
        ``"conversations"`` (user prompt followed by the caption).
        Images without a caption file are skipped.
    """
    records = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # sequential ids reproducible from run to run.
    for filename in sorted(os.listdir(images_dir)):
        if not filename.endswith(".jpg"):
            continue

        image_name = os.path.splitext(filename)[0]
        txt_path = os.path.join(images_dir, f"{image_name}.txt")
        if not os.path.exists(txt_path):
            # No caption for this image: nothing to train on.
            continue
        txt_content = read_text(txt_path)

        tags_path = os.path.join(tags_dir, f"{image_name}.txt")
        if os.path.exists(tags_path):
            tags_content = read_text(tags_path).strip()
            prompt = f" Make a caption that describe this image. Here is the tags for this image: {tags_content}"
        else:
            prompt = " Make a caption that describe this image"

        records.append(
            {
                # Ids count only the records actually emitted, so skipped
                # images leave no gaps.
                "id": str(len(records)),
                "image": [os.path.join(images_dir, filename)],
                "conversations": [
                    {"from": "user", "value": prompt},
                    {"from": "assistant", "value": txt_content},
                ],
            }
        )
    return records


def main():
    """Build the dataset from the configured folders and write output.json."""
    json_data = build_dataset(folder_path, tags_folder_path)
    output_path = os.path.join(base_folder, "output.json")
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=4)


if __name__ == "__main__":
    main()