Desm0nt committed on
Commit
c4edf1f
1 Parent(s): 0380c20

Upload convert.py

Browse files
Files changed (1) hide show
  1. convert.py +41 -0
convert.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Convert a folder of captioned images into a conversation-style JSON file.

For every ``.jpg`` image that has a same-named ``.txt`` caption next to it,
one record is produced containing the image path, a user prompt and the
caption as the assistant reply. If a same-named ``.txt`` file also exists in
the tags folder, its (stripped) content is appended to the prompt.
"""
import json
import os

# Folder holding the images and their same-named caption ``.txt`` files.
folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_done\\cleaned\\"
# Folder the resulting ``output.json`` is written to.
base_folder = "d:\\Dropbox\\YandexDisk\\Dataset\\"
# Folder holding optional same-named tag ``.txt`` files.
tags_folder_path = "d:\\Dropbox\\YandexDisk\\Dataset\\Human_Captions_basetxt\\"


def _read_text(path: str) -> str:
    """Return the full content of the text file at *path*, decoded as UTF-8."""
    # Explicit encoding: the platform default (a locale code page on Windows)
    # would mis-decode or reject non-ASCII captions.
    # NOTE(review): assumes the caption/tag files are UTF-8 - confirm.
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


def build_records(images_dir: str, tags_dir: str) -> list:
    """Build the list of conversation records for the images in *images_dir*.

    Args:
        images_dir: Directory scanned for ``.jpg`` files and their same-named
            ``.txt`` caption files.
        tags_dir: Directory looked up for an optional same-named ``.txt``
            tags file per image.

    Returns:
        A list of dicts with the keys ``"id"`` (running counter as a string),
        ``"image"`` (one-element list with the image path) and
        ``"conversations"`` (user prompt followed by the assistant caption).
        Images without a caption file are skipped and do not consume an id.

    Raises:
        OSError: If *images_dir* cannot be listed or a file cannot be read.
    """
    records = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # assigned ids reproducible from one run to the next.
    for filename in sorted(os.listdir(images_dir)):
        # Case-insensitive so that "photo.JPG" is not silently skipped.
        if not filename.lower().endswith(".jpg"):
            continue

        image_name = os.path.splitext(filename)[0]
        image_path = os.path.join(images_dir, filename)
        txt_path = os.path.join(images_dir, f"{image_name}.txt")

        # An image without a caption cannot form a conversation pair.
        if not os.path.exists(txt_path):
            continue
        txt_content = _read_text(txt_path)

        tags_path = os.path.join(tags_dir, f"{image_name}.txt")
        if os.path.exists(tags_path):
            tags_content = _read_text(tags_path).strip()
            prompt = f"<ImageHere> Make a caption that describe this image. Here is the tags for this image: {tags_content}"
        else:
            prompt = "<ImageHere> Make a caption that describe this image"

        records.append(
            {
                # Ids count only the records actually emitted, starting at 0.
                "id": str(len(records)),
                "image": [image_path],
                "conversations": [
                    {"from": "user", "value": prompt},
                    {"from": "assistant", "value": txt_content},
                ],
            }
        )

    return records


def main() -> None:
    """Scan the configured folders and write ``output.json`` into ``base_folder``."""
    json_data = build_records(folder_path, tags_folder_path)
    output_path = os.path.join(base_folder, "output.json")
    with open(output_path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII captions readable in the file;
        # the parsed JSON content is the same either way.
        json.dump(json_data, f, indent=4, ensure_ascii=False)


if __name__ == "__main__":
    main()