nopperl committed on
Commit
5724056
1 Parent(s): 247ad11
Files changed (4) hide show
  1. README.md +6 -0
  2. checkpoints/epoch_latest.pt +3 -0
  3. eval_results.jsonl +40 -0
  4. params.txt +91 -0
README.md CHANGED
@@ -1,3 +1,9 @@
1
  ---
2
  license: apache-2.0
 
 
3
  ---
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
+ datasets:
4
+ - Ejafa/ye-pop
5
  ---
6
+
7
+ A ViT-B/32 CLIP model trained for 4 epochs on the [ye-pop](https://huggingface.co/datasets/Ejafa/ye-pop) dataset (491,520 images and [LLaVA 1.5](https://github.com/haotian-liu/LLaVA)-generated detailed captions). It is a research artifact of [clip-synthetic-captions](https://github.com/nopperl/clip-synthetic-captions). On the [DataComp benchmark suite](https://datacomp.ai) (38 image classification and retrieval tasks), it outperforms the CLIP model trained using the original alt-texts.
8
+
9
+ Note: this model is severely undertrained and is therefore unlikely to be directly useful.
checkpoints/epoch_latest.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b9a7b8e3563022b8ab3a6271cd7615e6b631c0e4b1858d616e5188dd167b856
3
+ size 1815639097
eval_results.jsonl ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"key": "vtab/caltech101", "dataset": "Caltech-101", "metrics": {"acc1": 0.014790468364831553, "acc5": 0.13081347576006574, "mean_per_class_recall": 0.01362089703906384, "main_metric": 0.01362089703906384}}
2
+ {"key": "cifar10", "dataset": "CIFAR-10", "metrics": {"acc1": 0.1742, "acc5": 0.6409, "mean_per_class_recall": 0.1742, "main_metric": 0.1742}}
3
+ {"key": "vtab/cifar100", "dataset": "CIFAR-100", "metrics": {"acc1": 0.0208, "acc5": 0.0973, "mean_per_class_recall": 0.0208, "main_metric": 0.0208}}
4
+ {"key": "vtab/clevr_count_all", "dataset": "CLEVR Counts", "metrics": {"acc1": 0.10233333333333333, "acc5": 0.6134666666666667, "mean_per_class_recall": 0.1057952496046836, "main_metric": 0.10233333333333333}}
5
+ {"key": "vtab/clevr_closest_object_distance", "dataset": "CLEVR Distance", "metrics": {"acc1": 0.22433333333333333, "acc5": 0.9186666666666666, "mean_per_class_recall": 0.18210081266426092, "main_metric": 0.22433333333333333}}
6
+ {"key": "country211", "dataset": "Country211", "metrics": {"acc1": 0.004218009478672986, "acc5": 0.023601895734597155, "mean_per_class_recall": 0.004218009478672986, "main_metric": 0.004218009478672986}}
7
+ {"key": "vtab/dtd", "dataset": "Describable Textures", "metrics": {"acc1": 0.018085106382978722, "acc5": 0.12127659574468085, "mean_per_class_recall": 0.018085106382978725, "main_metric": 0.018085106382978722}}
8
+ {"key": "vtab/eurosat", "dataset": "EuroSAT", "metrics": {"acc1": 0.13425925925925927, "acc5": 0.6325925925925926, "mean_per_class_recall": 0.12178053872907779, "main_metric": 0.13425925925925927}}
9
+ {"key": "fgvc_aircraft", "dataset": "FGVC Aircraft", "metrics": {"acc1": 0.013501350135013501, "acc5": 0.05490549054905491, "mean_per_class_recall": 0.013520499108734403, "main_metric": 0.013520499108734403}}
10
+ {"key": "food101", "dataset": "Food-101", "metrics": {"acc1": 0.014336633663366337, "acc5": 0.0594059405940594, "mean_per_class_recall": 0.014336633663366335, "main_metric": 0.014336633663366337}}
11
+ {"key": "gtsrb", "dataset": "GTSRB", "metrics": {"acc1": 0.04568487727632621, "acc5": 0.19572446555819478, "mean_per_class_recall": 0.0628135544098638, "main_metric": 0.04568487727632621}}
12
+ {"key": "imagenet1k", "dataset": "ImageNet 1k", "metrics": {"acc1": 0.0019, "acc5": 0.00954, "mean_per_class_recall": 0.0019, "main_metric": 0.0019}}
13
+ {"key": "imagenet_sketch", "dataset": "ImageNet Sketch", "metrics": {"acc1": 0.0009628799937118042, "acc5": 0.005089508538190964, "mean_per_class_recall": 0.0009698039215686274, "main_metric": 0.0009628799937118042}}
14
+ {"key": "imagenetv2", "dataset": "ImageNet v2", "metrics": {"acc1": 0.0026, "acc5": 0.0108, "mean_per_class_recall": 0.0026, "main_metric": 0.0026}}
15
+ {"key": "imagenet-a", "dataset": "ImageNet-A", "metrics": {"acc1": 0.008, "acc5": 0.03373333333333333, "mean_per_class_recall": 0.0074894341555493075, "main_metric": 0.008}}
16
+ {"key": "imagenet-o", "dataset": "ImageNet-O", "metrics": {"acc1": 0.011, "acc5": 0.0435, "mean_per_class_recall": 0.015999007936507933, "main_metric": 0.011}}
17
+ {"key": "imagenet-r", "dataset": "ImageNet-R", "metrics": {"acc1": 0.008266666666666667, "acc5": 0.035366666666666664, "mean_per_class_recall": 0.009669698988598539, "main_metric": 0.008266666666666667}}
18
+ {"key": "vtab/kitti_closest_vehicle_distance", "dataset": "KITTI Vehicle Distance", "metrics": {"acc1": 0.2883263009845288, "acc5": null, "mean_per_class_recall": 0.23301884109911242, "main_metric": 0.2883263009845288}}
19
+ {"key": "mnist", "dataset": "MNIST", "metrics": {"acc1": 0.1007, "acc5": 0.4754, "mean_per_class_recall": 0.11013215859030837, "main_metric": 0.1007}}
20
+ {"key": "objectnet", "dataset": "ObjectNet", "metrics": {"acc1": 0.010606223753634111, "acc5": 0.05276192527188543, "mean_per_class_recall": 0.011203938830908448, "main_metric": 0.010606223753634111}}
21
+ {"key": "vtab/flowers", "dataset": "Oxford Flowers-102", "metrics": {"acc1": 0.010245568385103269, "acc5": 0.04325906651488047, "mean_per_class_recall": 0.01758932788452204, "main_metric": 0.01758932788452204}}
22
+ {"key": "vtab/pets", "dataset": "Oxford-IIIT Pet", "metrics": {"acc1": 0.033251567184518944, "acc5": 0.16053420550558736, "mean_per_class_recall": 0.033282305889080564, "main_metric": 0.033282305889080564}}
23
+ {"key": "voc2007", "dataset": "Pascal VOC 2007", "metrics": {"acc1": 0.1465678418803419, "acc5": 0.5253071581196581, "mean_per_class_recall": 0.07336279386291171, "main_metric": 0.1465678418803419}}
24
+ {"key": "vtab/pcam", "dataset": "PatchCamelyon", "metrics": {"acc1": 0.50018310546875, "acc5": null, "mean_per_class_recall": 0.4999694954548228, "main_metric": 0.50018310546875}}
25
+ {"key": "renderedsst2", "dataset": "Rendered SST2", "metrics": {"acc1": 0.49917627677100496, "acc5": null, "mean_per_class_recall": 0.5, "main_metric": 0.49917627677100496}}
26
+ {"key": "vtab/resisc45", "dataset": "RESISC45", "metrics": {"acc1": 0.053492063492063494, "acc5": 0.18603174603174602, "mean_per_class_recall": 0.054165116736977964, "main_metric": 0.053492063492063494}}
27
+ {"key": "cars", "dataset": "Stanford Cars", "metrics": {"acc1": 0.008456659619450317, "acc5": 0.03258301206317622, "mean_per_class_recall": 0.008027232987922588, "main_metric": 0.008456659619450317}}
28
+ {"key": "stl10", "dataset": "STL-10", "metrics": {"acc1": 0.20875, "acc5": 0.747375, "mean_per_class_recall": 0.20875, "main_metric": 0.20875}}
29
+ {"key": "sun397", "dataset": "SUN397", "metrics": {"acc1": 0.019456755613586625, "acc5": 0.058085219853982385, "mean_per_class_recall": 0.011161602915105585, "main_metric": 0.019456755613586625}}
30
+ {"key": "vtab/svhn", "dataset": "SVHN", "metrics": {"acc1": 0.11359096496619545, "acc5": 0.4724185617701291, "mean_per_class_recall": 0.10201643137809854, "main_metric": 0.11359096496619545}}
31
+ {"key": "retrieval/flickr_1k_test_image_text_retrieval", "dataset": "Flickr", "metrics": {"image_retrieval_recall@1": 0.006399999838322401, "text_retrieval_recall@1": 0.00800000037997961, "image_retrieval_recall@5": 0.023600000888109207, "text_retrieval_recall@5": 0.03200000151991844, "image_retrieval_recall@10": 0.04399999976158142, "text_retrieval_recall@10": 0.05900000035762787, "mean_recall@1": 0.007200000109151006, "main_metric": 0.007200000109151006}}
32
+ {"key": "retrieval/mscoco_2014_5k_test_image_text_retrieval", "dataset": "MSCOCO", "metrics": {"image_retrieval_recall@1": 0.0010795681737363338, "text_retrieval_recall@1": 0.002400000113993883, "image_retrieval_recall@5": 0.00535785686224699, "text_retrieval_recall@5": 0.009600000455975533, "image_retrieval_recall@10": 0.010555777698755264, "text_retrieval_recall@10": 0.016200000420212746, "mean_recall@1": 0.0017397841438651085, "main_metric": 0.0017397841438651085}}
33
+ {"key": "misc/winogavil", "dataset": "WinoGAViL", "metrics": {"avg_jaccard_score": 0.2770605295163252, "jaccard_score_5": 0.34184343434343434, "jaccard_score_6": 0.2908327333813295, "jaccard_score_10": 0.17132796780684106, "jaccard_score_12": 0.13780596350689808, "jaccard_score_5-6": 0.31568844592100404, "jaccard_score_10-12": 0.154527712724434, "main_metric": 0.154527712724434}}
34
+ {"key": "wilds/iwildcam", "dataset": "iWildCam", "metrics": {"acc1": 0.002336940010749924, "acc5": 0.06358813769250543, "mean_per_class_recall": 0.0070409559694376385, "acc_avg": 0.0023369400296360254, "recall-macro_all": 0.0070409559694376385, "F1-macro_all": 0.0005475987946203356, "main_metric": 0.0005475987946203356}}
35
+ {"key": "wilds/camelyon17", "dataset": "Camelyon17", "metrics": {"acc1": 0.4999882427634209, "acc5": null, "mean_per_class_recall": 0.4999882427634209, "acc_avg": 0.49998822808265686, "acc_slide:0": NaN, "count_slide:0": 0.0, "acc_slide:1": NaN, "count_slide:1": 0.0, "acc_slide:2": NaN, "count_slide:2": 0.0, "acc_slide:3": NaN, "count_slide:3": 0.0, "acc_slide:4": NaN, "count_slide:4": 0.0, "acc_slide:5": NaN, "count_slide:5": 0.0, "acc_slide:6": NaN, "count_slide:6": 0.0, "acc_slide:7": NaN, "count_slide:7": 0.0, "acc_slide:8": NaN, "count_slide:8": 0.0, "acc_slide:9": NaN, "count_slide:9": 0.0, "acc_slide:10": NaN, "count_slide:10": 0.0, "acc_slide:11": NaN, "count_slide:11": 0.0, "acc_slide:12": NaN, "count_slide:12": 0.0, "acc_slide:13": NaN, "count_slide:13": 0.0, "acc_slide:14": NaN, "count_slide:14": 0.0, "acc_slide:15": NaN, "count_slide:15": 0.0, "acc_slide:16": NaN, "count_slide:16": 0.0, "acc_slide:17": NaN, "count_slide:17": 0.0, "acc_slide:18": NaN, "count_slide:18": 0.0, "acc_slide:19": NaN, "count_slide:19": 0.0, "acc_slide:20": 0.9929133653640747, "count_slide:20": 3810.0, "acc_slide:21": 0.9972929358482361, "count_slide:21": 3694.0, "acc_slide:22": 0.4134535491466522, "count_slide:22": 7210.0, "acc_slide:23": 0.47787442803382874, "count_slide:23": 5288.0, "acc_slide:24": 0.9760580062866211, "count_slide:24": 7727.0, "acc_slide:25": 0.7625749707221985, "count_slide:25": 4334.0, "acc_slide:26": 0.8702490329742432, "count_slide:26": 3815.0, "acc_slide:27": 0.9758560061454773, "count_slide:27": 4556.0, "acc_slide:28": 0.15308363735675812, "count_slide:28": 31878.0, "acc_slide:29": 0.4754355549812317, "count_slide:29": 12742.0, "acc_wg": 0.15308363735675812, "main_metric": 0.4999882427634209}}
36
+ {"key": "wilds/fmow", "dataset": "FMoW", "metrics": {"acc1": 0.01623846571376877, "acc5": 0.08829383028767866, "mean_per_class_recall": 0.017711475508026393, "acc_avg": 0.016238465905189514, "acc_year:0": NaN, "count_year:0": 0.0, "acc_year:1": NaN, "count_year:1": 0.0, "acc_year:2": NaN, "count_year:2": 0.0, "acc_year:3": NaN, "count_year:3": 0.0, "acc_year:4": NaN, "count_year:4": 0.0, "acc_year:5": NaN, "count_year:5": 0.0, "acc_year:6": NaN, "count_year:6": 0.0, "acc_year:7": NaN, "count_year:7": 0.0, "acc_year:8": NaN, "count_year:8": 0.0, "acc_year:9": NaN, "count_year:9": 0.0, "acc_year:10": NaN, "count_year:10": 0.0, "acc_year:11": NaN, "count_year:11": 0.0, "acc_year:12": NaN, "count_year:12": 0.0, "acc_year:13": NaN, "count_year:13": 0.0, "acc_year:14": 0.01716899499297142, "count_year:14": 15959.0, "acc_year:15": 0.013823386281728745, "count_year:15": 6149.0, "acc_worst_year": 0.013823386281728745, "acc_region:0": 0.018335683271288872, "count_region:0": 4963.0, "acc_region:1": 0.01519289892166853, "count_region:1": 5858.0, "acc_region:2": 0.015426147729158401, "count_region:2": 2593.0, "acc_region:3": 0.015204386785626411, "count_region:3": 8024.0, "acc_region:4": 0.025525525212287903, "count_region:4": 666.0, "acc_region:5": 0.0, "count_region:5": 4.0, "acc_worst_region": 0.0, "main_metric": 0.0}}
37
+ {"key": "fairness/dollar_street", "dataset": "Dollar Street", "metrics": {"acc1": 0.02226662860405367, "acc5": 0.11333143020268341, "mean_per_class_recall": 0.020258544402102775, "acc_top5_avg": 0.11333142966032028, "acc_top5_income_ds:0": 0.12616822123527527, "count_income_ds:0": 856.0, "acc_top5_income_ds:1": 0.1187782809138298, "count_income_ds:1": 884.0, "acc_top5_income_ds:2": 0.08990011364221573, "count_income_ds:2": 901.0, "acc_top5_income_ds:3": 0.11948955804109573, "count_income_ds:3": 862.0, "acc_top5_wg": 0.08990011364221573, "main_metric": 0.08990011364221573}}
38
+ {"key": "fairness/geode", "dataset": "GeoDE", "metrics": {"acc1": 0.05172966047405509, "acc5": 0.24143177450352338, "mean_per_class_recall": 0.04892415395683693, "acc_avg": 0.05172966048121452, "acc_region:0": 0.04634655639529228, "count_region:0": 2395.0, "acc_region:1": 0.05522388219833374, "count_region:1": 2010.0, "acc_region:2": 0.06020696088671684, "count_region:2": 2126.0, "acc_region:3": 0.048793014138936996, "count_region:3": 1947.0, "acc_region:4": 0.05179283022880554, "count_region:4": 1757.0, "acc_region:5": 0.04882378876209259, "count_region:5": 2253.0, "acc_wg": 0.04634655639529228, "main_metric": 0.04634655639529228}}
39
+ {"key": "fairness/fairface", "dataset": "FairFace", "metrics": {"acc_race_avg": 0.799525260925293, "acc_race_race_binary:0": 0.03165467455983162, "count_race_binary:0": 2085.0, "acc_race_race_binary:1": 0.9800428748130798, "count_race_binary:1": 8869.0, "acc_race_wg": 0.03165467455983162, "acc_gender_avg": 0.535420835018158, "acc_gender_race_binary:0": 0.5376498699188232, "acc_gender_race_binary:1": 0.5348968505859375, "acc_gender_wg": 0.5348968505859375, "acc_age_avg": 0.11055321991443634, "acc_age_race_binary:0": 0.13285371661186218, "acc_age_race_binary:1": 0.10531063377857208, "acc_age_wg": 0.10531063377857208, "acc_gender_x_avg": 0.535420835018158, "acc_gender_x_race:0_gender:0": 0.978723406791687, "count_race:0_gender:0": 799.0, "acc_gender_x_race:0_gender:1": 0.02906208671629429, "count_race:0_gender:1": 757.0, "acc_gender_x_race:1_gender:0": 0.8859180212020874, "count_race:1_gender:0": 1122.0, "acc_gender_x_race:1_gender:1": 0.1318795382976532, "count_race:1_gender:1": 963.0, "acc_gender_x_race:2_gender:0": 0.9787516593933105, "count_race:2_gender:0": 753.0, "acc_gender_x_race:2_gender:1": 0.03276539966464043, "count_race:2_gender:1": 763.0, "acc_gender_x_race:3_gender:0": 0.9470365643501282, "count_race:3_gender:0": 793.0, "acc_gender_x_race:3_gender:1": 0.08795180916786194, "count_race:3_gender:1": 830.0, "acc_gender_x_race:4_gender:0": 0.9212791919708252, "count_race:4_gender:0": 813.0, "acc_gender_x_race:4_gender:1": 0.12121212482452393, "count_race:4_gender:1": 396.0, "acc_gender_x_race:5_gender:0": 0.9646258354187012, "count_race:5_gender:0": 735.0, "acc_gender_x_race:5_gender:1": 0.052941177040338516, "count_race:5_gender:1": 680.0, "acc_gender_x_race:6_gender:0": 0.9433719515800476, "count_race:6_gender:0": 777.0, "acc_gender_x_race:6_gender:1": 0.10219922661781311, "count_race:6_gender:1": 773.0, "acc_gender_x_wg": 0.02906208671629429, "toxicity_crime_avg": 0.0035603432916104794, "toxicity_crime_race:0": 0.0038560412358492613, "count_race:0": 
1556.0, "toxicity_crime_race:1": 0.0047961631789803505, "count_race:1": 2085.0, "toxicity_crime_race:2": 0.0019788919016718864, "count_race:2": 1516.0, "toxicity_crime_race:3": 0.006777572445571423, "count_race:3": 1623.0, "toxicity_crime_race:4": 0.0016542597441002727, "count_race:4": 1209.0, "toxicity_crime_race:5": 0.0014134275261312723, "count_race:5": 1415.0, "toxicity_crime_race:6": 0.003225806402042508, "count_race:6": 1550.0, "toxicity_crime_wg": 0.0014134275261312723, "toxicity_nonhuman_avg": 0.022548841312527657, "toxicity_nonhuman_race:0": 0.013496143743395805, "toxicity_nonhuman_race:1": 0.03693045675754547, "toxicity_nonhuman_race:2": 0.012532981112599373, "toxicity_nonhuman_race:3": 0.02156500332057476, "toxicity_nonhuman_race:4": 0.019851116463541985, "toxicity_nonhuman_race:5": 0.016961131244897842, "toxicity_nonhuman_race:6": 0.03032258152961731, "toxicity_nonhuman_wg": 0.012532981112599373, "main_metric": null}}
40
+ {"key": "fairness/utkface", "dataset": "UTKFace", "metrics": {"acc_race_avg": 0.5735982656478882, "acc_race_race_binary:0": 0.0782056376338005, "count_race_binary:0": 10076.0, "acc_race_race_binary:1": 0.9398987293243408, "count_race_binary:1": 13627.0, "acc_race_wg": 0.0782056376338005, "acc_gender_avg": 0.5255452990531921, "acc_gender_race_binary:0": 0.5173680186271667, "acc_gender_race_binary:1": 0.5315917134284973, "acc_gender_wg": 0.5173680186271667, "acc_age_avg": 0.09635911136865616, "acc_age_race_binary:0": 0.11383485794067383, "acc_age_race_binary:1": 0.08343729376792908, "acc_age_wg": 0.08343729376792908, "acc_gender_x_avg": 0.5255452990531921, "acc_gender_x_race:0_gender:0": 0.9607419967651367, "count_race:0_gender:0": 2318.0, "acc_gender_x_race:0_gender:1": 0.0520833320915699, "count_race:0_gender:1": 2208.0, "acc_gender_x_race:1_gender:0": 0.8555514812469482, "count_race:1_gender:0": 5476.0, "acc_gender_x_race:1_gender:1": 0.11478260904550552, "count_race:1_gender:1": 4600.0, "acc_gender_x_race:2_gender:0": 0.946483850479126, "count_race:2_gender:0": 2261.0, "acc_gender_x_race:2_gender:1": 0.09101516753435135, "count_race:2_gender:1": 1714.0, "acc_gender_x_race:3_gender:0": 0.9295238256454468, "count_race:3_gender:0": 1575.0, "acc_gender_x_race:3_gender:1": 0.18020440638065338, "count_race:3_gender:1": 1859.0, "acc_gender_x_race:4_gender:0": 0.9118421077728271, "count_race:4_gender:0": 760.0, "acc_gender_x_race:4_gender:1": 0.1223175972700119, "count_race:4_gender:1": 932.0, "acc_gender_x_wg": 0.0520833320915699, "toxicity_crime_avg": 0.002446947619318962, "toxicity_crime_race:0": 0.002651347778737545, "count_race:0": 4526.0, "toxicity_crime_race:1": 0.0018856689566746354, "count_race:1": 10076.0, "toxicity_crime_race:2": 0.0037735849618911743, "count_race:2": 3975.0, "toxicity_crime_race:3": 0.0026208502240478992, "count_race:3": 3434.0, "toxicity_crime_race:4": 0.0017730495892465115, "count_race:4": 1692.0, "toxicity_crime_wg": 
0.0017730495892465115, "toxicity_nonhuman_avg": 0.021980339661240578, "toxicity_nonhuman_race:0": 0.010826337151229382, "toxicity_nonhuman_race:1": 0.02729257568717003, "toxicity_nonhuman_race:2": 0.01861635223031044, "toxicity_nonhuman_race:3": 0.0259172972291708, "toxicity_nonhuman_race:4": 0.02009456232190132, "toxicity_nonhuman_wg": 0.010826337151229382, "main_metric": null}}
params.txt ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_freq: 1
2
+ aug_cfg: {}
3
+ batch_size: 1024
4
+ beta1: 0.9
5
+ beta2: 0.98
6
+ checkpoint_path: $HOME/clip-synthetic-captions/output/ye-pop-llava_caption/checkpoints
7
+ coca_caption_loss_weight: 2.0
8
+ coca_contrastive_loss_weight: 1.0
9
+ copy_codebase: False
10
+ csv_caption_key: title
11
+ csv_img_key: filepath
12
+ csv_separator:
13
+ dataset_resampled: False
14
+ dataset_type: webdataset
15
+ ddp_static_graph: True
16
+ debug: False
17
+ delete_previous_checkpoint: False
18
+ device: cuda:0
19
+ dist_backend: nccl
20
+ dist_url: env://
21
+ distill: False
22
+ distill_model: None
23
+ distill_pretrained: None
24
+ distributed: True
25
+ epochs: 4
26
+ epochs_cooldown: None
27
+ eps: 1e-06
28
+ force_custom_text: False
29
+ force_image_size: None
30
+ force_patch_dropout: None
31
+ force_quick_gelu: False
32
+ gather_with_grad: True
33
+ grad_checkpointing: True
34
+ grad_clip_norm: None
35
+ horovod: False
36
+ image_mean: None
37
+ image_std: None
38
+ imagenet_v2: None
39
+ imagenet_val: None
40
+ local_loss: True
41
+ local_rank: 0
42
+ lock_image: False
43
+ lock_image_freeze_bn_stats: False
44
+ lock_image_unlocked_groups: 0
45
+ lock_text: False
46
+ lock_text_freeze_layer_norm: False
47
+ lock_text_unlocked_layers: 0
48
+ log_every_n_steps: 100
49
+ log_level: 20
50
+ log_local: False
51
+ log_path: $HOME/clip-synthetic-captions/output/ye-pop-llava_caption/out.log
52
+ logs: $HOME/clip-synthetic-captions/output
53
+ lr: 0.0005
54
+ lr_cooldown_end: 0.0
55
+ lr_cooldown_power: 1.0
56
+ lr_scheduler: cosine
57
+ model: ViT-B-32
58
+ name: ye-pop-llava_caption
59
+ no_set_device_rank: False
60
+ precision: amp
61
+ pretrained:
62
+ pretrained_image: False
63
+ rank: 0
64
+ remote_sync: None
65
+ remote_sync_frequency: 300
66
+ remote_sync_protocol: s3
67
+ report_to:
68
+ resume: None
69
+ save_frequency: 0
70
+ save_most_recent: True
71
+ seed: 0
72
+ skip_scheduler: False
73
+ tensorboard: False
74
+ tensorboard_path:
75
+ torchscript: False
76
+ trace: False
77
+ train_data: $HOME/clip-synthetic-captions/data/postprocessed/ye-pop-img2dataset-llava_caption/shards/{00000000..00000049}.tar
78
+ train_data_upsampling_factors: None
79
+ train_num_samples: 122880
80
+ use_bn_sync: False
81
+ val_data: None
82
+ val_frequency: 1
83
+ val_num_samples: None
84
+ wandb: False
85
+ wandb_notes:
86
+ wandb_project_name: open-clip
87
+ warmup: 500
88
+ wd: 0.2
89
+ workers: 2
90
+ world_size: 4
91
+ zeroshot_frequency: 2