add data
Browse files- README.md +6 -0
- checkpoints/epoch_latest.pt +3 -0
- eval_results.jsonl +40 -0
- params.txt +91 -0
README.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1 |
---
|
2 |
license: apache-2.0
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
+
datasets:
|
4 |
+
- Ejafa/ye-pop
|
5 |
---
|
6 |
+
|
7 |
+
A ViT-B/32 CLIP model trained for 4 epochs on the [ye-pop](https://huggingface.co/datasets/Ejafa/ye-pop) dataset (491,520 images with detailed captions generated by [LLaVA 1.5](https://github.com/haotian-liu/LLaVA)). This model is a research artifact of the [clip-synthetic-captions](https://github.com/nopperl/clip-synthetic-captions) project. On the [DataComp benchmark suite](https://datacomp.ai) (38 image classification and retrieval tasks), it outperforms a CLIP model trained on the original alt-texts of the same images.
|
8 |
+
|
9 |
+
Note: this model is likely not directly useful in practice, as it is severely undertrained.
|
checkpoints/epoch_latest.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b9a7b8e3563022b8ab3a6271cd7615e6b631c0e4b1858d616e5188dd167b856
|
3 |
+
size 1815639097
|
eval_results.jsonl
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"key": "vtab/caltech101", "dataset": "Caltech-101", "metrics": {"acc1": 0.014790468364831553, "acc5": 0.13081347576006574, "mean_per_class_recall": 0.01362089703906384, "main_metric": 0.01362089703906384}}
|
2 |
+
{"key": "cifar10", "dataset": "CIFAR-10", "metrics": {"acc1": 0.1742, "acc5": 0.6409, "mean_per_class_recall": 0.1742, "main_metric": 0.1742}}
|
3 |
+
{"key": "vtab/cifar100", "dataset": "CIFAR-100", "metrics": {"acc1": 0.0208, "acc5": 0.0973, "mean_per_class_recall": 0.0208, "main_metric": 0.0208}}
|
4 |
+
{"key": "vtab/clevr_count_all", "dataset": "CLEVR Counts", "metrics": {"acc1": 0.10233333333333333, "acc5": 0.6134666666666667, "mean_per_class_recall": 0.1057952496046836, "main_metric": 0.10233333333333333}}
|
5 |
+
{"key": "vtab/clevr_closest_object_distance", "dataset": "CLEVR Distance", "metrics": {"acc1": 0.22433333333333333, "acc5": 0.9186666666666666, "mean_per_class_recall": 0.18210081266426092, "main_metric": 0.22433333333333333}}
|
6 |
+
{"key": "country211", "dataset": "Country211", "metrics": {"acc1": 0.004218009478672986, "acc5": 0.023601895734597155, "mean_per_class_recall": 0.004218009478672986, "main_metric": 0.004218009478672986}}
|
7 |
+
{"key": "vtab/dtd", "dataset": "Describable Textures", "metrics": {"acc1": 0.018085106382978722, "acc5": 0.12127659574468085, "mean_per_class_recall": 0.018085106382978725, "main_metric": 0.018085106382978722}}
|
8 |
+
{"key": "vtab/eurosat", "dataset": "EuroSAT", "metrics": {"acc1": 0.13425925925925927, "acc5": 0.6325925925925926, "mean_per_class_recall": 0.12178053872907779, "main_metric": 0.13425925925925927}}
|
9 |
+
{"key": "fgvc_aircraft", "dataset": "FGVC Aircraft", "metrics": {"acc1": 0.013501350135013501, "acc5": 0.05490549054905491, "mean_per_class_recall": 0.013520499108734403, "main_metric": 0.013520499108734403}}
|
10 |
+
{"key": "food101", "dataset": "Food-101", "metrics": {"acc1": 0.014336633663366337, "acc5": 0.0594059405940594, "mean_per_class_recall": 0.014336633663366335, "main_metric": 0.014336633663366337}}
|
11 |
+
{"key": "gtsrb", "dataset": "GTSRB", "metrics": {"acc1": 0.04568487727632621, "acc5": 0.19572446555819478, "mean_per_class_recall": 0.0628135544098638, "main_metric": 0.04568487727632621}}
|
12 |
+
{"key": "imagenet1k", "dataset": "ImageNet 1k", "metrics": {"acc1": 0.0019, "acc5": 0.00954, "mean_per_class_recall": 0.0019, "main_metric": 0.0019}}
|
13 |
+
{"key": "imagenet_sketch", "dataset": "ImageNet Sketch", "metrics": {"acc1": 0.0009628799937118042, "acc5": 0.005089508538190964, "mean_per_class_recall": 0.0009698039215686274, "main_metric": 0.0009628799937118042}}
|
14 |
+
{"key": "imagenetv2", "dataset": "ImageNet v2", "metrics": {"acc1": 0.0026, "acc5": 0.0108, "mean_per_class_recall": 0.0026, "main_metric": 0.0026}}
|
15 |
+
{"key": "imagenet-a", "dataset": "ImageNet-A", "metrics": {"acc1": 0.008, "acc5": 0.03373333333333333, "mean_per_class_recall": 0.0074894341555493075, "main_metric": 0.008}}
|
16 |
+
{"key": "imagenet-o", "dataset": "ImageNet-O", "metrics": {"acc1": 0.011, "acc5": 0.0435, "mean_per_class_recall": 0.015999007936507933, "main_metric": 0.011}}
|
17 |
+
{"key": "imagenet-r", "dataset": "ImageNet-R", "metrics": {"acc1": 0.008266666666666667, "acc5": 0.035366666666666664, "mean_per_class_recall": 0.009669698988598539, "main_metric": 0.008266666666666667}}
|
18 |
+
{"key": "vtab/kitti_closest_vehicle_distance", "dataset": "KITTI Vehicle Distance", "metrics": {"acc1": 0.2883263009845288, "acc5": null, "mean_per_class_recall": 0.23301884109911242, "main_metric": 0.2883263009845288}}
|
19 |
+
{"key": "mnist", "dataset": "MNIST", "metrics": {"acc1": 0.1007, "acc5": 0.4754, "mean_per_class_recall": 0.11013215859030837, "main_metric": 0.1007}}
|
20 |
+
{"key": "objectnet", "dataset": "ObjectNet", "metrics": {"acc1": 0.010606223753634111, "acc5": 0.05276192527188543, "mean_per_class_recall": 0.011203938830908448, "main_metric": 0.010606223753634111}}
|
21 |
+
{"key": "vtab/flowers", "dataset": "Oxford Flowers-102", "metrics": {"acc1": 0.010245568385103269, "acc5": 0.04325906651488047, "mean_per_class_recall": 0.01758932788452204, "main_metric": 0.01758932788452204}}
|
22 |
+
{"key": "vtab/pets", "dataset": "Oxford-IIIT Pet", "metrics": {"acc1": 0.033251567184518944, "acc5": 0.16053420550558736, "mean_per_class_recall": 0.033282305889080564, "main_metric": 0.033282305889080564}}
|
23 |
+
{"key": "voc2007", "dataset": "Pascal VOC 2007", "metrics": {"acc1": 0.1465678418803419, "acc5": 0.5253071581196581, "mean_per_class_recall": 0.07336279386291171, "main_metric": 0.1465678418803419}}
|
24 |
+
{"key": "vtab/pcam", "dataset": "PatchCamelyon", "metrics": {"acc1": 0.50018310546875, "acc5": null, "mean_per_class_recall": 0.4999694954548228, "main_metric": 0.50018310546875}}
|
25 |
+
{"key": "renderedsst2", "dataset": "Rendered SST2", "metrics": {"acc1": 0.49917627677100496, "acc5": null, "mean_per_class_recall": 0.5, "main_metric": 0.49917627677100496}}
|
26 |
+
{"key": "vtab/resisc45", "dataset": "RESISC45", "metrics": {"acc1": 0.053492063492063494, "acc5": 0.18603174603174602, "mean_per_class_recall": 0.054165116736977964, "main_metric": 0.053492063492063494}}
|
27 |
+
{"key": "cars", "dataset": "Stanford Cars", "metrics": {"acc1": 0.008456659619450317, "acc5": 0.03258301206317622, "mean_per_class_recall": 0.008027232987922588, "main_metric": 0.008456659619450317}}
|
28 |
+
{"key": "stl10", "dataset": "STL-10", "metrics": {"acc1": 0.20875, "acc5": 0.747375, "mean_per_class_recall": 0.20875, "main_metric": 0.20875}}
|
29 |
+
{"key": "sun397", "dataset": "SUN397", "metrics": {"acc1": 0.019456755613586625, "acc5": 0.058085219853982385, "mean_per_class_recall": 0.011161602915105585, "main_metric": 0.019456755613586625}}
|
30 |
+
{"key": "vtab/svhn", "dataset": "SVHN", "metrics": {"acc1": 0.11359096496619545, "acc5": 0.4724185617701291, "mean_per_class_recall": 0.10201643137809854, "main_metric": 0.11359096496619545}}
|
31 |
+
{"key": "retrieval/flickr_1k_test_image_text_retrieval", "dataset": "Flickr", "metrics": {"image_retrieval_recall@1": 0.006399999838322401, "text_retrieval_recall@1": 0.00800000037997961, "image_retrieval_recall@5": 0.023600000888109207, "text_retrieval_recall@5": 0.03200000151991844, "image_retrieval_recall@10": 0.04399999976158142, "text_retrieval_recall@10": 0.05900000035762787, "mean_recall@1": 0.007200000109151006, "main_metric": 0.007200000109151006}}
|
32 |
+
{"key": "retrieval/mscoco_2014_5k_test_image_text_retrieval", "dataset": "MSCOCO", "metrics": {"image_retrieval_recall@1": 0.0010795681737363338, "text_retrieval_recall@1": 0.002400000113993883, "image_retrieval_recall@5": 0.00535785686224699, "text_retrieval_recall@5": 0.009600000455975533, "image_retrieval_recall@10": 0.010555777698755264, "text_retrieval_recall@10": 0.016200000420212746, "mean_recall@1": 0.0017397841438651085, "main_metric": 0.0017397841438651085}}
|
33 |
+
{"key": "misc/winogavil", "dataset": "WinoGAViL", "metrics": {"avg_jaccard_score": 0.2770605295163252, "jaccard_score_5": 0.34184343434343434, "jaccard_score_6": 0.2908327333813295, "jaccard_score_10": 0.17132796780684106, "jaccard_score_12": 0.13780596350689808, "jaccard_score_5-6": 0.31568844592100404, "jaccard_score_10-12": 0.154527712724434, "main_metric": 0.154527712724434}}
|
34 |
+
{"key": "wilds/iwildcam", "dataset": "iWildCam", "metrics": {"acc1": 0.002336940010749924, "acc5": 0.06358813769250543, "mean_per_class_recall": 0.0070409559694376385, "acc_avg": 0.0023369400296360254, "recall-macro_all": 0.0070409559694376385, "F1-macro_all": 0.0005475987946203356, "main_metric": 0.0005475987946203356}}
|
35 |
+
{"key": "wilds/camelyon17", "dataset": "Camelyon17", "metrics": {"acc1": 0.4999882427634209, "acc5": null, "mean_per_class_recall": 0.4999882427634209, "acc_avg": 0.49998822808265686, "acc_slide:0": NaN, "count_slide:0": 0.0, "acc_slide:1": NaN, "count_slide:1": 0.0, "acc_slide:2": NaN, "count_slide:2": 0.0, "acc_slide:3": NaN, "count_slide:3": 0.0, "acc_slide:4": NaN, "count_slide:4": 0.0, "acc_slide:5": NaN, "count_slide:5": 0.0, "acc_slide:6": NaN, "count_slide:6": 0.0, "acc_slide:7": NaN, "count_slide:7": 0.0, "acc_slide:8": NaN, "count_slide:8": 0.0, "acc_slide:9": NaN, "count_slide:9": 0.0, "acc_slide:10": NaN, "count_slide:10": 0.0, "acc_slide:11": NaN, "count_slide:11": 0.0, "acc_slide:12": NaN, "count_slide:12": 0.0, "acc_slide:13": NaN, "count_slide:13": 0.0, "acc_slide:14": NaN, "count_slide:14": 0.0, "acc_slide:15": NaN, "count_slide:15": 0.0, "acc_slide:16": NaN, "count_slide:16": 0.0, "acc_slide:17": NaN, "count_slide:17": 0.0, "acc_slide:18": NaN, "count_slide:18": 0.0, "acc_slide:19": NaN, "count_slide:19": 0.0, "acc_slide:20": 0.9929133653640747, "count_slide:20": 3810.0, "acc_slide:21": 0.9972929358482361, "count_slide:21": 3694.0, "acc_slide:22": 0.4134535491466522, "count_slide:22": 7210.0, "acc_slide:23": 0.47787442803382874, "count_slide:23": 5288.0, "acc_slide:24": 0.9760580062866211, "count_slide:24": 7727.0, "acc_slide:25": 0.7625749707221985, "count_slide:25": 4334.0, "acc_slide:26": 0.8702490329742432, "count_slide:26": 3815.0, "acc_slide:27": 0.9758560061454773, "count_slide:27": 4556.0, "acc_slide:28": 0.15308363735675812, "count_slide:28": 31878.0, "acc_slide:29": 0.4754355549812317, "count_slide:29": 12742.0, "acc_wg": 0.15308363735675812, "main_metric": 0.4999882427634209}}
|
36 |
+
{"key": "wilds/fmow", "dataset": "FMoW", "metrics": {"acc1": 0.01623846571376877, "acc5": 0.08829383028767866, "mean_per_class_recall": 0.017711475508026393, "acc_avg": 0.016238465905189514, "acc_year:0": NaN, "count_year:0": 0.0, "acc_year:1": NaN, "count_year:1": 0.0, "acc_year:2": NaN, "count_year:2": 0.0, "acc_year:3": NaN, "count_year:3": 0.0, "acc_year:4": NaN, "count_year:4": 0.0, "acc_year:5": NaN, "count_year:5": 0.0, "acc_year:6": NaN, "count_year:6": 0.0, "acc_year:7": NaN, "count_year:7": 0.0, "acc_year:8": NaN, "count_year:8": 0.0, "acc_year:9": NaN, "count_year:9": 0.0, "acc_year:10": NaN, "count_year:10": 0.0, "acc_year:11": NaN, "count_year:11": 0.0, "acc_year:12": NaN, "count_year:12": 0.0, "acc_year:13": NaN, "count_year:13": 0.0, "acc_year:14": 0.01716899499297142, "count_year:14": 15959.0, "acc_year:15": 0.013823386281728745, "count_year:15": 6149.0, "acc_worst_year": 0.013823386281728745, "acc_region:0": 0.018335683271288872, "count_region:0": 4963.0, "acc_region:1": 0.01519289892166853, "count_region:1": 5858.0, "acc_region:2": 0.015426147729158401, "count_region:2": 2593.0, "acc_region:3": 0.015204386785626411, "count_region:3": 8024.0, "acc_region:4": 0.025525525212287903, "count_region:4": 666.0, "acc_region:5": 0.0, "count_region:5": 4.0, "acc_worst_region": 0.0, "main_metric": 0.0}}
|
37 |
+
{"key": "fairness/dollar_street", "dataset": "Dollar Street", "metrics": {"acc1": 0.02226662860405367, "acc5": 0.11333143020268341, "mean_per_class_recall": 0.020258544402102775, "acc_top5_avg": 0.11333142966032028, "acc_top5_income_ds:0": 0.12616822123527527, "count_income_ds:0": 856.0, "acc_top5_income_ds:1": 0.1187782809138298, "count_income_ds:1": 884.0, "acc_top5_income_ds:2": 0.08990011364221573, "count_income_ds:2": 901.0, "acc_top5_income_ds:3": 0.11948955804109573, "count_income_ds:3": 862.0, "acc_top5_wg": 0.08990011364221573, "main_metric": 0.08990011364221573}}
|
38 |
+
{"key": "fairness/geode", "dataset": "GeoDE", "metrics": {"acc1": 0.05172966047405509, "acc5": 0.24143177450352338, "mean_per_class_recall": 0.04892415395683693, "acc_avg": 0.05172966048121452, "acc_region:0": 0.04634655639529228, "count_region:0": 2395.0, "acc_region:1": 0.05522388219833374, "count_region:1": 2010.0, "acc_region:2": 0.06020696088671684, "count_region:2": 2126.0, "acc_region:3": 0.048793014138936996, "count_region:3": 1947.0, "acc_region:4": 0.05179283022880554, "count_region:4": 1757.0, "acc_region:5": 0.04882378876209259, "count_region:5": 2253.0, "acc_wg": 0.04634655639529228, "main_metric": 0.04634655639529228}}
|
39 |
+
{"key": "fairness/fairface", "dataset": "FairFace", "metrics": {"acc_race_avg": 0.799525260925293, "acc_race_race_binary:0": 0.03165467455983162, "count_race_binary:0": 2085.0, "acc_race_race_binary:1": 0.9800428748130798, "count_race_binary:1": 8869.0, "acc_race_wg": 0.03165467455983162, "acc_gender_avg": 0.535420835018158, "acc_gender_race_binary:0": 0.5376498699188232, "acc_gender_race_binary:1": 0.5348968505859375, "acc_gender_wg": 0.5348968505859375, "acc_age_avg": 0.11055321991443634, "acc_age_race_binary:0": 0.13285371661186218, "acc_age_race_binary:1": 0.10531063377857208, "acc_age_wg": 0.10531063377857208, "acc_gender_x_avg": 0.535420835018158, "acc_gender_x_race:0_gender:0": 0.978723406791687, "count_race:0_gender:0": 799.0, "acc_gender_x_race:0_gender:1": 0.02906208671629429, "count_race:0_gender:1": 757.0, "acc_gender_x_race:1_gender:0": 0.8859180212020874, "count_race:1_gender:0": 1122.0, "acc_gender_x_race:1_gender:1": 0.1318795382976532, "count_race:1_gender:1": 963.0, "acc_gender_x_race:2_gender:0": 0.9787516593933105, "count_race:2_gender:0": 753.0, "acc_gender_x_race:2_gender:1": 0.03276539966464043, "count_race:2_gender:1": 763.0, "acc_gender_x_race:3_gender:0": 0.9470365643501282, "count_race:3_gender:0": 793.0, "acc_gender_x_race:3_gender:1": 0.08795180916786194, "count_race:3_gender:1": 830.0, "acc_gender_x_race:4_gender:0": 0.9212791919708252, "count_race:4_gender:0": 813.0, "acc_gender_x_race:4_gender:1": 0.12121212482452393, "count_race:4_gender:1": 396.0, "acc_gender_x_race:5_gender:0": 0.9646258354187012, "count_race:5_gender:0": 735.0, "acc_gender_x_race:5_gender:1": 0.052941177040338516, "count_race:5_gender:1": 680.0, "acc_gender_x_race:6_gender:0": 0.9433719515800476, "count_race:6_gender:0": 777.0, "acc_gender_x_race:6_gender:1": 0.10219922661781311, "count_race:6_gender:1": 773.0, "acc_gender_x_wg": 0.02906208671629429, "toxicity_crime_avg": 0.0035603432916104794, "toxicity_crime_race:0": 0.0038560412358492613, "count_race:0": 
1556.0, "toxicity_crime_race:1": 0.0047961631789803505, "count_race:1": 2085.0, "toxicity_crime_race:2": 0.0019788919016718864, "count_race:2": 1516.0, "toxicity_crime_race:3": 0.006777572445571423, "count_race:3": 1623.0, "toxicity_crime_race:4": 0.0016542597441002727, "count_race:4": 1209.0, "toxicity_crime_race:5": 0.0014134275261312723, "count_race:5": 1415.0, "toxicity_crime_race:6": 0.003225806402042508, "count_race:6": 1550.0, "toxicity_crime_wg": 0.0014134275261312723, "toxicity_nonhuman_avg": 0.022548841312527657, "toxicity_nonhuman_race:0": 0.013496143743395805, "toxicity_nonhuman_race:1": 0.03693045675754547, "toxicity_nonhuman_race:2": 0.012532981112599373, "toxicity_nonhuman_race:3": 0.02156500332057476, "toxicity_nonhuman_race:4": 0.019851116463541985, "toxicity_nonhuman_race:5": 0.016961131244897842, "toxicity_nonhuman_race:6": 0.03032258152961731, "toxicity_nonhuman_wg": 0.012532981112599373, "main_metric": null}}
|
40 |
+
{"key": "fairness/utkface", "dataset": "UTKFace", "metrics": {"acc_race_avg": 0.5735982656478882, "acc_race_race_binary:0": 0.0782056376338005, "count_race_binary:0": 10076.0, "acc_race_race_binary:1": 0.9398987293243408, "count_race_binary:1": 13627.0, "acc_race_wg": 0.0782056376338005, "acc_gender_avg": 0.5255452990531921, "acc_gender_race_binary:0": 0.5173680186271667, "acc_gender_race_binary:1": 0.5315917134284973, "acc_gender_wg": 0.5173680186271667, "acc_age_avg": 0.09635911136865616, "acc_age_race_binary:0": 0.11383485794067383, "acc_age_race_binary:1": 0.08343729376792908, "acc_age_wg": 0.08343729376792908, "acc_gender_x_avg": 0.5255452990531921, "acc_gender_x_race:0_gender:0": 0.9607419967651367, "count_race:0_gender:0": 2318.0, "acc_gender_x_race:0_gender:1": 0.0520833320915699, "count_race:0_gender:1": 2208.0, "acc_gender_x_race:1_gender:0": 0.8555514812469482, "count_race:1_gender:0": 5476.0, "acc_gender_x_race:1_gender:1": 0.11478260904550552, "count_race:1_gender:1": 4600.0, "acc_gender_x_race:2_gender:0": 0.946483850479126, "count_race:2_gender:0": 2261.0, "acc_gender_x_race:2_gender:1": 0.09101516753435135, "count_race:2_gender:1": 1714.0, "acc_gender_x_race:3_gender:0": 0.9295238256454468, "count_race:3_gender:0": 1575.0, "acc_gender_x_race:3_gender:1": 0.18020440638065338, "count_race:3_gender:1": 1859.0, "acc_gender_x_race:4_gender:0": 0.9118421077728271, "count_race:4_gender:0": 760.0, "acc_gender_x_race:4_gender:1": 0.1223175972700119, "count_race:4_gender:1": 932.0, "acc_gender_x_wg": 0.0520833320915699, "toxicity_crime_avg": 0.002446947619318962, "toxicity_crime_race:0": 0.002651347778737545, "count_race:0": 4526.0, "toxicity_crime_race:1": 0.0018856689566746354, "count_race:1": 10076.0, "toxicity_crime_race:2": 0.0037735849618911743, "count_race:2": 3975.0, "toxicity_crime_race:3": 0.0026208502240478992, "count_race:3": 3434.0, "toxicity_crime_race:4": 0.0017730495892465115, "count_race:4": 1692.0, "toxicity_crime_wg": 0.0017730495892465115, 
"toxicity_nonhuman_avg": 0.021980339661240578, "toxicity_nonhuman_race:0": 0.010826337151229382, "toxicity_nonhuman_race:1": 0.02729257568717003, "toxicity_nonhuman_race:2": 0.01861635223031044, "toxicity_nonhuman_race:3": 0.0259172972291708, "toxicity_nonhuman_race:4": 0.02009456232190132, "toxicity_nonhuman_wg": 0.010826337151229382, "main_metric": null}}
|
params.txt
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accum_freq: 1
|
2 |
+
aug_cfg: {}
|
3 |
+
batch_size: 1024
|
4 |
+
beta1: 0.9
|
5 |
+
beta2: 0.98
|
6 |
+
checkpoint_path: $HOME/clip-synthetic-captions/output/ye-pop-llava_caption/checkpoints
|
7 |
+
coca_caption_loss_weight: 2.0
|
8 |
+
coca_contrastive_loss_weight: 1.0
|
9 |
+
copy_codebase: False
|
10 |
+
csv_caption_key: title
|
11 |
+
csv_img_key: filepath
|
12 |
+
csv_separator:
|
13 |
+
dataset_resampled: False
|
14 |
+
dataset_type: webdataset
|
15 |
+
ddp_static_graph: True
|
16 |
+
debug: False
|
17 |
+
delete_previous_checkpoint: False
|
18 |
+
device: cuda:0
|
19 |
+
dist_backend: nccl
|
20 |
+
dist_url: env://
|
21 |
+
distill: False
|
22 |
+
distill_model: None
|
23 |
+
distill_pretrained: None
|
24 |
+
distributed: True
|
25 |
+
epochs: 4
|
26 |
+
epochs_cooldown: None
|
27 |
+
eps: 1e-06
|
28 |
+
force_custom_text: False
|
29 |
+
force_image_size: None
|
30 |
+
force_patch_dropout: None
|
31 |
+
force_quick_gelu: False
|
32 |
+
gather_with_grad: True
|
33 |
+
grad_checkpointing: True
|
34 |
+
grad_clip_norm: None
|
35 |
+
horovod: False
|
36 |
+
image_mean: None
|
37 |
+
image_std: None
|
38 |
+
imagenet_v2: None
|
39 |
+
imagenet_val: None
|
40 |
+
local_loss: True
|
41 |
+
local_rank: 0
|
42 |
+
lock_image: False
|
43 |
+
lock_image_freeze_bn_stats: False
|
44 |
+
lock_image_unlocked_groups: 0
|
45 |
+
lock_text: False
|
46 |
+
lock_text_freeze_layer_norm: False
|
47 |
+
lock_text_unlocked_layers: 0
|
48 |
+
log_every_n_steps: 100
|
49 |
+
log_level: 20
|
50 |
+
log_local: False
|
51 |
+
log_path: $HOME/clip-synthetic-captions/output/ye-pop-llava_caption/out.log
|
52 |
+
logs: $HOME/clip-synthetic-captions/output
|
53 |
+
lr: 0.0005
|
54 |
+
lr_cooldown_end: 0.0
|
55 |
+
lr_cooldown_power: 1.0
|
56 |
+
lr_scheduler: cosine
|
57 |
+
model: ViT-B-32
|
58 |
+
name: ye-pop-llava_caption
|
59 |
+
no_set_device_rank: False
|
60 |
+
precision: amp
|
61 |
+
pretrained:
|
62 |
+
pretrained_image: False
|
63 |
+
rank: 0
|
64 |
+
remote_sync: None
|
65 |
+
remote_sync_frequency: 300
|
66 |
+
remote_sync_protocol: s3
|
67 |
+
report_to:
|
68 |
+
resume: None
|
69 |
+
save_frequency: 0
|
70 |
+
save_most_recent: True
|
71 |
+
seed: 0
|
72 |
+
skip_scheduler: False
|
73 |
+
tensorboard: False
|
74 |
+
tensorboard_path:
|
75 |
+
torchscript: False
|
76 |
+
trace: False
|
77 |
+
train_data: $HOME/clip-synthetic-captions/data/postprocessed/ye-pop-img2dataset-llava_caption/shards/{00000000..00000049}.tar
|
78 |
+
train_data_upsampling_factors: None
|
79 |
+
train_num_samples: 122880
|
80 |
+
use_bn_sync: False
|
81 |
+
val_data: None
|
82 |
+
val_frequency: 1
|
83 |
+
val_num_samples: None
|
84 |
+
wandb: False
|
85 |
+
wandb_notes:
|
86 |
+
wandb_project_name: open-clip
|
87 |
+
warmup: 500
|
88 |
+
wd: 0.2
|
89 |
+
workers: 2
|
90 |
+
world_size: 4
|
91 |
+
zeroshot_frequency: 2
|