From aeca89926df9c557f9f8a8544698eb784529baf4 Mon Sep 17 00:00:00 2001 From: Gabriel Ilharco Date: Wed, 1 Nov 2023 05:22:50 +0000 Subject: [PATCH 1/2] Add DFN models --- docs/PRETRAINED.md | 18 ++++++++++++++++++ docs/model_profile.csv | 11 ++++++----- docs/openclip_classification_results.csv | 12 ++++++++---- docs/openclip_results.csv | 10 +++++++--- docs/openclip_retrieval_results.csv | 14 +++++++++----- .../model_configs/ViT-H-14-378-quickgelu.json | 18 ++++++++++++++++++ src/open_clip/pretrained.py | 17 +++++++++++++++++ src/training/profiler.py | 8 +++++--- 8 files changed, 88 insertions(+), 20 deletions(-) create mode 100644 src/open_clip/model_configs/ViT-H-14-378-quickgelu.json diff --git a/docs/PRETRAINED.md b/docs/PRETRAINED.md index 3ac650397..dddc39af9 100644 --- a/docs/PRETRAINED.md +++ b/docs/PRETRAINED.md @@ -270,3 +270,21 @@ If you use EVA models, please cite the following: year={2023} } ``` + +### DFN + +Data Filtering Network models are described in https://arxiv.org/abs/2309.17425. +These models were developed by Alex Fang, Albin Madappally Jose, Amit Jain, Ludwig Schmidt, Alexander Toshev and Vaishaal Shankar from Apple and the University of Washington. + +Models are licensed under the following: https://huggingface.co/apple/DFN5B-CLIP-ViT-H-14-384/blob/main/LICENSE. 
+ +If you use DFN models, please cite the following: + +```bibtex +@article{fang2023data, + title={Data Filtering Networks}, + author={Fang, Alex and Jose, Albin Madappally and Jain, Amit and Schmidt, Ludwig and Toshev, Alexander and Shankar, Vaishaal}, + journal={arXiv preprint arXiv:2309.17425}, + year={2023} +} +``` diff --git a/docs/model_profile.csv b/docs/model_profile.csv index 0c2032433..b5376adfb 100644 --- a/docs/model_profile.csv +++ b/docs/model_profile.csv @@ -9,36 +9,36 @@ ViT-B-32,224,768,512,512,151.28,87.85,63.43,14.78,8.82,5.96 ViT-B-32-quickgelu,224,768,512,512,151.28,87.85,63.43,14.78,8.82,5.96 convnext_tiny,224,768,512,1024,92.3,28.61,63.69,14.87,8.91,5.96 ViT-B-32-256,256,768,512,512,151.29,87.86,63.43,17.46,11.5,5.96 -RN50-quickgelu,224,64,512,1024,102.01,38.32,63.69,18.18,12.22,5.96 RN50,224,64,512,1024,102.01,38.32,63.69,18.18,12.22,5.96 +RN50-quickgelu,224,64,512,1024,102.01,38.32,63.69,18.18,12.22,5.96 ViT-M-16-alt,224,512,384,384,78.98,38.53,40.44,19.36,15.98,3.38 ViT-M-16,224,512,512,512,102.02,38.59,63.43,21.94,15.98,5.96 vit_relpos_medium_patch16_cls_224,224,768,512,512,101.94,38.51,63.43,21.99,16.03,5.96 mt5-base-ViT-B-32,224,768,512,512,365.71,87.85,277.86,22.12,8.82,13.3 convnext_small,224,768,512,512,113.28,49.85,63.43,23.33,17.37,5.96 ViT-B-32-plus-256,256,896,640,640,210.3,119.13,91.16,24.83,15.56,9.27 -RN101-quickgelu,224,64,512,512,119.69,56.26,63.43,25.5,19.54,5.96 RN101,224,64,512,512,119.69,56.26,63.43,25.5,19.54,5.96 +RN101-quickgelu,224,64,512,512,119.69,56.26,63.43,25.5,19.54,5.96 vit_medium_patch16_gap_256,256,768,512,512,102.04,38.61,63.43,27.1,21.14,5.96 coca_ViT-B-32,224,768,512,512,253.56,89.16,63.43,33.34,9.19,5.96 convnext_base,224,768,512,512,151.52,88.09,63.43,36.67,30.71,5.96 swin_base_patch4_window7_224,224,768,640,640,178.56,87.4,91.16,40.13,30.86,9.27 ViT-B-16,224,768,512,512,149.62,86.19,63.43,41.09,35.13,5.96 -EVA02-B-16,224,768,512,512,149.69,86.26,63.43,41.09,35.13,5.96 
ViT-B-16-quickgelu,224,768,512,512,149.62,86.19,63.43,41.09,35.13,5.96 +EVA02-B-16,224,768,512,512,149.69,86.26,63.43,41.09,35.13,5.96 ViT-B-16-SigLIP,224,768,768,768,203.16,92.88,110.27,46.44,35.42,11.02 convnext_base_w,256,768,640,640,179.39,88.22,91.16,49.38,40.11,9.27 RN50x4,288,80,640,640,178.3,87.14,91.16,51.82,42.56,9.27 coca_roberta-ViT-B-32,224,768,768,512,420.37,87.85,124.45,53.12,8.82,13.12 ViT-B-16-plus,224,896,640,640,208.35,117.19,91.16,56.75,47.49,9.27 -ViT-B-16-SigLIP-i18n-256,256,768,768,768,370.63,92.93,277.7,57.84,46.82,11.02 ViT-B-16-SigLIP-256,256,768,768,768,203.2,92.93,110.27,57.84,46.82,11.02 +ViT-B-16-SigLIP-i18n-256,256,768,768,768,370.63,92.93,277.7,57.84,46.82,11.02 ViT-B-16-plus-240,240,896,640,640,208.38,117.21,91.16,64.03,54.76,9.27 convnext_base_w_320,320,768,640,640,179.39,88.22,91.16,71.94,62.67,9.27 convnext_large,224,768,768,768,321.06,197.41,123.65,82.02,68.72,13.3 coca_base,288,768,768,512,440.34,86.4,134.66,99.09,46.47,13.3 -xlm-roberta-base-ViT-B-32,224,768,512,512,366.12,87.85,278.27,105.87,8.82,97.05 roberta-ViT-B-32,224,768,512,512,212.72,87.85,124.87,105.87,8.82,97.05 +xlm-roberta-base-ViT-B-32,224,768,512,512,366.12,87.85,278.27,105.87,8.82,97.05 convnext_large_d,256,768,768,768,351.77,199.77,152.0,107.5,89.76,17.73 ViT-B-16-SigLIP-384,384,768,768,768,203.45,93.18,110.27,123.15,112.13,11.02 ViT-L-16,224,1024,768,768,427.74,304.09,123.65,136.41,123.11,13.3 @@ -75,6 +75,7 @@ xlm-roberta-large-ViT-H-14,224,1280,512,1024,1193.01,632.08,560.94,671.01,334.59 ViT-SO400M-14-SigLIP-384,384,768,1152,1152,877.96,428.23,449.73,723.48,670.35,53.13 ViT-H-14-CLIPA-336,336,1280,1024,1024,968.64,632.48,336.16,800.88,781.45,19.43 ViT-bigG-14-CLIPA,224,1664,1280,1280,2517.22,1844.9,672.32,1007.93,967.5,40.44 +ViT-H-14-378-quickgelu,378,1280,1024,1024,986.71,632.68,354.03,1054.05,1006.96,47.09 ViT-bigG-14,224,1664,1280,1280,2539.57,1844.91,694.66,1065.36,967.5,97.86 
nllb-clip-large,224,1280,512,1024,1399.22,632.08,767.14,1468.46,334.59,1133.87 ViT-e-14,224,1792,1280,1280,4581.09,3807.72,773.37,2091.45,1981.35,110.1 diff --git a/docs/openclip_classification_results.csv b/docs/openclip_classification_results.csv index 5f6f75a33..b135810a1 100644 --- a/docs/openclip_classification_results.csv +++ b/docs/openclip_classification_results.csv @@ -1,11 +1,14 @@ name,pretrained,params (M),FLOPs (B),Average perf. on 35 datasets,ImageNet 1k,Caltech-101,CIFAR-10,CIFAR-100,CLEVR Counts,CLEVR Distance,Country211,Describable Textures,EuroSAT,FGVC Aircraft,Food-101,GTSRB,ImageNet Sketch,ImageNet v2,ImageNet-A,ImageNet-O,ImageNet-R,KITTI Vehicle Distance,MNIST,ObjectNet,Oxford Flowers-102,Oxford-IIIT Pet,Pascal VOC 2007,PatchCamelyon,Rendered SST2,RESISC45,Stanford Cars,STL-10,SUN397,SVHN,iWildCam,Camelyon17,FMoW,Dollar Street,GeoDE +ViT-H-14-378-quickgelu,dfn5b,986.71,1054.05,0.7090,0.8437,0.9517,0.9880,0.9043,0.3596,0.2085,0.3787,0.7106,0.6133,0.7219,0.9623,0.6782,0.7324,0.7833,0.7964,0.3810,0.9376,0.3966,0.8364,0.7340,0.8935,0.9696,0.8241,0.6964,0.5546,0.7589,0.9598,0.9906,0.7733,0.6739,0.2205,0.7211,0.2075,0.7173,0.9349 EVA02-E-14-plus,laion2b_s9b_b144k,5044.89,2362.19,0.6980,0.8201,0.9535,0.9934,0.9316,0.2991,0.1998,0.3564,0.6777,0.7574,0.5360,0.9496,0.6740,0.7162,0.7564,0.8223,0.3540,0.9456,0.1842,0.7463,0.7937,0.8433,0.9567,0.8569,0.6442,0.6271,0.7490,0.9457,0.9926,0.7510,0.7560,0.2591,0.6948,0.2668,0.6951,0.9244 +ViT-H-14-quickgelu,dfn5b,986.11,381.68,0.6972,0.8344,0.9552,0.9878,0.9051,0.2967,0.2117,0.3442,0.7064,0.6546,0.7147,0.9568,0.6772,0.7274,0.7736,0.6987,0.3810,0.9296,0.3347,0.8579,0.6813,0.8995,0.9658,0.8184,0.6539,0.5464,0.7508,0.9580,0.9890,0.7691,0.6764,0.2025,0.7050,0.2079,0.7009,0.9286 
ViT-SO400M-14-SigLIP-384,webli,877.96,723.48,0.6916,0.8308,0.9599,0.9672,0.8357,0.4071,0.2246,0.3645,0.7303,0.6354,0.6069,0.9635,0.6429,0.7454,0.7717,0.8247,0.2775,0.9575,0.2082,0.8862,0.7695,0.9114,0.9680,0.7171,0.5268,0.7002,0.7211,0.9521,0.9930,0.7541,0.5151,0.2294,0.6149,0.3309,0.7301,0.9328 ViT-bigG-14-CLIPA-336,datacomp1b,2517.76,2271.58,0.6888,0.8309,0.9529,0.9904,0.9123,0.1399,0.2161,0.4094,0.7293,0.6457,0.5561,0.9623,0.6407,0.7454,0.7726,0.8599,0.3130,0.9535,0.2630,0.8533,0.7966,0.8694,0.9562,0.8162,0.5411,0.6420,0.7257,0.9542,0.9956,0.7645,0.6691,0.2383,0.5874,0.1766,0.6869,0.9407 ViT-bigG-14-CLIPA,datacomp1b,2517.22,1007.93,0.6871,0.8270,0.9513,0.9912,0.9135,0.1357,0.2113,0.3921,0.7207,0.6861,0.5576,0.9583,0.6460,0.7431,0.7699,0.8179,0.3075,0.9512,0.2743,0.8544,0.7694,0.8693,0.9576,0.8188,0.5345,0.6332,0.7137,0.9560,0.9965,0.7642,0.6811,0.2269,0.5955,0.1959,0.6869,0.9382 ViT-SO400M-14-SigLIP,webli,877.36,233.54,0.6819,0.8203,0.9600,0.9679,0.8417,0.4210,0.2213,0.3243,0.7106,0.6274,0.6029,0.9556,0.6382,0.7402,0.7607,0.7185,0.2960,0.9506,0.2489,0.8929,0.7060,0.8982,0.9522,0.7034,0.5057,0.6936,0.7257,0.9032,0.9939,0.7436,0.5670,0.1915,0.6215,0.3163,0.7173,0.9278 EVA02-E-14,laion2b_s4b_b115k,4704.59,2311.42,0.6725,0.8196,0.9541,0.9925,0.9258,0.1632,0.2499,0.3482,0.6878,0.7446,0.4892,0.9523,0.6729,0.7151,0.7566,0.8044,0.3340,0.9407,0.1294,0.7581,0.7674,0.8210,0.9569,0.8136,0.4972,0.5859,0.7324,0.9438,0.9926,0.7658,0.6381,0.2289,0.4894,0.2801,0.6682,0.9182 ViT-H-14-CLIPA-336,datacomp1b,968.64,800.88,0.6713,0.8180,0.9467,0.9890,0.8968,0.1326,0.2254,0.3551,0.7197,0.6604,0.4718,0.9572,0.5816,0.7282,0.7562,0.8275,0.3115,0.9438,0.2574,0.8245,0.7742,0.8463,0.9573,0.8134,0.4979,0.6052,0.7114,0.9483,0.9955,0.7635,0.6599,0.2239,0.4357,0.2500,0.6822,0.9278 
+ViT-L-14-quickgelu,dfn2b,427.62,175.33,0.6703,0.8141,0.9532,0.9836,0.8837,0.3325,0.2481,0.2823,0.6606,0.6493,0.3936,0.9457,0.6168,0.6832,0.7461,0.6677,0.3930,0.9000,0.2011,0.8470,0.7397,0.8654,0.9555,0.8162,0.6318,0.5502,0.7327,0.9470,0.9768,0.7546,0.6525,0.1883,0.6237,0.2237,0.6916,0.9111 ViT-bigG-14,laion2b_s39b_b160k,2539.57,1065.36,0.6694,0.8009,0.9484,0.9824,0.8752,0.2989,0.2002,0.3379,0.6867,0.6919,0.4953,0.9309,0.6244,0.6894,0.7359,0.6933,0.3785,0.9213,0.1308,0.7157,0.7284,0.8163,0.9529,0.8077,0.6364,0.6535,0.7235,0.9460,0.9850,0.7450,0.6961,0.1760,0.5905,0.2352,0.6857,0.9127 ViT-H-14-CLIPA,datacomp1b,968.24,354.02,0.6688,0.8152,0.9458,0.9888,0.8991,0.1513,0.2255,0.3401,0.7090,0.7146,0.4751,0.9554,0.5538,0.7272,0.7498,0.7701,0.3135,0.9426,0.2461,0.8189,0.7423,0.8437,0.9559,0.8170,0.4958,0.6189,0.7098,0.9458,0.9948,0.7608,0.6622,0.2160,0.4415,0.2684,0.6694,0.9236 ViT-H-14-quickgelu,metaclip_fullcc,986.11,381.68,0.6684,0.8051,0.9536,0.9804,0.8634,0.2115,0.1881,0.3716,0.7271,0.6450,0.5114,0.9423,0.6257,0.7052,0.7417,0.7533,0.3040,0.9342,0.2771,0.7266,0.7642,0.8448,0.9561,0.7495,0.6222,0.6925,0.7024,0.8990,0.9944,0.7440,0.5910,0.1680,0.5782,0.2314,0.6811,0.9077 @@ -18,8 +21,8 @@ EVA02-L-14-336,merged2b_s6b_b61k,428.08,395.16,0.6603,0.8039,0.9525,0.9892,0.898 ViT-L-14-CLIPA,datacomp1b,414.21,167.5,0.6577,0.7957,0.9453,0.9866,0.8850,0.1857,0.2449,0.2941,0.6963,0.6044,0.4299,0.9415,0.5906,0.7061,0.7305,0.7125,0.3370,0.9288,0.1927,0.7374,0.6988,0.8101,0.9497,0.8067,0.5915,0.5387,0.6843,0.9366,0.9919,0.7528,0.6390,0.1724,0.6760,0.2457,0.6647,0.9152 ViT-L-14,commonpool_xl_clip_s13b_b90k,427.62,175.33,0.6553,0.7637,0.9502,0.9797,0.8615,0.2547,0.2451,0.2984,0.6521,0.6681,0.3860,0.9355,0.5980,0.6538,0.6953,0.6197,0.3525,0.8924,0.2982,0.9040,0.7165,0.8006,0.9424,0.8336,0.5688,0.6178,0.6978,0.9352,0.9875,0.7351,0.6853,0.1439,0.5100,0.1705,0.6776,0.9056 
convnext_xxlarge,laion2b_s34b_b82k_augreg_soup,1200.58,443.03,0.6545,0.7947,0.9448,0.9822,0.8687,0.1454,0.2365,0.3170,0.7053,0.6128,0.4434,0.9321,0.5508,0.6840,0.7260,0.6719,0.4060,0.9160,0.2363,0.8277,0.7273,0.8241,0.9445,0.8090,0.5142,0.6952,0.7190,0.9409,0.9810,0.7458,0.6254,0.1730,0.6071,0.0000,0.6764,0.9215 -convnext_xxlarge,laion2b_s34b_b82k_augreg_rewind,1200.58,443.03,0.6534,0.7931,0.9452,0.9823,0.8686,0.1651,0.2534,0.3155,0.7016,0.6331,0.4398,0.9308,0.5491,0.6825,0.7228,0.6657,0.3975,0.9139,0.2419,0.7930,0.7252,0.8241,0.9438,0.8100,0.5014,0.6897,0.7168,0.9406,0.9801,0.7459,0.6137,0.1735,0.6071,0.0000,0.6799,0.9228 ViT-L-16-SigLIP-256,webli,652.15,201.62,0.6534,0.8045,0.9593,0.9619,0.8191,0.4065,0.2150,0.2141,0.7027,0.5598,0.5259,0.9463,0.6115,0.7209,0.7376,0.6213,0.3265,0.9396,0.1983,0.8499,0.6526,0.8827,0.9604,0.7409,0.5458,0.6172,0.6817,0.9386,0.9911,0.7253,0.5211,0.1796,0.5757,0.1296,0.6904,0.9173 +convnext_xxlarge,laion2b_s34b_b82k_augreg_rewind,1200.58,443.03,0.6534,0.7931,0.9452,0.9823,0.8686,0.1651,0.2534,0.3155,0.7016,0.6331,0.4398,0.9308,0.5491,0.6825,0.7228,0.6657,0.3975,0.9139,0.2419,0.7930,0.7252,0.8241,0.9438,0.8100,0.5014,0.6897,0.7168,0.9406,0.9801,0.7459,0.6137,0.1735,0.6071,0.0000,0.6799,0.9228 xlm-roberta-large-ViT-H-14,frozen_laion5b_s13b_b90k,1193.01,671.01,0.6519,0.7695,0.9422,0.9718,0.8430,0.3358,0.2050,0.3172,0.6926,0.6793,0.4673,0.9236,0.6239,0.6581,0.6944,0.5935,0.3390,0.8940,0.1364,0.7804,0.6911,0.7532,0.9431,0.7995,0.5792,0.6436,0.6825,0.9362,0.9889,0.7551,0.5950,0.1392,0.6749,0.2098,0.6460,0.9111 EVA02-L-14,merged2b_s4b_b131k,427.76,175.3,0.6502,0.7977,0.9512,0.9908,0.9071,0.3176,0.2462,0.3091,0.6319,0.6994,0.3638,0.9340,0.5718,0.6813,0.7295,0.7619,0.2880,0.9272,0.2518,0.6729,0.7489,0.7631,0.9398,0.8220,0.5431,0.6150,0.6968,0.9055,0.9961,0.7410,0.4793,0.1886,0.5124,0.2017,0.6624,0.9073 
convnext_xxlarge,laion2b_s34b_b82k_augreg,1200.58,443.03,0.6494,0.7907,0.9429,0.9816,0.8677,0.1399,0.1195,0.3127,0.7096,0.6030,0.4250,0.9295,0.5454,0.6806,0.7223,0.6692,0.4025,0.9131,0.2616,0.8687,0.7235,0.8091,0.9455,0.8116,0.5340,0.6782,0.7100,0.9399,0.9824,0.7436,0.6379,0.1616,0.5719,0.0000,0.6729,0.9228 @@ -46,6 +49,7 @@ ViT-B-16,datacomp_xl_s13b_b90k,149.62,41.09,0.6178,0.7349,0.9380,0.9624,0.8212,0 ViT-B-32-256,datacomp_s34b_b86k,151.29,17.46,0.6133,0.7281,0.9348,0.9653,0.8287,0.2489,0.2271,0.1968,0.6064,0.6469,0.3645,0.8909,0.5152,0.6065,0.6481,0.3757,0.4635,0.8344,0.2658,0.7939,0.5960,0.7822,0.9115,0.7880,0.5880,0.5294,0.6505,0.8990,0.9731,0.7021,0.6708,0.0910,0.6252,0.0000,0.6238,0.8923 coca_ViT-L-14,mscoco_finetuned_laion2b_s13b_b90k,638.45,214.52,0.6128,0.7204,0.9420,0.9630,0.7965,0.3765,0.2501,0.1800,0.6213,0.5867,0.2329,0.8436,0.5453,0.6114,0.6475,0.4548,0.3865,0.8574,0.3797,0.8292,0.6253,0.7074,0.9115,0.8106,0.4943,0.6107,0.6267,0.8865,0.9861,0.7398,0.5564,0.1303,0.4294,0.1678,0.6636,0.8772 RN50x64,openai,623.26,552.65,0.6111,0.7391,0.9026,0.8510,0.5985,0.2254,0.1994,0.2981,0.5314,0.5765,0.3103,0.9205,0.4792,0.5593,0.6706,0.7077,0.3830,0.8441,0.3094,0.8583,0.6820,0.7745,0.9360,0.7398,0.5387,0.7106,0.6265,0.7581,0.9829,0.6661,0.6044,0.1469,0.5280,0.1939,0.6472,0.8898 +ViT-B-16,dfn2b,149.62,41.09,0.6090,0.7624,0.9429,0.9672,0.8349,0.2327,0.2453,0.1955,0.5755,0.5402,0.2473,0.9130,0.4701,0.6204,0.6818,0.4820,0.4925,0.8310,0.1927,0.7814,0.6319,0.8201,0.9372,0.7884,0.5214,0.4876,0.6137,0.9073,0.9753,0.7143,0.5985,0.1554,0.4993,0.1415,0.6250,0.8910 ViT-B-16-quickgelu,metaclip_fullcc,149.62,41.09,0.6042,0.7212,0.9328,0.9572,0.7891,0.2935,0.2260,0.2271,0.6223,0.5265,0.3059,0.8882,0.4659,0.6016,0.6505,0.4953,0.4150,0.8423,0.1871,0.6610,0.6138,0.7358,0.9175,0.7818,0.5915,0.5898,0.6744,0.8302,0.9841,0.6879,0.3909,0.1227,0.6993,0.1932,0.6402,0.8868 
ViT-B-16-SigLIP-i18n-256,webli,370.63,57.84,0.6037,0.7513,0.9475,0.9118,0.7216,0.2552,0.1976,0.1593,0.6426,0.3826,0.3325,0.9171,0.5276,0.6588,0.6814,0.4585,0.3685,0.8920,0.3826,0.8301,0.5977,0.8387,0.9387,0.7536,0.5381,0.5700,0.5737,0.8926,0.9764,0.6978,0.4272,0.1451,0.4899,0.1064,0.6472,0.9186 ViT-L-14,laion400m_e32,427.62,175.33,0.5998,0.7277,0.9266,0.9464,0.7741,0.2421,0.2452,0.2302,0.6053,0.6233,0.2490,0.9007,0.4989,0.5964,0.6545,0.4647,0.4190,0.8467,0.1997,0.7612,0.5969,0.7306,0.9170,0.7561,0.4968,0.5601,0.6741,0.8962,0.9808,0.7258,0.4955,0.1254,0.4555,0.1708,0.6168,0.8839 @@ -77,8 +81,8 @@ ViT-B-32-quickgelu,metaclip_400m,151.28,14.78,0.5377,0.6558,0.9171,0.9125,0.7006 ViT-B-16,commonpool_l_clip_s1b_b8k,149.62,41.09,0.5348,0.5777,0.8853,0.9349,0.7313,0.2691,0.2313,0.1417,0.4500,0.4728,0.0822,0.7995,0.4657,0.4589,0.4995,0.2165,0.4950,0.6843,0.3755,0.7032,0.4914,0.5667,0.7561,0.7821,0.4962,0.5036,0.5295,0.8171,0.9496,0.6295,0.5985,0.0741,0.4920,0.1257,0.5818,0.8501 ViT-B-32-quickgelu,laion400m_e32,151.28,14.78,0.5282,0.6293,0.9118,0.9074,0.7029,0.1624,0.2391,0.1475,0.5457,0.5143,0.1658,0.8086,0.4197,0.4939,0.5506,0.2172,0.5345,0.7342,0.2897,0.3733,0.4389,0.6620,0.8671,0.7582,0.5592,0.5228,0.5454,0.7926,0.9560,0.6700,0.3039,0.0745,0.4709,0.1296,0.5491,0.8380 ViT-B-32-quickgelu,laion400m_e31,151.28,14.78,0.5273,0.6294,0.9121,0.9060,0.7021,0.1659,0.2397,0.1476,0.5447,0.5085,0.1675,0.8080,0.4230,0.4937,0.5487,0.2161,0.5335,0.7349,0.2911,0.3656,0.4374,0.6638,0.8629,0.7539,0.5543,0.5217,0.5446,0.7914,0.9553,0.6702,0.3144,0.0788,0.4554,0.1310,0.5467,0.8363 -ViT-B-32-quickgelu,openai,151.28,14.78,0.5265,0.6332,0.8758,0.8983,0.6423,0.2320,0.2335,0.1720,0.4436,0.5044,0.1953,0.8400,0.3258,0.4229,0.5592,0.3155,0.4775,0.6933,0.2743,0.4839,0.4431,0.6670,0.8700,0.7640,0.6224,0.5865,0.5362,0.5963,0.9713,0.6248,0.3159,0.0732,0.6061,0.1676,0.5386,0.8217 
ViT-B-32,openai,151.28,14.78,0.5265,0.6332,0.8758,0.8983,0.6423,0.2320,0.2335,0.1720,0.4436,0.5044,0.1953,0.8400,0.3258,0.4229,0.5592,0.3155,0.4775,0.6933,0.2743,0.4839,0.4431,0.6670,0.8700,0.7640,0.6224,0.5865,0.5362,0.5963,0.9713,0.6248,0.3159,0.0732,0.6061,0.1676,0.5386,0.8217 +ViT-B-32-quickgelu,openai,151.28,14.78,0.5265,0.6332,0.8758,0.8983,0.6423,0.2320,0.2335,0.1720,0.4436,0.5044,0.1953,0.8400,0.3258,0.4229,0.5592,0.3155,0.4775,0.6933,0.2743,0.4839,0.4431,0.6670,0.8700,0.7640,0.6224,0.5865,0.5362,0.5963,0.9713,0.6248,0.3159,0.0732,0.6061,0.1676,0.5386,0.8217 RN50x4,openai,178.3,51.82,0.5191,0.6627,0.8661,0.7943,0.4514,0.2045,0.0905,0.2039,0.4862,0.3354,0.2102,0.8640,0.3622,0.4468,0.5944,0.4145,0.4955,0.7274,0.2335,0.4903,0.5141,0.6766,0.8829,0.6814,0.5675,0.6716,0.5338,0.6673,0.9658,0.6089,0.3190,0.0870,0.5435,0.1130,0.5654,0.8376 ViT-B-32,laion400m_e31,151.28,14.78,0.5070,0.6022,0.8916,0.8825,0.6781,0.1549,0.2261,0.1356,0.5218,0.4694,0.1437,0.7814,0.4082,0.4648,0.5234,0.1957,0.5085,0.7079,0.1224,0.4108,0.4281,0.6319,0.8541,0.7312,0.5495,0.5162,0.5108,0.7436,0.9494,0.6508,0.2891,0.0745,0.4975,0.1076,0.5491,0.8328 ViT-B-32,laion400m_e32,151.28,14.78,0.5067,0.6024,0.8918,0.8840,0.6773,0.1536,0.2261,0.1349,0.5229,0.4754,0.1467,0.7817,0.4070,0.4646,0.5237,0.1953,0.5080,0.7084,0.1181,0.4000,0.4292,0.6323,0.8513,0.7328,0.5490,0.5206,0.5094,0.7454,0.9498,0.6509,0.2759,0.0741,0.5084,0.1068,0.5444,0.8326 @@ -86,8 +90,8 @@ RN101,openai,119.69,25.5,0.5036,0.6228,0.8527,0.8078,0.4764,0.2437,0.0923,0.1693 RN101-quickgelu,openai,119.69,25.5,0.5036,0.6228,0.8527,0.8078,0.4764,0.2437,0.0923,0.1693,0.4335,0.3131,0.1853,0.8367,0.3753,0.4106,0.5612,0.2944,0.5085,0.6817,0.2644,0.5254,0.4515,0.6532,0.8652,0.6512,0.5819,0.6403,0.5476,0.6100,0.9680,0.5803,0.3185,0.0888,0.4723,0.1615,0.5631,0.8164 
ViT-B-16,commonpool_l_laion_s1b_b8k,149.62,41.09,0.5017,0.5526,0.8766,0.9296,0.7184,0.2681,0.2173,0.1119,0.4144,0.4115,0.0714,0.7661,0.3296,0.4315,0.4790,0.2004,0.4930,0.6501,0.3432,0.4753,0.4638,0.5023,0.7769,0.7686,0.5158,0.5228,0.5314,0.6760,0.9409,0.6278,0.4301,0.0490,0.5127,0.1026,0.5514,0.8463 RN50,openai,102.01,18.18,0.4812,0.5982,0.8329,0.7157,0.4030,0.2171,0.1623,0.1542,0.4154,0.4081,0.1703,0.8080,0.3510,0.3544,0.5284,0.2327,0.5720,0.6073,0.1730,0.5755,0.4141,0.6522,0.8529,0.6510,0.6393,0.5645,0.4521,0.5453,0.9419,0.5994,0.2883,0.0623,0.5624,0.0000,0.5222,0.8129 -ViT-B-16,commonpool_l_image_s1b_b8k,149.62,41.09,0.4812,0.5719,0.8856,0.9321,0.6955,0.2143,0.2453,0.1308,0.4170,0.3193,0.0735,0.7797,0.2514,0.4343,0.4872,0.2143,0.4725,0.6356,0.3826,0.2219,0.4793,0.4817,0.7784,0.7841,0.5002,0.4986,0.4622,0.6627,0.9489,0.6335,0.2673,0.0424,0.5000,0.0000,0.5946,0.8422 RN50-quickgelu,openai,102.01,18.18,0.4812,0.5982,0.8329,0.7157,0.4030,0.2171,0.1623,0.1542,0.4154,0.4081,0.1703,0.8080,0.3510,0.3544,0.5284,0.2327,0.5720,0.6073,0.1730,0.5755,0.4141,0.6522,0.8529,0.6510,0.6393,0.5645,0.4521,0.5453,0.9419,0.5994,0.2883,0.0623,0.5624,0.0000,0.5222,0.8129 +ViT-B-16,commonpool_l_image_s1b_b8k,149.62,41.09,0.4812,0.5719,0.8856,0.9321,0.6955,0.2143,0.2453,0.1308,0.4170,0.3193,0.0735,0.7797,0.2514,0.4343,0.4872,0.2143,0.4725,0.6356,0.3826,0.2219,0.4793,0.4817,0.7784,0.7841,0.5002,0.4986,0.4622,0.6627,0.9489,0.6335,0.2673,0.0424,0.5000,0.0000,0.5946,0.8422 ViT-B-16,commonpool_l_text_s1b_b8k,149.62,41.09,0.4758,0.5605,0.8720,0.9391,0.7054,0.1843,0.2373,0.0995,0.3941,0.3830,0.0451,0.7724,0.2317,0.4437,0.4835,0.2220,0.4770,0.6708,0.2686,0.2593,0.4911,0.5164,0.7049,0.7669,0.4857,0.4931,0.4663,0.6525,0.9523,0.6088,0.2122,0.0623,0.5697,0.0000,0.5643,0.8564 
ViT-B-16,commonpool_l_basic_s1b_b8k,149.62,41.09,0.4566,0.5155,0.8444,0.8289,0.5251,0.2061,0.2277,0.1173,0.4133,0.3820,0.0481,0.7461,0.2021,0.3932,0.4325,0.1913,0.4600,0.6087,0.3333,0.2809,0.4493,0.4357,0.6956,0.7151,0.5899,0.5387,0.4313,0.7216,0.9373,0.5974,0.1173,0.0436,0.5712,0.0000,0.5421,0.8384 ViT-B-16,commonpool_l_s1b_b8k,149.62,41.09,0.4386,0.4593,0.8089,0.9133,0.6421,0.1594,0.2203,0.1177,0.3383,0.3348,0.0316,0.6735,0.2766,0.3448,0.3914,0.1592,0.4335,0.5265,0.2686,0.3603,0.4126,0.3681,0.5587,0.7093,0.5516,0.5118,0.4154,0.6060,0.9339,0.5713,0.3047,0.0399,0.5102,0.0000,0.5654,0.8305 @@ -108,8 +112,8 @@ RN50-quickgelu,yfcc15m,102.01,18.18,0.2747,0.3275,0.5089,0.4919,0.2033,0.1305,0. ViT-B-32,commonpool_m_s128m_b4k,151.28,14.78,0.2614,0.1755,0.5231,0.7459,0.4391,0.1263,0.2265,0.0362,0.1606,0.2537,0.0115,0.2342,0.0869,0.0952,0.1440,0.0388,0.2780,0.1983,0.2743,0.0933,0.1574,0.1128,0.1676,0.5448,0.5048,0.5003,0.1810,0.1332,0.7690,0.3066,0.0933,0.0127,0.5015,0.0000,0.4276,0.5942 ViT-B-32,commonpool_s_clip_s13m_b4k,151.28,14.78,0.1778,0.0505,0.2483,0.4768,0.1937,0.1529,0.2313,0.0119,0.0782,0.2067,0.0083,0.0801,0.0732,0.0200,0.0380,0.0181,0.1380,0.0655,0.2785,0.0874,0.0506,0.0539,0.0796,0.3379,0.6367,0.5014,0.0806,0.0276,0.5353,0.1126,0.1166,0.0004,0.6874,0.0000,0.2605,0.2827 ViT-B-32,commonpool_s_text_s13m_b4k,151.28,14.78,0.1601,0.0460,0.2231,0.4679,0.1844,0.1350,0.1899,0.0121,0.0670,0.0896,0.0139,0.0618,0.0411,0.0175,0.0398,0.0187,0.1270,0.0606,0.3980,0.0771,0.0494,0.0428,0.0581,0.2942,0.5027,0.5008,0.1029,0.0204,0.5019,0.1051,0.0933,0.0015,0.5000,0.0000,0.2745,0.2843 -ViT-B-32,datacomp_s_s13m_b4k,151.28,14.78,0.1492,0.0392,0.2238,0.3176,0.1329,0.1121,0.2217,0.0109,0.0521,0.1593,0.0120,0.0604,0.0579,0.0186,0.0308,0.0155,0.1055,0.0578,0.2883,0.0991,0.0436,0.0528,0.0474,0.2666,0.5273,0.4646,0.0794,0.0173,0.4601,0.0725,0.1305,0.0033,0.5425,0.0085,0.2150,0.2752 
ViT-B-32,commonpool_s_image_s13m_b4k,151.28,14.78,0.1492,0.0392,0.2238,0.3176,0.1329,0.1121,0.2217,0.0109,0.0521,0.1593,0.0120,0.0604,0.0579,0.0186,0.0308,0.0155,0.1055,0.0578,0.2883,0.0991,0.0436,0.0528,0.0474,0.2666,0.5273,0.4646,0.0794,0.0173,0.4601,0.0725,0.1305,0.0033,0.5425,0.0085,0.2150,0.2752 +ViT-B-32,datacomp_s_s13m_b4k,151.28,14.78,0.1492,0.0392,0.2238,0.3176,0.1329,0.1121,0.2217,0.0109,0.0521,0.1593,0.0120,0.0604,0.0579,0.0186,0.0308,0.0155,0.1055,0.0578,0.2883,0.0991,0.0436,0.0528,0.0474,0.2666,0.5273,0.4646,0.0794,0.0173,0.4601,0.0725,0.1305,0.0033,0.5425,0.0085,0.2150,0.2752 ViT-B-32,commonpool_s_basic_s13m_b4k,151.28,14.78,0.1445,0.0377,0.1806,0.2664,0.1154,0.1245,0.2335,0.0120,0.0553,0.0587,0.0103,0.0588,0.0638,0.0151,0.0319,0.0203,0.0985,0.0499,0.3390,0.1085,0.0440,0.0351,0.0488,0.3081,0.5096,0.4986,0.0795,0.0200,0.4659,0.0879,0.0810,0.0003,0.5001,0.0000,0.2325,0.2643 ViT-B-32,commonpool_s_s13m_b4k,151.28,14.78,0.1441,0.0270,0.1564,0.4079,0.1296,0.1305,0.2233,0.0126,0.0574,0.1487,0.0081,0.0473,0.0654,0.0108,0.0234,0.0141,0.1000,0.0404,0.3460,0.0708,0.0360,0.0338,0.0443,0.2235,0.5268,0.5008,0.0698,0.0143,0.4266,0.0766,0.1121,0.0002,0.5124,0.0000,0.2290,0.2167 ViT-B-32,commonpool_s_laion_s13m_b4k,151.28,14.78,0.1367,0.0305,0.1549,0.3364,0.1347,0.1309,0.1299,0.0098,0.0553,0.1578,0.0134,0.0501,0.0538,0.0125,0.0271,0.0147,0.1015,0.0443,0.2518,0.1387,0.0369,0.0244,0.0399,0.3030,0.4216,0.4992,0.0583,0.0155,0.4874,0.0659,0.1473,0.0017,0.3703,0.0000,0.2079,0.2580 diff --git a/docs/openclip_results.csv b/docs/openclip_results.csv index 50db8f9f9..996ec7b6f 100644 --- a/docs/openclip_results.csv +++ b/docs/openclip_results.csv @@ -1,10 +1,13 @@ name,pretrained,params (M),FLOPs (B),Average perf. 
on 38 datasets,ImageNet 1k,Caltech-101,CIFAR-10,CIFAR-100,CLEVR Counts,CLEVR Distance,Country211,Describable Textures,EuroSAT,FGVC Aircraft,Food-101,GTSRB,ImageNet Sketch,ImageNet v2,ImageNet-A,ImageNet-O,ImageNet-R,KITTI Vehicle Distance,MNIST,ObjectNet,Oxford Flowers-102,Oxford-IIIT Pet,Pascal VOC 2007,PatchCamelyon,Rendered SST2,RESISC45,Stanford Cars,STL-10,SUN397,SVHN,Flickr,MSCOCO,WinoGAViL,iWildCam,Camelyon17,FMoW,Dollar Street,GeoDE +ViT-H-14-378-quickgelu,dfn5b,986.71,1054.05,0.7079,0.8437,0.9517,0.9880,0.9043,0.3596,0.2085,0.3787,0.7106,0.6133,0.7219,0.9623,0.6782,0.7324,0.7833,0.7964,0.3810,0.9376,0.3966,0.8364,0.7340,0.8935,0.9696,0.8241,0.6964,0.5546,0.7589,0.9598,0.9906,0.7733,0.6739,0.8801,0.6376,0.5674,0.2205,0.7211,0.2075,0.7173,0.9349 +ViT-H-14-quickgelu,dfn5b,986.11,381.68,0.6961,0.8344,0.9552,0.9878,0.9051,0.2967,0.2117,0.3442,0.7064,0.6546,0.7147,0.9568,0.6772,0.7274,0.7736,0.6987,0.3810,0.9296,0.3347,0.8579,0.6813,0.8995,0.9658,0.8184,0.6539,0.5464,0.7508,0.9580,0.9890,0.7691,0.6764,0.8646,0.6311,0.5561,0.2025,0.7050,0.2079,0.7009,0.9286 EVA02-E-14-plus,laion2b_s9b_b144k,5044.89,2362.19,0.6930,0.8201,0.9535,0.9934,0.9316,0.2991,0.1998,0.3564,0.6777,0.7574,0.5360,0.9496,0.6740,0.7162,0.7564,0.8223,0.3540,0.9456,0.1842,0.7463,0.7937,0.8433,0.9567,0.8569,0.6442,0.6271,0.7490,0.9457,0.9926,0.7510,0.7560,0.8648,0.5991,0.4403,0.2591,0.6948,0.2668,0.6951,0.9244 ViT-SO400M-14-SigLIP-384,webli,877.96,723.48,0.6921,0.8308,0.9599,0.9672,0.8357,0.4071,0.2246,0.3645,0.7303,0.6354,0.6069,0.9635,0.6429,0.7454,0.7717,0.8247,0.2775,0.9575,0.2082,0.8862,0.7695,0.9114,0.9680,0.7171,0.5268,0.7002,0.7211,0.9521,0.9930,0.7541,0.5151,0.8863,0.6331,0.5754,0.2294,0.6149,0.3309,0.7301,0.9328 
ViT-bigG-14-CLIPA-336,datacomp1b,2517.76,2271.58,0.6842,0.8309,0.9529,0.9904,0.9123,0.1399,0.2161,0.4094,0.7293,0.6457,0.5561,0.9623,0.6407,0.7454,0.7726,0.8599,0.3130,0.9535,0.2630,0.8533,0.7966,0.8694,0.9562,0.8162,0.5411,0.6420,0.7257,0.9542,0.9956,0.7645,0.6691,0.8524,0.5909,0.4487,0.2383,0.5874,0.1766,0.6869,0.9407 ViT-bigG-14-CLIPA,datacomp1b,2517.22,1007.93,0.6822,0.8270,0.9513,0.9912,0.9135,0.1357,0.2113,0.3921,0.7207,0.6861,0.5576,0.9583,0.6460,0.7431,0.7699,0.8179,0.3075,0.9512,0.2743,0.8544,0.7694,0.8693,0.9576,0.8188,0.5345,0.6332,0.7137,0.9560,0.9965,0.7642,0.6811,0.8488,0.5888,0.4383,0.2269,0.5955,0.1959,0.6869,0.9382 ViT-SO400M-14-SigLIP,webli,877.36,233.54,0.6808,0.8203,0.9600,0.9679,0.8417,0.4210,0.2213,0.3243,0.7106,0.6274,0.6029,0.9556,0.6382,0.7402,0.7607,0.7185,0.2960,0.9506,0.2489,0.8929,0.7060,0.8982,0.9522,0.7034,0.5057,0.6936,0.7257,0.9032,0.9939,0.7436,0.5670,0.8313,0.6071,0.5665,0.1915,0.6215,0.3163,0.7173,0.9278 EVA02-E-14,laion2b_s4b_b115k,4704.59,2311.42,0.6690,0.8196,0.9541,0.9925,0.9258,0.1632,0.2499,0.3482,0.6878,0.7446,0.4892,0.9523,0.6729,0.7151,0.7566,0.8044,0.3340,0.9407,0.1294,0.7581,0.7674,0.8210,0.9569,0.8136,0.4972,0.5859,0.7324,0.9438,0.9926,0.7658,0.6381,0.8515,0.5892,0.4429,0.2289,0.4894,0.2801,0.6682,0.9182 +ViT-L-14-quickgelu,dfn2b,427.62,175.33,0.6687,0.8141,0.9532,0.9836,0.8837,0.3325,0.2481,0.2823,0.6606,0.6493,0.3936,0.9457,0.6168,0.6832,0.7461,0.6677,0.3930,0.9000,0.2011,0.8470,0.7397,0.8654,0.9555,0.8162,0.6318,0.5502,0.7327,0.9470,0.9768,0.7546,0.6525,0.8253,0.5708,0.5519,0.1883,0.6237,0.2237,0.6916,0.9111 ViT-L-16-SigLIP-384,webli,652.48,422.91,0.6683,0.8207,0.9611,0.9605,0.8188,0.3275,0.2077,0.2470,0.7080,0.5817,0.5312,0.9564,0.6385,0.7360,0.7593,0.7663,0.3130,0.9507,0.2222,0.8525,0.7284,0.8934,0.9681,0.7172,0.5466,0.5634,0.6789,0.9493,0.9924,0.7250,0.5672,0.8756,0.6290,0.5550,0.2236,0.6637,0.1489,0.6916,0.9207 
ViT-H-14-CLIPA-336,datacomp1b,968.64,800.88,0.6677,0.8180,0.9467,0.9890,0.8968,0.1326,0.2254,0.3551,0.7197,0.6604,0.4718,0.9572,0.5816,0.7282,0.7562,0.8275,0.3115,0.9438,0.2574,0.8245,0.7742,0.8463,0.9573,0.8134,0.4979,0.6052,0.7114,0.9483,0.9955,0.7635,0.6599,0.8356,0.5822,0.4587,0.2239,0.4357,0.2500,0.6822,0.9278 ViT-H-14-quickgelu,metaclip_fullcc,986.11,381.68,0.6671,0.8051,0.9536,0.9804,0.8634,0.2115,0.1881,0.3716,0.7271,0.6450,0.5114,0.9423,0.6257,0.7052,0.7417,0.7533,0.3040,0.9342,0.2771,0.7266,0.7642,0.8448,0.9561,0.7495,0.6222,0.6925,0.7024,0.8990,0.9944,0.7440,0.5910,0.8507,0.5752,0.5312,0.1680,0.5782,0.2314,0.6811,0.9077 @@ -15,7 +18,7 @@ EVA01-g-14-plus,merged2b_s11b_b114k,1366.62,581.15,0.6624,0.7933,0.9506,0.9910,0 ViT-L-14-quickgelu,metaclip_fullcc,427.62,175.33,0.6592,0.7917,0.9527,0.9759,0.8410,0.3107,0.2260,0.3394,0.6862,0.5894,0.4537,0.9352,0.5623,0.6896,0.7256,0.7231,0.3010,0.9205,0.2785,0.6444,0.7457,0.8143,0.9461,0.8030,0.6197,0.6678,0.7360,0.8868,0.9933,0.7355,0.4681,0.8326,0.5576,0.5357,0.1581,0.7551,0.2592,0.6752,0.9140 EVA02-L-14-336,merged2b_s6b_b61k,428.08,395.16,0.6583,0.8039,0.9525,0.9892,0.8980,0.3635,0.2485,0.3354,0.6473,0.7139,0.3758,0.9421,0.5759,0.6891,0.7380,0.8289,0.2850,0.9324,0.2377,0.6421,0.7789,0.7645,0.9424,0.8267,0.5487,0.6463,0.6910,0.9158,0.9966,0.7480,0.4575,0.8381,0.5605,0.5053,0.2105,0.5691,0.2198,0.6811,0.9136 ViT-L-14-CLIPA-336,datacomp1b,414.54,387.39,0.6570,0.8026,0.9439,0.9864,0.8826,0.1566,0.2439,0.3066,0.6856,0.5811,0.4281,0.9456,0.5695,0.7087,0.7346,0.7771,0.3290,0.9329,0.1997,0.7667,0.7317,0.8100,0.9495,0.7979,0.6028,0.5316,0.6884,0.9407,0.9929,0.7560,0.6290,0.8251,0.5640,0.4449,0.1937,0.6783,0.2500,0.6752,0.9240 
-ViT-L-16-SigLIP-256,webli,652.15,201.62,0.6557,0.8045,0.9593,0.9619,0.8191,0.4065,0.2150,0.2141,0.7027,0.5598,0.5259,0.9463,0.6115,0.7209,0.7376,0.6213,0.3265,0.9396,0.1983,0.8499,0.6526,0.8827,0.9604,0.7409,0.5458,0.6172,0.6817,0.9386,0.9911,0.7253,0.5211,0.8542,0.6154,0.5748,0.1796,0.5757,0.1296,0.6904,0.9173 +ViT-L-16-SigLIP-256,webli,652.15,201.62,0.6557,0.8045,0.9593,0.9619,0.8191,0.4065,0.2150,0.2141,0.7027,0.5598,0.5259,0.9463,0.6115,0.7209,0.7376,0.6213,0.3265,0.9396,0.1983,0.8499,0.6526,0.8827,0.9604,0.7409,0.5458,0.6172,0.6817,0.9386,0.9911,0.7253,0.5211,0.8542,0.6154,0.5748,0.1796,0.5757,0.1296,0.6904,0.9173 ViT-L-14-CLIPA,datacomp1b,414.21,167.5,0.6536,0.7957,0.9453,0.9866,0.8850,0.1857,0.2449,0.2941,0.6963,0.6044,0.4299,0.9415,0.5906,0.7061,0.7305,0.7125,0.3370,0.9288,0.1927,0.7374,0.6988,0.8101,0.9497,0.8067,0.5915,0.5387,0.6843,0.9366,0.9919,0.7528,0.6390,0.8188,0.5604,0.4388,0.1724,0.6760,0.2457,0.6647,0.9152 convnext_xxlarge,laion2b_s34b_b82k_augreg_soup,1200.58,443.03,0.6530,0.7947,0.9448,0.9822,0.8687,0.1454,0.2365,0.3170,0.7053,0.6128,0.4434,0.9321,0.5508,0.6840,0.7260,0.6719,0.4060,0.9160,0.2363,0.8277,0.7273,0.8241,0.9445,0.8090,0.5142,0.6952,0.7190,0.9409,0.9810,0.7458,0.6254,0.8521,0.5867,0.4702,0.1730,0.6071,0.0000,0.6764,0.9215 convnext_xxlarge,laion2b_s34b_b82k_augreg_rewind,1200.58,443.03,0.6521,0.7931,0.9452,0.9823,0.8686,0.1651,0.2534,0.3155,0.7016,0.6331,0.4398,0.9308,0.5491,0.6825,0.7228,0.6657,0.3975,0.9139,0.2419,0.7930,0.7252,0.8241,0.9438,0.8100,0.5014,0.6897,0.7168,0.9406,0.9801,0.7459,0.6137,0.8498,0.5871,0.4741,0.1735,0.6071,0.0000,0.6799,0.9228 @@ -44,6 +47,7 @@ ViT-L-14,laion2b_s32b_b82k,427.62,175.33,0.6205,0.7525,0.9388,0.9662,0.8332,0.31 
ViT-L-14,openai,427.62,175.33,0.6173,0.7554,0.9249,0.9559,0.7582,0.1943,0.2021,0.3187,0.5537,0.6263,0.3181,0.9305,0.5055,0.5959,0.6983,0.7075,0.3235,0.8784,0.2180,0.7634,0.6889,0.7923,0.9323,0.7828,0.5204,0.6881,0.6337,0.7788,0.9936,0.6756,0.5840,0.7508,0.4642,0.4136,0.1211,0.6741,0.2229,0.6297,0.8839 coca_ViT-L-14,mscoco_finetuned_laion2b_s13b_b90k,638.45,214.52,0.6159,0.7204,0.9420,0.9630,0.7965,0.3765,0.2501,0.1800,0.6213,0.5867,0.2329,0.8436,0.5453,0.6114,0.6475,0.4548,0.3865,0.8574,0.3797,0.8292,0.6253,0.7074,0.9115,0.8106,0.4943,0.6107,0.6267,0.8865,0.9861,0.7398,0.5564,0.8373,0.6028,0.5146,0.1303,0.4294,0.1678,0.6636,0.8772 ViT-B-16,datacomp_xl_s13b_b90k,149.62,41.09,0.6147,0.7349,0.9380,0.9624,0.8212,0.3267,0.2461,0.2215,0.5793,0.5883,0.2970,0.9047,0.5523,0.6044,0.6598,0.4840,0.4285,0.8362,0.2883,0.7649,0.6350,0.7701,0.9254,0.8178,0.6002,0.5162,0.6535,0.8883,0.9811,0.7051,0.6272,0.7633,0.4880,0.4832,0.1181,0.4799,0.1504,0.6168,0.8990 +ViT-B-16,dfn2b,149.62,41.09,0.6092,0.7624,0.9429,0.9672,0.8349,0.2327,0.2453,0.1955,0.5755,0.5402,0.2473,0.9130,0.4701,0.6204,0.6818,0.4820,0.4925,0.8310,0.1927,0.7814,0.6319,0.8201,0.9372,0.7884,0.5214,0.4876,0.6137,0.9073,0.9753,0.7143,0.5985,0.7726,0.5188,0.5417,0.1554,0.4993,0.1415,0.6250,0.8910 ViT-B-32-256,datacomp_s34b_b86k,151.29,17.46,0.6087,0.7281,0.9348,0.9653,0.8287,0.2489,0.2271,0.1968,0.6064,0.6469,0.3645,0.8909,0.5152,0.6065,0.6481,0.3757,0.4635,0.8344,0.2658,0.7939,0.5960,0.7822,0.9115,0.7880,0.5880,0.5294,0.6505,0.8990,0.9731,0.7021,0.6708,0.7486,0.4892,0.4300,0.0910,0.6252,0.0000,0.6238,0.8923 ViT-B-16-SigLIP-i18n-256,webli,370.63,57.84,0.6068,0.7513,0.9475,0.9118,0.7216,0.2552,0.1976,0.1593,0.6426,0.3826,0.3325,0.9171,0.5276,0.6588,0.6814,0.4585,0.3685,0.8920,0.3826,0.8301,0.5977,0.8387,0.9387,0.7536,0.5381,0.5700,0.5737,0.8926,0.9764,0.6978,0.4272,0.8088,0.5470,0.5710,0.1451,0.4899,0.1064,0.6472,0.9186 
RN50x64,openai,623.26,552.65,0.6061,0.7391,0.9026,0.8510,0.5985,0.2254,0.1994,0.2981,0.5314,0.5765,0.3103,0.9205,0.4792,0.5593,0.6706,0.7077,0.3830,0.8441,0.3094,0.8583,0.6820,0.7745,0.9360,0.7398,0.5387,0.7106,0.6265,0.7581,0.9829,0.6661,0.6044,0.7794,0.4683,0.3936,0.1469,0.5280,0.1939,0.6472,0.8898 @@ -77,8 +81,8 @@ ViT-B-16,datacomp_l_s1b_b8k,149.62,41.09,0.5372,0.6310,0.8969,0.9381,0.7540,0.23 ViT-B-16,commonpool_l_clip_s1b_b8k,149.62,41.09,0.5294,0.5777,0.8853,0.9349,0.7313,0.2691,0.2313,0.1417,0.4500,0.4728,0.0822,0.7995,0.4657,0.4589,0.4995,0.2165,0.4950,0.6843,0.3755,0.7032,0.4914,0.5667,0.7561,0.7821,0.4962,0.5036,0.5295,0.8171,0.9496,0.6295,0.5985,0.5956,0.3658,0.4359,0.0741,0.4920,0.1257,0.5818,0.8501 ViT-B-32-quickgelu,laion400m_e32,151.28,14.78,0.5272,0.6293,0.9118,0.9074,0.7029,0.1624,0.2391,0.1475,0.5457,0.5143,0.1658,0.8086,0.4197,0.4939,0.5506,0.2172,0.5345,0.7342,0.2897,0.3733,0.4389,0.6620,0.8671,0.7582,0.5592,0.5228,0.5454,0.7926,0.9560,0.6700,0.3039,0.7025,0.4395,0.4072,0.0745,0.4709,0.1296,0.5491,0.8380 ViT-B-32-quickgelu,laion400m_e31,151.28,14.78,0.5263,0.6294,0.9121,0.9060,0.7021,0.1659,0.2397,0.1476,0.5447,0.5085,0.1675,0.8080,0.4230,0.4937,0.5487,0.2161,0.5335,0.7349,0.2911,0.3656,0.4374,0.6638,0.8629,0.7539,0.5543,0.5217,0.5446,0.7914,0.9553,0.6702,0.3144,0.7022,0.4395,0.4034,0.0788,0.4554,0.1310,0.5467,0.8363 -ViT-B-32-quickgelu,openai,151.28,14.78,0.5245,0.6332,0.8758,0.8983,0.6423,0.2320,0.2335,0.1720,0.4436,0.5044,0.1953,0.8400,0.3258,0.4229,0.5592,0.3155,0.4775,0.6933,0.2743,0.4839,0.4431,0.6670,0.8700,0.7640,0.6224,0.5865,0.5362,0.5963,0.9713,0.6248,0.3159,0.6884,0.4028,0.4125,0.0732,0.6061,0.1676,0.5386,0.8217 ViT-B-32,openai,151.28,14.78,0.5245,0.6332,0.8758,0.8983,0.6423,0.2320,0.2335,0.1720,0.4436,0.5044,0.1953,0.8400,0.3258,0.4229,0.5592,0.3155,0.4775,0.6933,0.2743,0.4839,0.4431,0.6670,0.8700,0.7640,0.6224,0.5865,0.5362,0.5963,0.9713,0.6248,0.3159,0.6884,0.4028,0.4125,0.0732,0.6061,0.1676,0.5386,0.8217 
+ViT-B-32-quickgelu,openai,151.28,14.78,0.5245,0.6332,0.8758,0.8983,0.6423,0.2320,0.2335,0.1720,0.4436,0.5044,0.1953,0.8400,0.3258,0.4229,0.5592,0.3155,0.4775,0.6933,0.2743,0.4839,0.4431,0.6670,0.8700,0.7640,0.6224,0.5865,0.5362,0.5963,0.9713,0.6248,0.3159,0.6884,0.4028,0.4125,0.0732,0.6061,0.1676,0.5386,0.8217 RN50x4,openai,178.3,51.82,0.5188,0.6627,0.8661,0.7943,0.4514,0.2045,0.0905,0.2039,0.4862,0.3354,0.2102,0.8640,0.3622,0.4468,0.5944,0.4145,0.4955,0.7274,0.2335,0.4903,0.5141,0.6766,0.8829,0.6814,0.5675,0.6716,0.5338,0.6673,0.9658,0.6089,0.3190,0.7234,0.4318,0.3912,0.0870,0.5435,0.1130,0.5654,0.8376 ViT-B-32,laion400m_e31,151.28,14.78,0.5077,0.6022,0.8916,0.8825,0.6781,0.1549,0.2261,0.1356,0.5218,0.4694,0.1437,0.7814,0.4082,0.4648,0.5234,0.1957,0.5085,0.7079,0.1224,0.4108,0.4281,0.6319,0.8541,0.7312,0.5495,0.5162,0.5108,0.7436,0.9494,0.6508,0.2891,0.6890,0.4327,0.4262,0.0745,0.4975,0.1076,0.5491,0.8328 ViT-B-32,laion400m_e32,151.28,14.78,0.5074,0.6024,0.8918,0.8840,0.6773,0.1536,0.2261,0.1349,0.5229,0.4754,0.1467,0.7817,0.4070,0.4646,0.5237,0.1953,0.5080,0.7084,0.1181,0.4000,0.4292,0.6323,0.8513,0.7328,0.5490,0.5206,0.5094,0.7454,0.9498,0.6509,0.2759,0.6866,0.4337,0.4265,0.0741,0.5084,0.1068,0.5444,0.8326 @@ -108,8 +112,8 @@ RN50-quickgelu,yfcc15m,102.01,18.18,0.2776,0.3275,0.5089,0.4919,0.2033,0.1305,0. 
ViT-B-32,commonpool_m_s128m_b4k,151.28,14.78,0.2580,0.1755,0.5231,0.7459,0.4391,0.1263,0.2265,0.0362,0.1606,0.2537,0.0115,0.2342,0.0869,0.0952,0.1440,0.0388,0.2780,0.1983,0.2743,0.0933,0.1574,0.1128,0.1676,0.5448,0.5048,0.5003,0.1810,0.1332,0.7690,0.3066,0.0933,0.1599,0.0974,0.3983,0.0127,0.5015,0.0000,0.4276,0.5942 ViT-B-32,commonpool_s_clip_s13m_b4k,151.28,14.78,0.1731,0.0505,0.2483,0.4768,0.1937,0.1529,0.2313,0.0119,0.0782,0.2067,0.0083,0.0801,0.0732,0.0200,0.0380,0.0181,0.1380,0.0655,0.2785,0.0874,0.0506,0.0539,0.0796,0.3379,0.6367,0.5014,0.0806,0.0276,0.5353,0.1126,0.1166,0.0343,0.0224,0.2994,0.0004,0.6874,0.0000,0.2605,0.2827 ViT-B-32,commonpool_s_text_s13m_b4k,151.28,14.78,0.1573,0.0460,0.2231,0.4679,0.1844,0.1350,0.1899,0.0121,0.0670,0.0896,0.0139,0.0618,0.0411,0.0175,0.0398,0.0187,0.1270,0.0606,0.3980,0.0771,0.0494,0.0428,0.0581,0.2942,0.5027,0.5008,0.1029,0.0204,0.5019,0.1051,0.0933,0.0424,0.0214,0.3120,0.0015,0.5000,0.0000,0.2745,0.2843 -ViT-B-32,datacomp_s_s13m_b4k,151.28,14.78,0.1449,0.0392,0.2238,0.3176,0.1329,0.1121,0.2217,0.0109,0.0521,0.1593,0.0120,0.0604,0.0579,0.0186,0.0308,0.0155,0.1055,0.0578,0.2883,0.0991,0.0436,0.0528,0.0474,0.2666,0.5273,0.4646,0.0794,0.0173,0.4601,0.0725,0.1305,0.0171,0.0130,0.2525,0.0033,0.5425,0.0085,0.2150,0.2752 ViT-B-32,commonpool_s_image_s13m_b4k,151.28,14.78,0.1449,0.0392,0.2238,0.3176,0.1329,0.1121,0.2217,0.0109,0.0521,0.1593,0.0120,0.0604,0.0579,0.0186,0.0308,0.0155,0.1055,0.0578,0.2883,0.0991,0.0436,0.0528,0.0474,0.2666,0.5273,0.4646,0.0794,0.0173,0.4601,0.0725,0.1305,0.0171,0.0130,0.2525,0.0033,0.5425,0.0085,0.2150,0.2752 +ViT-B-32,datacomp_s_s13m_b4k,151.28,14.78,0.1449,0.0392,0.2238,0.3176,0.1329,0.1121,0.2217,0.0109,0.0521,0.1593,0.0120,0.0604,0.0579,0.0186,0.0308,0.0155,0.1055,0.0578,0.2883,0.0991,0.0436,0.0528,0.0474,0.2666,0.5273,0.4646,0.0794,0.0173,0.4601,0.0725,0.1305,0.0171,0.0130,0.2525,0.0033,0.5425,0.0085,0.2150,0.2752 
ViT-B-32,commonpool_s_basic_s13m_b4k,151.28,14.78,0.1423,0.0377,0.1806,0.2664,0.1154,0.1245,0.2335,0.0120,0.0553,0.0587,0.0103,0.0588,0.0638,0.0151,0.0319,0.0203,0.0985,0.0499,0.3390,0.1085,0.0440,0.0351,0.0488,0.3081,0.5096,0.4986,0.0795,0.0200,0.4659,0.0879,0.0810,0.0328,0.0168,0.3033,0.0003,0.5001,0.0000,0.2325,0.2643 ViT-B-32,commonpool_s_s13m_b4k,151.28,14.78,0.1420,0.0270,0.1564,0.4079,0.1296,0.1305,0.2233,0.0126,0.0574,0.1487,0.0081,0.0473,0.0654,0.0108,0.0234,0.0141,0.1000,0.0404,0.3460,0.0708,0.0360,0.0338,0.0443,0.2235,0.5268,0.5008,0.0698,0.0143,0.4266,0.0766,0.1121,0.0257,0.0132,0.3126,0.0002,0.5124,0.0000,0.2290,0.2167 ViT-B-32,commonpool_s_laion_s13m_b4k,151.28,14.78,0.1332,0.0305,0.1549,0.3364,0.1347,0.1309,0.1299,0.0098,0.0553,0.1578,0.0134,0.0501,0.0538,0.0125,0.0271,0.0147,0.1015,0.0443,0.2518,0.1387,0.0369,0.0244,0.0399,0.3030,0.4216,0.4992,0.0583,0.0155,0.4874,0.0659,0.1473,0.0223,0.0121,0.2410,0.0017,0.3703,0.0000,0.2079,0.2580 diff --git a/docs/openclip_retrieval_results.csv b/docs/openclip_retrieval_results.csv index 433f6d253..7b1f71832 100644 --- a/docs/openclip_retrieval_results.csv +++ b/docs/openclip_retrieval_results.csv @@ -1,6 +1,8 @@ name,pretrained,params (M),FLOPs (B),Average score,Flickr image retr. R@1,Flickr image retr. R@5,Flickr image retr. R@10,Flickr text retr. R@1,Flickr text retr. R@5,Flickr text retr. R@10,MSCOCO image retr. R@1,MSCOCO image retr. R@5,MSCOCO image retr. R@10,MSCOCO text retr. R@1,MSCOCO text retr. R@5,MSCOCO text retr. 
R@10,WinoGAViL avg jaccard score,WinoGAViL jaccard score 10,WinoGAViL jaccard score 10-12,WinoGAViL jaccard score 12,WinoGAViL jaccard score 5,WinoGAViL jaccard score 5-6,WinoGAViL jaccard score 6 ViT-SO400M-14-SigLIP-384,webli,877.96,723.48,0.7721,0.8296,0.9610,0.9804,0.9430,0.9970,0.9980,0.5421,0.7678,0.8424,0.7242,0.8998,0.9448,0.6181,0.5807,0.5754,0.5701,0.6427,0.6316,0.6210 +ViT-H-14-378-quickgelu,dfn5b,986.71,1054.05,0.7719,0.8202,0.9598,0.9798,0.9400,0.9920,0.9960,0.5564,0.7920,0.8626,0.7188,0.9048,0.9496,0.6123,0.5668,0.5674,0.5679,0.6409,0.6265,0.6127 ViT-L-16-SigLIP-384,webli,652.48,422.91,0.7642,0.8142,0.9536,0.9748,0.9370,0.9920,0.9990,0.5391,0.7657,0.8399,0.7190,0.9006,0.9410,0.6070,0.5562,0.5550,0.5539,0.6422,0.6234,0.6056 +ViT-H-14-quickgelu,dfn5b,986.11,381.68,0.7640,0.8012,0.9526,0.9738,0.9280,0.9940,0.9990,0.5391,0.7805,0.8553,0.7230,0.9024,0.9452,0.6022,0.5568,0.5561,0.5554,0.6315,0.6167,0.6028 ViT-L-16-SigLIP-256,webli,652.15,201.62,0.7619,0.7904,0.9446,0.9684,0.9180,0.9900,0.9980,0.5228,0.7580,0.8334,0.7080,0.8870,0.9374,0.6142,0.5877,0.5748,0.5619,0.6450,0.6267,0.6092 ViT-SO400M-14-SigLIP,webli,877.36,233.54,0.7567,0.7526,0.9226,0.9554,0.9100,0.9910,0.9980,0.5176,0.7527,0.8300,0.6966,0.8908,0.9348,0.6189,0.5736,0.5665,0.5594,0.6430,0.6354,0.6282 ViT-B-16-SigLIP-512,webli,203.79,227.26,0.7554,0.7906,0.9458,0.9690,0.9250,0.9920,0.9960,0.5055,0.7421,0.8217,0.6872,0.8786,0.9266,0.6070,0.5761,0.5696,0.5633,0.6291,0.6187,0.6088 @@ -9,6 +11,7 @@ ViT-B-16-SigLIP-256,webli,203.2,57.84,0.7462,0.7504,0.9242,0.9626,0.9040,0.9830, ViT-B-16-SigLIP,webli,203.16,46.44,0.7442,0.7468,0.9230,0.9562,0.8910,0.9800,0.9930,0.4778,0.7244,0.8100,0.6574,0.8542,0.9126,0.6134,0.5726,0.5738,0.5750,0.6335,0.6259,0.6186 coca_ViT-L-14,mscoco_finetuned_laion2b_s13b_b90k,638.45,214.52,0.7432,0.7846,0.9452,0.9712,0.8900,0.9870,0.9950,0.5374,0.7779,0.8567,0.6682,0.8762,0.9242,0.5762,0.5265,0.5146,0.5028,0.5978,0.5956,0.5934 
ViT-H-14-quickgelu,metaclip_fullcc,986.11,381.68,0.7412,0.7834,0.9464,0.9692,0.9180,0.9870,0.9970,0.4882,0.7323,0.8136,0.6622,0.8618,0.9188,0.5891,0.5316,0.5312,0.5308,0.6220,0.6073,0.5934 +ViT-L-14-quickgelu,dfn2b,427.62,175.33,0.7406,0.7546,0.9280,0.9608,0.8960,0.9850,0.9930,0.4856,0.7381,0.8237,0.6560,0.8584,0.9120,0.5955,0.5688,0.5519,0.5351,0.6219,0.6093,0.5973 xlm-roberta-large-ViT-H-14,frozen_laion5b_s13b_b90k,1193.01,671.01,0.7363,0.7742,0.9392,0.9670,0.9180,0.9930,0.9980,0.4921,0.7305,0.8159,0.6596,0.8618,0.9184,0.5767,0.5277,0.5206,0.5135,0.6076,0.5943,0.5818 ViT-B-16-SigLIP-i18n-256,webli,370.63,57.84,0.7347,0.7216,0.9036,0.9470,0.8960,0.9820,0.9910,0.4492,0.6948,0.7837,0.6448,0.8434,0.9060,0.6113,0.5887,0.5710,0.5534,0.6303,0.6240,0.6180 ViT-L-14-quickgelu,metaclip_fullcc,427.62,175.33,0.7325,0.7642,0.9366,0.9646,0.9010,0.9850,0.9930,0.4709,0.7141,0.8026,0.6442,0.8504,0.9130,0.5817,0.5539,0.5357,0.5176,0.5990,0.5963,0.5937 @@ -26,6 +29,7 @@ ViT-H-14-CLIPA-336,datacomp1b,968.64,800.88,0.7192,0.7652,0.9350,0.9630,0.9060,0 EVA02-E-14-plus,laion2b_s9b_b144k,5044.89,2362.19,0.7188,0.7886,0.9434,0.9698,0.9410,0.9930,0.9980,0.5110,0.7492,0.8276,0.6872,0.8760,0.9274,0.5134,0.4576,0.4403,0.4232,0.5470,0.5364,0.5263 ViT-H-14-CLIPA,datacomp1b,968.24,354.02,0.7178,0.7588,0.9330,0.9628,0.9100,0.9900,0.9960,0.4910,0.7291,0.8140,0.6698,0.8730,0.9272,0.5343,0.4793,0.4578,0.4365,0.5641,0.5584,0.5529 ViT-bigG-14-CLIPA,datacomp1b,2517.22,1007.93,0.7175,0.7786,0.9374,0.9650,0.9190,0.9930,0.9980,0.4996,0.7414,0.8214,0.6780,0.8742,0.9312,0.5247,0.4461,0.4383,0.4306,0.5552,0.5519,0.5488 +ViT-B-16,dfn2b,149.62,41.09,0.7158,0.6912,0.8982,0.9406,0.8540,0.9780,0.9860,0.4339,0.6924,0.7882,0.6038,0.8310,0.8946,0.5852,0.5509,0.5417,0.5327,0.6058,0.5989,0.5923 EVA02-E-14,laion2b_s4b_b115k,4704.59,2311.42,0.7152,0.7810,0.9438,0.9700,0.9220,0.9950,0.9970,0.5037,0.7429,0.8224,0.6746,0.8738,0.9256,0.5110,0.4316,0.4429,0.4542,0.5485,0.5325,0.5173 
convnext_large_d_320,laion2b_s29b_b131k_ft_soup,351.77,157.98,0.7148,0.7704,0.9394,0.9648,0.9230,0.9890,0.9980,0.4829,0.7251,0.8103,0.6502,0.8608,0.9180,0.5285,0.4574,0.4549,0.4525,0.5650,0.5517,0.5391 ViT-L-14-quickgelu,metaclip_400m,427.62,175.33,0.7137,0.7342,0.9228,0.9574,0.8620,0.9800,0.9960,0.4381,0.6862,0.7780,0.6000,0.8290,0.8934,0.5701,0.5322,0.5175,0.5029,0.5941,0.5867,0.5796 @@ -33,8 +37,8 @@ convnext_large_d_320,laion2b_s29b_b131k_ft,351.77,157.98,0.7122,0.7690,0.9364,0. convnext_large_d,laion2b_s26b_b102k_augreg,351.77,107.5,0.7102,0.7588,0.9310,0.9652,0.9180,0.9850,0.9940,0.4701,0.7139,0.8013,0.6400,0.8506,0.9140,0.5252,0.4835,0.4646,0.4457,0.5571,0.5443,0.5322 ViT-L-14-CLIPA-336,datacomp1b,414.54,387.39,0.7092,0.7462,0.9240,0.9590,0.9040,0.9920,0.9980,0.4715,0.7159,0.8024,0.6564,0.8636,0.9180,0.5276,0.4601,0.4449,0.4297,0.5611,0.5537,0.5467 ViT-L-14,datacomp_xl_s13b_b90k,427.62,175.33,0.7091,0.7338,0.9174,0.9554,0.8900,0.9860,0.9970,0.4573,0.7003,0.7916,0.6330,0.8414,0.9040,0.5471,0.4836,0.4666,0.4497,0.5912,0.5724,0.5546 -coca_ViT-L-14,laion2b_s13b_b90k,638.45,214.52,0.7085,0.7428,0.9202,0.9542,0.8840,0.9930,0.9990,0.4565,0.7042,0.7921,0.6292,0.8370,0.9038,0.5403,0.4800,0.4739,0.4677,0.5704,0.5612,0.5525 ViT-g-14,laion2b_s12b_b42k,1366.68,581.15,0.7085,0.7642,0.9364,0.9624,0.9090,0.9910,0.9980,0.4802,0.7238,0.8079,0.6492,0.8530,0.9152,0.5178,0.4498,0.4427,0.4357,0.5589,0.5414,0.5249 +coca_ViT-L-14,laion2b_s13b_b90k,638.45,214.52,0.7085,0.7428,0.9202,0.9542,0.8840,0.9930,0.9990,0.4565,0.7042,0.7921,0.6292,0.8370,0.9038,0.5403,0.4800,0.4739,0.4677,0.5704,0.5612,0.5525 ViT-B-16-quickgelu,metaclip_fullcc,149.62,41.09,0.7077,0.7072,0.9082,0.9454,0.8550,0.9740,0.9890,0.4134,0.6718,0.7696,0.5936,0.8058,0.8784,0.5787,0.5391,0.5221,0.5052,0.6064,0.5965,0.5871 ViT-H-14,laion2b_s32b_b79k,986.11,381.68,0.7057,0.7764,0.9418,0.9660,0.9070,0.9920,0.9970,0.4948,0.7338,0.8151,0.6592,0.8606,0.9188,0.4998,0.4289,0.4247,0.4206,0.5462,0.5234,0.5018 
convnext_base_w,laion_aesthetic_s13b_b82k,179.39,49.38,0.7047,0.7306,0.9160,0.9532,0.8880,0.9820,0.9930,0.4355,0.6858,0.7808,0.6120,0.8338,0.8978,0.5461,0.4937,0.4764,0.4592,0.5829,0.5681,0.5540 @@ -73,18 +77,18 @@ RN50x64,openai,623.26,552.65,0.6563,0.6898,0.8990,0.9432,0.8690,0.9820,0.9920,0. convnext_base,laion400m_s13b_b51k,151.52,36.67,0.6541,0.6496,0.8814,0.9304,0.8380,0.9710,0.9910,0.3760,0.6315,0.7337,0.5470,0.7990,0.8676,0.4811,0.4146,0.4045,0.3944,0.5145,0.5052,0.4964 RN50x16,openai,290.98,162.69,0.6518,0.6534,0.8710,0.9178,0.8570,0.9700,0.9880,0.3541,0.6002,0.7014,0.5536,0.7876,0.8670,0.4957,0.4311,0.3946,0.3584,0.5419,0.5275,0.5138 ViT-B-16,openai,149.62,41.09,0.6507,0.6216,0.8572,0.9192,0.8220,0.9660,0.9900,0.3309,0.5842,0.6899,0.5242,0.7670,0.8462,0.5171,0.4487,0.4316,0.4146,0.5550,0.5441,0.5337 -ViT-B-32,laion400m_e32,151.28,14.78,0.6412,0.5962,0.8396,0.9020,0.7770,0.9410,0.9680,0.3431,0.6000,0.7054,0.5244,0.7642,0.8454,0.5055,0.4272,0.4265,0.4258,0.5476,0.5304,0.5139 ViT-B-32,laion400m_e31,151.28,14.78,0.6412,0.5970,0.8398,0.9036,0.7810,0.9380,0.9660,0.3420,0.6001,0.7059,0.5234,0.7634,0.8432,0.5059,0.4283,0.4262,0.4242,0.5477,0.5310,0.5152 +ViT-B-32,laion400m_e32,151.28,14.78,0.6412,0.5962,0.8396,0.9020,0.7770,0.9410,0.9680,0.3431,0.6000,0.7054,0.5244,0.7642,0.8454,0.5055,0.4272,0.4265,0.4258,0.5476,0.5304,0.5139 ViT-B-32-quickgelu,laion400m_e32,151.28,14.78,0.6394,0.6170,0.8546,0.9086,0.7880,0.9400,0.9700,0.3533,0.6089,0.7165,0.5258,0.7672,0.8464,0.4884,0.4097,0.4072,0.4047,0.5280,0.5140,0.5006 ViT-B-32-quickgelu,laion400m_e31,151.28,14.78,0.6389,0.6174,0.8548,0.9078,0.7870,0.9400,0.9730,0.3535,0.6100,0.7177,0.5254,0.7702,0.8490,0.4860,0.4054,0.4034,0.4015,0.5273,0.5120,0.4974 RN50x4,openai,178.3,51.82,0.6373,0.6258,0.8476,0.9018,0.8210,0.9630,0.9830,0.3339,0.5812,0.6830,0.5296,0.7662,0.8490,0.4893,0.4104,0.3912,0.3720,0.5354,0.5202,0.5058 
nllb-clip-large,v1,1399.22,1468.46,0.6346,0.6090,0.8576,0.9202,0.7160,0.9220,0.9600,0.3617,0.6250,0.7304,0.4392,0.7086,0.8036,0.5097,0.4334,0.4299,0.4265,0.5395,0.5348,0.5304 -ViT-B-32-quickgelu,openai,151.28,14.78,0.6321,0.5878,0.8356,0.9002,0.7890,0.9490,0.9820,0.3044,0.5594,0.6687,0.5012,0.7500,0.8352,0.5054,0.4454,0.4125,0.3798,0.5492,0.5347,0.5210 ViT-B-32,openai,151.28,14.78,0.6321,0.5878,0.8356,0.9002,0.7890,0.9490,0.9820,0.3044,0.5594,0.6687,0.5012,0.7500,0.8352,0.5054,0.4454,0.4125,0.3798,0.5492,0.5347,0.5210 +ViT-B-32-quickgelu,openai,151.28,14.78,0.6321,0.5878,0.8356,0.9002,0.7890,0.9490,0.9820,0.3044,0.5594,0.6687,0.5012,0.7500,0.8352,0.5054,0.4454,0.4125,0.3798,0.5492,0.5347,0.5210 ViT-B-16,commonpool_l_laion_s1b_b8k,149.62,41.09,0.6274,0.5664,0.8114,0.8836,0.7230,0.9100,0.9510,0.3195,0.5756,0.6848,0.4652,0.7170,0.8088,0.5227,0.4558,0.4476,0.4395,0.5563,0.5463,0.5369 ViT-B-16,datacomp_l_s1b_b8k,149.62,41.09,0.6267,0.5536,0.8090,0.8768,0.7320,0.9170,0.9480,0.3218,0.5747,0.6858,0.4872,0.7292,0.8246,0.5113,0.4613,0.4465,0.4318,0.5404,0.5317,0.5235 -RN101-quickgelu,openai,119.69,25.5,0.6249,0.5804,0.8228,0.8852,0.7900,0.9490,0.9740,0.3069,0.5546,0.6603,0.4982,0.7448,0.8250,0.4920,0.4347,0.4130,0.3913,0.5272,0.5170,0.5072 RN101,openai,119.69,25.5,0.6249,0.5804,0.8228,0.8852,0.7900,0.9490,0.9740,0.3069,0.5546,0.6603,0.4982,0.7448,0.8250,0.4920,0.4347,0.4130,0.3913,0.5272,0.5170,0.5072 +RN101-quickgelu,openai,119.69,25.5,0.6249,0.5804,0.8228,0.8852,0.7900,0.9490,0.9740,0.3069,0.5546,0.6603,0.4982,0.7448,0.8250,0.4920,0.4347,0.4130,0.3913,0.5272,0.5170,0.5072 ViT-B-16,commonpool_l_image_s1b_b8k,149.62,41.09,0.6164,0.5162,0.7908,0.8700,0.6890,0.8830,0.9270,0.2907,0.5449,0.6628,0.4338,0.6882,0.7902,0.5339,0.4932,0.4787,0.4643,0.5609,0.5513,0.5421 ViT-B-16,commonpool_l_basic_s1b_b8k,149.62,41.09,0.6132,0.5250,0.7880,0.8670,0.6780,0.8780,0.9330,0.2862,0.5411,0.6535,0.4304,0.6844,0.7826,0.5285,0.4872,0.4812,0.4753,0.5517,0.5434,0.5355 
ViT-B-16,commonpool_l_text_s1b_b8k,149.62,41.09,0.6126,0.5336,0.7954,0.8650,0.6820,0.8880,0.9320,0.3020,0.5538,0.6674,0.4440,0.7012,0.7906,0.5147,0.4815,0.4570,0.4326,0.5394,0.5329,0.5266 @@ -110,7 +114,7 @@ ViT-B-32,commonpool_s_text_s13m_b4k,151.28,14.78,0.1922,0.0278,0.0950,0.1462,0.0 ViT-B-32,commonpool_s_basic_s13m_b4k,151.28,14.78,0.1874,0.0256,0.0924,0.1466,0.0400,0.1220,0.2000,0.0144,0.0487,0.0782,0.0192,0.0646,0.1024,0.4008,0.3164,0.3033,0.2903,0.4526,0.4315,0.4115 ViT-B-32,commonpool_s_clip_s13m_b4k,151.28,14.78,0.1857,0.0286,0.0938,0.1454,0.0400,0.1370,0.2070,0.0157,0.0515,0.0847,0.0292,0.0794,0.1206,0.3801,0.3282,0.2994,0.2708,0.4299,0.4055,0.3823 ViT-B-32,commonpool_s_s13m_b4k,151.28,14.78,0.1789,0.0214,0.0690,0.1138,0.0300,0.1080,0.1760,0.0088,0.0351,0.0601,0.0176,0.0568,0.0902,0.3979,0.3338,0.3126,0.2914,0.4484,0.4249,0.4025 -ViT-B-32,datacomp_s_s13m_b4k,151.28,14.78,0.1536,0.0142,0.0594,0.0960,0.0200,0.0740,0.1160,0.0109,0.0385,0.0641,0.0150,0.0500,0.0842,0.3552,0.2669,0.2525,0.2382,0.4123,0.3875,0.3640 ViT-B-32,commonpool_s_image_s13m_b4k,151.28,14.78,0.1536,0.0142,0.0594,0.0960,0.0200,0.0740,0.1160,0.0109,0.0385,0.0641,0.0150,0.0500,0.0842,0.3552,0.2669,0.2525,0.2382,0.4123,0.3875,0.3640 +ViT-B-32,datacomp_s_s13m_b4k,151.28,14.78,0.1536,0.0142,0.0594,0.0960,0.0200,0.0740,0.1160,0.0109,0.0385,0.0641,0.0150,0.0500,0.0842,0.3552,0.2669,0.2525,0.2382,0.4123,0.3875,0.3640 ViT-B-32,commonpool_s_laion_s13m_b4k,151.28,14.78,0.1527,0.0176,0.0678,0.1056,0.0270,0.0870,0.1440,0.0102,0.0363,0.0607,0.0140,0.0442,0.0770,0.3463,0.2607,0.2410,0.2215,0.4049,0.3795,0.3554 coca_ViT-B-32,mscoco_finetuned_laion2b_s13b_b90k,253.56,33.34,0.1306,0.0074,0.0214,0.0436,0.0110,0.0490,0.0990,0.0033,0.0137,0.0249,0.0088,0.0338,0.0552,0.3299,0.2484,0.2329,0.2175,0.3873,0.3604,0.3348 diff --git a/src/open_clip/model_configs/ViT-H-14-378-quickgelu.json b/src/open_clip/model_configs/ViT-H-14-378-quickgelu.json new file mode 100644 index 000000000..e2b2ecf9a --- /dev/null +++ 
b/src/open_clip/model_configs/ViT-H-14-378-quickgelu.json @@ -0,0 +1,18 @@ +{ + "embed_dim": 1024, + "quick_gelu": true, + "vision_cfg": { + "image_size": 378, + "layers": 32, + "width": 1280, + "head_width": 80, + "patch_size": 14 + }, + "text_cfg": { + "context_length": 77, + "vocab_size": 49408, + "width": 1024, + "heads": 16, + "layers": 24 + } +} \ No newline at end of file diff --git a/src/open_clip/pretrained.py b/src/open_clip/pretrained.py index 6d89f2fa1..8cc8d41b4 100644 --- a/src/open_clip/pretrained.py +++ b/src/open_clip/pretrained.py @@ -171,6 +171,8 @@ def _apcfg(url='', hf_hub='', **kwargs): commonpool_l_text_s1b_b8k=_pcfg(hf_hub='laion/CLIP-ViT-B-16-CommonPool.L.text-s1B-b8K/'), commonpool_l_basic_s1b_b8k=_pcfg(hf_hub='laion/CLIP-ViT-B-16-CommonPool.L.basic-s1B-b8K/'), commonpool_l_s1b_b8k=_pcfg(hf_hub='laion/CLIP-ViT-B-16-CommonPool.L-s1B-b8K/'), + # DFN + dfn2b=_pcfg(hf_hub='apple/DFN2B-CLIP-ViT-B-16/') ) _VITB16_quickgelu = dict( @@ -209,6 +211,7 @@ def _apcfg(url='', hf_hub='', **kwargs): "https://dl.fbaipublicfiles.com/MMPT/metaclip/l14_400m.pt"), metaclip_fullcc=_pcfg( "https://dl.fbaipublicfiles.com/MMPT/metaclip/l14_fullcc2.5b.pt"), + dfn2b=_pcfg(hf_hub='apple/DFN2B-CLIP-ViT-L-14/'), ) _VITL14_336 = dict( @@ -223,6 +226,19 @@ def _apcfg(url='', hf_hub='', **kwargs): _VITH14_quickgelu = dict( metaclip_fullcc=_pcfg( "https://dl.fbaipublicfiles.com/MMPT/metaclip/h14_fullcc2.5b.pt"), + dfn5b=_pcfg( + hf_hub='apple/DFN5B-CLIP-ViT-H-14/', + interpolation="bicubic", + resize_mode="squash" + ), +) + +_VITH14_378_quickgelu = dict( + dfn5b=_pcfg( + hf_hub='apple/DFN5B-CLIP-ViT-H-14-378/', + interpolation="bicubic", + resize_mode="squash" + ), ) _VITg14 = dict( @@ -307,6 +323,7 @@ def _apcfg(url='', hf_hub='', **kwargs): "ViT-L-14-336": _VITL14_336, "ViT-H-14": _VITH14, "ViT-H-14-quickgelu": _VITH14_quickgelu, + "ViT-H-14-378-quickgelu": _VITH14_378_quickgelu, "ViT-g-14": _VITg14, "ViT-bigG-14": _VITbigG14, diff --git a/src/training/profiler.py 
b/src/training/profiler.py index 6c90a5270..1805ca693 100644 --- a/src/training/profiler.py +++ b/src/training/profiler.py @@ -226,10 +226,11 @@ def main(): models_with_errors.append(m) df = pd.DataFrame(results, columns=results[0].keys()) + if 'gmacs' in df.columns: - df = df.sort_values('gmacs') + df = df.sort_values(by=['gmacs', 'mparams', 'model']) else: - df = df.sort_values('gflops') + df = df.sort_values(by=['gflops', 'mparams', 'model']) print('='*100) print('Done.') @@ -237,7 +238,8 @@ def main(): if args.results_file: df.to_csv(args.results_file, index=False) - print('Models with errors:', models_with_errors) + if models_with_errors: + print('Models with errors:', models_with_errors) if __name__ == '__main__': From 83824419a609b2d338c39569d1000350e825ad35 Mon Sep 17 00:00:00 2001 From: Gabriel Ilharco Date: Wed, 1 Nov 2023 05:26:30 +0000 Subject: [PATCH 2/2] Fix typo --- docs/openclip_results.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/openclip_results.csv b/docs/openclip_results.csv index 996ec7b6f..29626609f 100644 --- a/docs/openclip_results.csv +++ b/docs/openclip_results.csv @@ -18,7 +18,7 @@ EVA01-g-14-plus,merged2b_s11b_b114k,1366.62,581.15,0.6624,0.7933,0.9506,0.9910,0 ViT-L-14-quickgelu,metaclip_fullcc,427.62,175.33,0.6592,0.7917,0.9527,0.9759,0.8410,0.3107,0.2260,0.3394,0.6862,0.5894,0.4537,0.9352,0.5623,0.6896,0.7256,0.7231,0.3010,0.9205,0.2785,0.6444,0.7457,0.8143,0.9461,0.8030,0.6197,0.6678,0.7360,0.8868,0.9933,0.7355,0.4681,0.8326,0.5576,0.5357,0.1581,0.7551,0.2592,0.6752,0.9140 EVA02-L-14-336,merged2b_s6b_b61k,428.08,395.16,0.6583,0.8039,0.9525,0.9892,0.8980,0.3635,0.2485,0.3354,0.6473,0.7139,0.3758,0.9421,0.5759,0.6891,0.7380,0.8289,0.2850,0.9324,0.2377,0.6421,0.7789,0.7645,0.9424,0.8267,0.5487,0.6463,0.6910,0.9158,0.9966,0.7480,0.4575,0.8381,0.5605,0.5053,0.2105,0.5691,0.2198,0.6811,0.9136 
ViT-L-14-CLIPA-336,datacomp1b,414.54,387.39,0.6570,0.8026,0.9439,0.9864,0.8826,0.1566,0.2439,0.3066,0.6856,0.5811,0.4281,0.9456,0.5695,0.7087,0.7346,0.7771,0.3290,0.9329,0.1997,0.7667,0.7317,0.8100,0.9495,0.7979,0.6028,0.5316,0.6884,0.9407,0.9929,0.7560,0.6290,0.8251,0.5640,0.4449,0.1937,0.6783,0.2500,0.6752,0.9240 -ViT-L-16-SigLIP-256,webli,65m2.15,201.62,0.6557,0.8045,0.9593,0.9619,0.8191,0.4065,0.2150,0.2141,0.7027,0.5598,0.5259,0.9463,0.6115,0.7209,0.7376,0.6213,0.3265,0.9396,0.1983,0.8499,0.6526,0.8827,0.9604,0.7409,0.5458,0.6172,0.6817,0.9386,0.9911,0.7253,0.5211,0.8542,0.6154,0.5748,0.1796,0.5757,0.1296,0.6904,0.9173 +ViT-L-16-SigLIP-256,webli,652.15,201.62,0.6557,0.8045,0.9593,0.9619,0.8191,0.4065,0.2150,0.2141,0.7027,0.5598,0.5259,0.9463,0.6115,0.7209,0.7376,0.6213,0.3265,0.9396,0.1983,0.8499,0.6526,0.8827,0.9604,0.7409,0.5458,0.6172,0.6817,0.9386,0.9911,0.7253,0.5211,0.8542,0.6154,0.5748,0.1796,0.5757,0.1296,0.6904,0.9173 ViT-L-14-CLIPA,datacomp1b,414.21,167.5,0.6536,0.7957,0.9453,0.9866,0.8850,0.1857,0.2449,0.2941,0.6963,0.6044,0.4299,0.9415,0.5906,0.7061,0.7305,0.7125,0.3370,0.9288,0.1927,0.7374,0.6988,0.8101,0.9497,0.8067,0.5915,0.5387,0.6843,0.9366,0.9919,0.7528,0.6390,0.8188,0.5604,0.4388,0.1724,0.6760,0.2457,0.6647,0.9152 convnext_xxlarge,laion2b_s34b_b82k_augreg_soup,1200.58,443.03,0.6530,0.7947,0.9448,0.9822,0.8687,0.1454,0.2365,0.3170,0.7053,0.6128,0.4434,0.9321,0.5508,0.6840,0.7260,0.6719,0.4060,0.9160,0.2363,0.8277,0.7273,0.8241,0.9445,0.8090,0.5142,0.6952,0.7190,0.9409,0.9810,0.7458,0.6254,0.8521,0.5867,0.4702,0.1730,0.6071,0.0000,0.6764,0.9215 convnext_xxlarge,laion2b_s34b_b82k_augreg_rewind,1200.58,443.03,0.6521,0.7931,0.9452,0.9823,0.8686,0.1651,0.2534,0.3155,0.7016,0.6331,0.4398,0.9308,0.5491,0.6825,0.7228,0.6657,0.3975,0.9139,0.2419,0.7930,0.7252,0.8241,0.9438,0.8100,0.5014,0.6897,0.7168,0.9406,0.9801,0.7459,0.6137,0.8498,0.5871,0.4741,0.1735,0.6071,0.0000,0.6799,0.9228