From a14da5aa99dad6370867ea1710dd66318b722064 Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Mon, 21 Oct 2024 11:56:20 -0700
Subject: [PATCH] Add safeglobals to allow metaclip models to load with weights_only=True, add bigG metaclip while working with them

---
 src/open_clip/factory.py                      | 14 ++++++++++++++
 .../model_configs/ViT-bigG-14-quickgelu.json  | 19 +++++++++++++++++++
 src/open_clip/pretrained.py                   |  5 +++++
 3 files changed, 38 insertions(+)
 create mode 100644 src/open_clip/model_configs/ViT-bigG-14-quickgelu.json

diff --git a/src/open_clip/factory.py b/src/open_clip/factory.py
index 82ebe2bb9..6b3ddade8 100644
--- a/src/open_clip/factory.py
+++ b/src/open_clip/factory.py
@@ -26,6 +26,20 @@
 _MODEL_CONFIGS = {}  # directory (model_name: config) of model architecture configs
 
 
+try:
+    import _codecs
+    import numpy as np
+    # add safe globals, known to be needed for metaclip weights
+    torch.serialization.add_safe_globals([
+        _codecs.encode,  # now in pytorch main but some pytorch versions w/ weights_only flag don't have it
+        np.core.multiarray.scalar,
+        np.dtype,
+        np.dtypes.Float64DType,
+    ])
+except Exception:
+    pass
+
+
 def _natural_key(string_):
     return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_.lower())]
 
diff --git a/src/open_clip/model_configs/ViT-bigG-14-quickgelu.json b/src/open_clip/model_configs/ViT-bigG-14-quickgelu.json
new file mode 100644
index 000000000..fed567cc6
--- /dev/null
+++ b/src/open_clip/model_configs/ViT-bigG-14-quickgelu.json
@@ -0,0 +1,19 @@
+{
+    "embed_dim": 1280,
+    "quick_gelu": true,
+    "vision_cfg": {
+        "image_size": 224,
+        "layers": 48,
+        "width": 1664,
+        "head_width": 104,
+        "mlp_ratio": 4.9231,
+        "patch_size": 14
+    },
+    "text_cfg": {
+        "context_length": 77,
+        "vocab_size": 49408,
+        "width": 1280,
+        "heads": 20,
+        "layers": 32
+    }
+}
\ No newline at end of file
diff --git a/src/open_clip/pretrained.py b/src/open_clip/pretrained.py
index 35de55064..8c89d3035 100644
--- a/src/open_clip/pretrained.py
+++ b/src/open_clip/pretrained.py
@@ -280,6 +280,10 @@ def _mccfg(url='', hf_hub='', **kwargs):
     laion2b_s39b_b160k=_pcfg(hf_hub='laion/CLIP-ViT-bigG-14-laion2B-39B-b160k/'),
 )
 
+_VITbigG14_quickgelu = dict(
+    metaclip_fullcc=_pcfg(url='https://dl.fbaipublicfiles.com/MMPT/metaclip/G14_fullcc2.5b.pt'),
+)
+
 _robertaViTB32 = dict(
     laion2b_s12b_b32k=_pcfg(hf_hub='laion/CLIP-ViT-B-32-roberta-base-laion2B-s12B-b32k/'),
 )
@@ -356,6 +360,7 @@ def _mccfg(url='', hf_hub='', **kwargs):
     "ViT-H-14-378-quickgelu": _VITH14_378_quickgelu,
     "ViT-g-14": _VITg14,
     "ViT-bigG-14": _VITbigG14,
+    "ViT-bigG-14-quickgelu": _VITbigG14_quickgelu,
     "roberta-ViT-B-32": _robertaViTB32,
     "xlm-roberta-base-ViT-B-32": _xlmRobertaBaseViTB32,