From e406c4c3ec28cfa45543cc83539747a698353b42 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Tue, 7 May 2024 16:28:35 -0700 Subject: [PATCH 1/4] Update loading_from_pretrained.py --- transformer_lens/loading_from_pretrained.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py index 0a8d132cc..f99f8d0cb 100644 --- a/transformer_lens/loading_from_pretrained.py +++ b/transformer_lens/loading_from_pretrained.py @@ -147,6 +147,7 @@ "stabilityai/stablelm-tuned-alpha-7b", "mistralai/Mistral-7B-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", + "mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-v0.1", "mistralai/Mixtral-8x7B-Instruct-v0.1", "bigscience/bloom-560m", @@ -557,6 +558,7 @@ ], "mistralai/Mistral-7B-v0.1": ["mistral-7b"], "mistralai/Mistral-7B-Instruct-v0.1": ["mistral-7b-instruct"], + "mistralai/Mistral-7B-Instruct-v0.2": ["mistral-7b-instruct"], "mistralai/Mixtral-8x7B-v0.1": ["mixtral", "mixtral-8x7b"], "mistralai/Mixtral-8x7B-Instruct-v0.1": [ "mixtral-instruct", From 1fdc18a85476018f623fb956db188746976cd755 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Thu, 9 May 2024 18:56:35 -0700 Subject: [PATCH 2/4] Update loading_from_pretrained.py --- transformer_lens/loading_from_pretrained.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py index f99f8d0cb..bb1e9b1ed 100644 --- a/transformer_lens/loading_from_pretrained.py +++ b/transformer_lens/loading_from_pretrained.py @@ -558,7 +558,7 @@ ], "mistralai/Mistral-7B-v0.1": ["mistral-7b"], "mistralai/Mistral-7B-Instruct-v0.1": ["mistral-7b-instruct"], - "mistralai/Mistral-7B-Instruct-v0.2": ["mistral-7b-instruct"], + "mistralai/Mistral-7B-Instruct-v0.2": ["mistral-7b-instruct-v0.2"], "mistralai/Mixtral-8x7B-v0.1": ["mixtral", "mixtral-8x7b"], "mistralai/Mixtral-8x7B-Instruct-v0.1": [ "mixtral-instruct", From be6256faf53ea43d2ca8ea9d61e715ab2708615f Mon Sep 17 00:00:00 2001 From: mrfakename Date: Fri, 10 May 2024 14:53:19 -0700 Subject: [PATCH 3/4] Add v0.2 to docs --- docs/source/content/getting_started.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/content/getting_started.md b/docs/source/content/getting_started.md index 13952cd5e..67ec8e47b 100644 --- a/docs/source/content/getting_started.md +++ b/docs/source/content/getting_started.md @@ -28,4 +28,5 @@ You will need to make sure you accept the agreements for any gated models, but o * https://huggingface.co/mistralai/Mixtral-8x7B-v0.1 * https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 +* https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2 * https://huggingface.co/mistralai/Mistral-7B-v0.1 From d7566e0277a964a3cd7d619169313f9924eb371d Mon Sep 17 00:00:00 2001 From: mrfakename Date: Fri, 10 May 2024 14:55:12 -0700 Subject: [PATCH 4/4] Update loading_from_pretrained.py --- transformer_lens/loading_from_pretrained.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py index bb1e9b1ed..5b7d1f975 100644 --- a/transformer_lens/loading_from_pretrained.py +++ b/transformer_lens/loading_from_pretrained.py @@ -941,7 +941,7 @@ def convert_hf_model_config(model_name: str, **kwargs): "act_fn": "silu", "normalization_type": "RMS", "positional_embedding_type": "rotary", - "window_size": 4096, + "window_size": hf_config.sliding_window, # This will be 4096 on v0.1, None on later models as none was used "attn_types": ["local"] * 32, "eps": 1e-05, "n_key_value_heads": 8,