diff --git a/docs/source/content/getting_started.md b/docs/source/content/getting_started.md
index 13952cd5e..67ec8e47b 100644
--- a/docs/source/content/getting_started.md
+++ b/docs/source/content/getting_started.md
@@ -28,4 +28,5 @@ You will need to make sure you accept the agreements for any gated models, but o
 * https://huggingface.co/mistralai/Mixtral-8x7B-v0.1
 * https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
+* https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
 * https://huggingface.co/mistralai/Mistral-7B-v0.1
diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py
index 0a8d132cc..5b7d1f975 100644
--- a/transformer_lens/loading_from_pretrained.py
+++ b/transformer_lens/loading_from_pretrained.py
@@ -147,6 +147,7 @@
     "stabilityai/stablelm-tuned-alpha-7b",
     "mistralai/Mistral-7B-v0.1",
     "mistralai/Mistral-7B-Instruct-v0.1",
+    "mistralai/Mistral-7B-Instruct-v0.2",
     "mistralai/Mixtral-8x7B-v0.1",
     "mistralai/Mixtral-8x7B-Instruct-v0.1",
     "bigscience/bloom-560m",
@@ -557,6 +558,7 @@
     ],
     "mistralai/Mistral-7B-v0.1": ["mistral-7b"],
     "mistralai/Mistral-7B-Instruct-v0.1": ["mistral-7b-instruct"],
+    "mistralai/Mistral-7B-Instruct-v0.2": ["mistral-7b-instruct-v0.2"],
     "mistralai/Mixtral-8x7B-v0.1": ["mixtral", "mixtral-8x7b"],
     "mistralai/Mixtral-8x7B-Instruct-v0.1": [
         "mixtral-instruct",
@@ -939,7 +941,7 @@ def convert_hf_model_config(model_name: str, **kwargs):
             "act_fn": "silu",
             "normalization_type": "RMS",
             "positional_embedding_type": "rotary",
-            "window_size": 4096,
+            "window_size": hf_config.sliding_window,  # This will be 4096 on v0.1, None on later models as none was used
             "attn_types": ["local"] * 32,
             "eps": 1e-05,
             "n_key_value_heads": 8,
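
For context, a minimal sketch of how the newly registered alias could be used once this change lands. This assumes a standard TransformerLens install and that the gated-model agreement for Mistral-7B-Instruct-v0.2 on Hugging Face has been accepted (per the getting_started.md entry above); the prompt and `return_type` argument are illustrative.

```python
# Sketch: load the model via the "mistral-7b-instruct-v0.2" alias added in this diff.
from transformer_lens import HookedTransformer

model = HookedTransformer.from_pretrained("mistral-7b-instruct-v0.2")

# Run a forward pass on a string prompt and inspect the logits.
logits = model("Hello, world!", return_type="logits")
print(logits.shape)  # [batch, seq_len, d_vocab]
```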