diff --git a/transformer_lens/HookedTransformer.py b/transformer_lens/HookedTransformer.py
index afff3f508..0e022f3a1 100644
--- a/transformer_lens/HookedTransformer.py
+++ b/transformer_lens/HookedTransformer.py
@@ -1080,7 +1080,7 @@ def from_pretrained(
         tokenizer: Optional[PreTrainedTokenizerBase] = None,
         move_to_device: bool = True,
         fold_value_biases: bool = True,
-        default_prepend_bos: bool = True,
+        default_prepend_bos: Optional[bool] = None,
         default_padding_side: Literal["left", "right"] = "right",
         dtype="float32",
         first_n_layers: Optional[int] = None,
@@ -1202,14 +1202,16 @@ def from_pretrained(
                 remains exactly the same, and so is just broadcast across the destination positions.
             default_prepend_bos: Default behavior of whether to prepend the BOS token when the
                 methods of HookedTransformer process input text to tokenize (only
-                when input is a string). Defaults to True - even for models not explicitly trained
-                with this, heads often use the first position as a resting position and accordingly
-                lose information from the first token, so this empirically seems to give better
-                results. To change the default behavior to False, pass in default_prepend_bos=False.
-                Note that you can also locally override the default behavior by passing in
+                when input is a string).
+                Resolution order for default_prepend_bos:
+                1. If user passes value explicitly, use that value
+                2. Model-specific default from cfg_dict if it exists (e.g. for bloom models it's False)
+                3. Global default (True)
+
+                Even for models not explicitly trained with the BOS token, heads often use the first position as a resting position
+                and accordingly lose information from the first token, so this empirically seems to give better
+                results. Note that you can also locally override the default behavior by passing in
                 prepend_bos=True/False when you call a method that processes the input string.
-                If model is part of the Bloom model family, default_prepend_bos is set to False
-                by default and has to be locally overriden to True when you call a method if needed.
             from_pretrained_kwargs: Any other optional argument passed to HuggingFace's
                 from_pretrained (e.g. "cache_dir" or "torch_dtype"). Also passed to other
                 HuggingFace functions when compatible. For some models or arguments it doesn't
@@ -1269,10 +1271,6 @@ def from_pretrained(
         # Get the model name used in HuggingFace, rather than the alias.
         official_model_name = loading.get_official_model_name(model_name)
 
-        # Set prepend_bos to False by default if the model is part of the bloom model family
-        if "bloom" in official_model_name:
-            default_prepend_bos = False
-
         # Load the config into an HookedTransformerConfig object. If loading from a
         # checkpoint, the config object will contain the information about the
         # checkpoint
@@ -1356,7 +1354,7 @@ def from_pretrained_no_processing(
         refactor_factored_attn_matrices=False,
         fold_value_biases=False,
         dtype=torch.float32,
-        default_prepend_bos=True,
+        default_prepend_bos=None,
         default_padding_side="right",
         **from_pretrained_kwargs,
     ):
diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py
index 49dffbf04..b26ccc03b 100644
--- a/transformer_lens/loading_from_pretrained.py
+++ b/transformer_lens/loading_from_pretrained.py
@@ -1498,7 +1498,7 @@ def get_pretrained_model_config(
     fold_ln: bool = False,
     device: Optional[Union[str, torch.device]] = None,
     n_devices: int = 1,
-    default_prepend_bos: bool = True,
+    default_prepend_bos: Optional[bool] = None,
     dtype: torch.dtype = torch.float32,
     first_n_layers: Optional[int] = None,
     **kwargs,
@@ -1529,11 +1529,15 @@ def get_pretrained_model_config(
         n_devices (int, optional): The number of devices to split the model across. Defaults to 1.
         default_prepend_bos (bool, optional): Default behavior of whether to prepend the BOS token when the
             methods of HookedTransformer process input text to tokenize (only when input is a string).
-            Defaults to True - even for models not explicitly trained with this, heads often use the
+            Resolution order for default_prepend_bos:
+            1. If user passes value explicitly, use that value
+            2. Model-specific default from cfg_dict if it exists (e.g. for bloom models it's False)
+            3. Global default (True)
+
+            Even for models not explicitly trained with the BOS token, heads often use the
             first position as a resting position and accordingly lose information from the first token,
-            so this empirically seems to give better results. To change the default behavior to False, pass in
-            default_prepend_bos=False. Note that you can also locally override the default behavior by passing
-            in prepend_bos=True/False when you call a method that processes the input string.
+            so this empirically seems to give better results. Note that you can also locally override the default behavior
+            by passing in prepend_bos=True/False when you call a method that processes the input string.
         dtype (torch.dtype, optional): The dtype to load the TransformerLens model in.
         kwargs: Other optional arguments passed to HuggingFace's from_pretrained. Also given to other
             HuggingFace functions when compatible.
@@ -1610,7 +1614,14 @@ def get_pretrained_model_config(
     cfg_dict["device"] = device
     cfg_dict["n_devices"] = n_devices
-    cfg_dict["default_prepend_bos"] = default_prepend_bos
+
+    if default_prepend_bos is not None:
+        # User explicitly set prepend_bos behavior, override config/default value
+        cfg_dict["default_prepend_bos"] = default_prepend_bos
+    elif "default_prepend_bos" not in cfg_dict:
+        # No config value or user override, set default value (True)
+        cfg_dict["default_prepend_bos"] = True
+
     if hf_cfg is not None:
         cfg_dict["load_in_4bit"] = hf_cfg.get("quantization_config", {}).get("load_in_4bit", False)
     if first_n_layers is not None:
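A minimal usage sketch of the resolution order introduced by this patch. The model names used here (gpt2, bigscience/bloom-560m) are purely illustrative, and the bloom case assumes the model-specific default_prepend_bos=False entry described above; only HookedTransformer.from_pretrained, cfg.default_prepend_bos, and to_tokens from the existing TransformerLens API are relied on.

    from transformer_lens import HookedTransformer

    # 1. An explicit user value always wins over any per-model or global default.
    model = HookedTransformer.from_pretrained("gpt2", default_prepend_bos=False)
    assert model.cfg.default_prepend_bos is False

    # 2. With no explicit value, a model-specific default from the pretrained cfg_dict
    #    is used when present (e.g. the bloom family ships default_prepend_bos=False).
    bloom = HookedTransformer.from_pretrained("bigscience/bloom-560m")

    # 3. If neither source provides a value, the global default (True) applies.
    model = HookedTransformer.from_pretrained("gpt2")
    assert model.cfg.default_prepend_bos is True

    # The resolved default can still be overridden per call when tokenizing a string:
    tokens = model.to_tokens("Hello world", prepend_bos=False)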