Add correct 1B model size (rename the previous "1B" config to "271M")

pytorch · Feb 28, 2024 · 8fea674 · 8fea674
1 parent bbc165f
commit 8fea674
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 3 deletions.
diff --git a/torchtrain/models/llama/__init__.py b/torchtrain/models/llama/__init__.py
@@ -7,7 +7,8 @@
 
 llama_configs = {
     "debugmodel": ModelArgs(dim=256, n_layers=2, n_heads=16),
-    "1B": ModelArgs(dim=1024, n_layers=16, n_heads=8),
+    "271M": ModelArgs(dim=1024, n_layers=16, n_heads=8),
+    "1B": ModelArgs(dim=2048, n_layers=18, n_heads=16),
     "7B": ModelArgs(dim=4096, n_layers=32, n_heads=32),
     "13B": ModelArgs(dim=5120, n_layers=40, n_heads=40),
     "26B": ModelArgs(dim=5120, n_layers=80, n_heads=40),

diff --git a/train_configs/debug_model.toml b/train_configs/debug_model.toml
@@ -3,7 +3,7 @@
 dump_folder = "./outputs"
 
 [profiling]
-run_profiler = true
+run_profiler = false
 save_traces_folder = "profiling/traces"
 # profiling frequency - example: 10 means every 10th iter will be profiled
 profile_every_x_iter = 10
@@ -15,7 +15,7 @@ log_freq = 10
 
 [model]
 name = "llama"
-flavor = "debugmodel"
+flavor = "1B"
 tokenizer_path = "./torchtrain/datasets/tokenizer/tokenizer.model"
 
 [optimizer]