Add the smaller starcoder variants.

huggingface · Jul 28, 2023 · 872d864 · 872d864
1 parent 3612217
commit 872d864
Show file tree

Hide file tree

Showing 2 changed files with 48 additions and 2 deletions.
diff --git a/candle-examples/examples/bigcode/main.rs b/candle-examples/examples/bigcode/main.rs
@@ -105,7 +105,7 @@ struct Args {
     #[arg(long, default_value_t = 100)]
     sample_len: usize,
 
-    #[arg(long, default_value = "bigcode/starcoder")]
+    #[arg(long, default_value = "bigcode/starcoderbase-1b")]
     model_id: String,
 
     #[arg(long, default_value = "main")]
@@ -139,7 +139,7 @@ fn main() -> Result<()> {
     let start = std::time::Instant::now();
     let device = candle_examples::device(args.cpu)?;
     let vb = VarBuilder::from_safetensors(weights, DType::F32, &device);
-    let config = Config::starcoder();
+    let config = Config::starcoder_1b();
     let model = GPTBigCode::load(vb, config)?;
     println!("loaded the model in {:?}", start.elapsed());
 

diff --git a/candle-examples/examples/bigcode/model.rs b/candle-examples/examples/bigcode/model.rs
@@ -48,6 +48,52 @@ pub struct Config {
 }
 
 impl Config {
+    #[allow(dead_code)] // 1b preset; stays warning-free if main.rs selects another size
+    pub fn starcoder_1b() -> Self {
+        Config {
+            num_hidden_layers: 24,
+            hidden_size: 2048,
+            num_attention_heads: 16, // hidden_size / 16 = 128
+            n_inner: Some(8192), // 4 * hidden_size
+            multi_query: true,
+            max_position_embeddings: 8192,
+            vocab_size: 49152,
+            layer_norm_epsilon: 1e-5,
+            use_cache: true,
+        }
+    }
+
+    #[allow(dead_code)] // 3b preset; main.rs hard-codes one preset, so the rest would warn
+    pub fn starcoder_3b() -> Self {
+        Config {
+            num_hidden_layers: 36,
+            hidden_size: 2816,
+            num_attention_heads: 22, // hidden_size / 22 = 128
+            n_inner: Some(11264), // 4 * hidden_size
+            multi_query: true,
+            max_position_embeddings: 8192,
+            vocab_size: 49152,
+            layer_norm_epsilon: 1e-5,
+            use_cache: true,
+        }
+    }
+
+    #[allow(dead_code)] // 7b preset; main.rs hard-codes one preset, so the rest would warn
+    pub fn starcoder_7b() -> Self {
+        Config {
+            num_hidden_layers: 42,
+            hidden_size: 4096,
+            num_attention_heads: 32, // hidden_size / 32 = 128
+            n_inner: Some(16384), // 4 * hidden_size
+            multi_query: true,
+            max_position_embeddings: 8192,
+            vocab_size: 49152,
+            layer_norm_epsilon: 1e-5,
+            use_cache: true,
+        }
+    }
+
+    #[allow(dead_code)]
     pub fn starcoder() -> Self {
         Self {
             vocab_size: 49152,