From 52b405e5d0e880bcfe80687568d58e29b21bdd09 Mon Sep 17 00:00:00 2001 From: "Documenter.jl" Date: Tue, 17 Dec 2024 10:29:59 +0000 Subject: [PATCH] build based on 2b043c1 --- dev/.documenter-siteinfo.json | 2 +- dev/api/cells/index.html | 30 +++++++++++++++--------------- dev/api/layers/index.html | 28 ++++++++++++++-------------- dev/index.html | 2 +- dev/roadmap/index.html | 2 +- dev/search_index.js | 2 +- 6 files changed, 33 insertions(+), 33 deletions(-) diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json index ec1ff9c..92068ee 100644 --- a/dev/.documenter-siteinfo.json +++ b/dev/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.11.2","generation_timestamp":"2024-12-17T08:48:46","documenter_version":"1.8.0"}} \ No newline at end of file +{"documenter":{"julia_version":"1.11.2","generation_timestamp":"2024-12-17T10:29:51","documenter_version":"1.8.0"}} \ No newline at end of file diff --git a/dev/api/cells/index.html b/dev/api/cells/index.html index c462d2e..10b568b 100644 --- a/dev/api/cells/index.html +++ b/dev/api/cells/index.html @@ -9,11 +9,11 @@ c_t &= i_t \odot \tilde{c}_t + f_t \odot c_{t-1}, \\ h_t &= g(c_t) \end{aligned}\]

Forward

rancell(inp, (state, cstate))
-rancell(inp)

Arguments

Returns

source
RecurrentLayers.IndRNNCellType
IndRNNCell((input_size => hidden_size)::Pair, σ=relu;
+rancell(inp)

Arguments

  • inp: The input to the rancell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the RANCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. They are tensors of size hidden_size or hidden_size x batch_size.
source
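Example

A brief usage sketch of the cell API described above. The sizes are made up for illustration; only the constructor and forward signatures documented in this docstring are assumed.

```julia
using Flux, RecurrentLayers

rancell = RANCell(3 => 5)              # input_size = 3, hidden_size = 5
inp = rand(Float32, 3, 8)              # input_size x batch_size

# omitting the state starts from zeros (Flux.initialstates)
output, (state, cstate) = rancell(inp)

# later steps pass the previous hidden and cell state explicitly
output, (state, cstate) = rancell(inp, (state, cstate))
size(output)                           # (5, 8)
```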
RecurrentLayers.IndRNNCellType
IndRNNCell((input_size => hidden_size)::Pair, σ=relu;
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true)

Independently recurrent cell. See IndRNN for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • σ: activation function. Default is relu
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\mathbf{h}_{t} = \sigma(\mathbf{W} \mathbf{x}_t + \mathbf{u} \odot \mathbf{h}_{t-1} + \mathbf{b})\]

Forward

indrnncell(inp, state)
-indrnncell(inp)

Arguments

  • inp: The input to the indrnncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the IndRNNCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.LightRUCellType
LightRUCell((input_size => hidden_size)::Pair;
+indrnncell(inp)

Arguments

  • inp: The input to the indrnncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the IndRNNCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.
source
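Example

A short sketch of the activation and initializer options from the signature above; the specific choices (tanh, Flux.glorot_normal) and sizes are arbitrary.

```julia
using Flux, RecurrentLayers

indrnncell = IndRNNCell(3 => 5, tanh; init_recurrent_kernel = Flux.glorot_normal)

inp = rand(Float32, 3)                 # one unbatched input of size input_size
output, state = indrnncell(inp)        # zero initial state assumed
output, state = indrnncell(inp, state)
```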
RecurrentLayers.LightRUCellType
LightRUCell((input_size => hidden_size)::Pair;
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true)

Light recurrent unit. See LightRU for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} @@ -21,7 +21,7 @@ f_t &= \delta(W_f x_t + U_f h_{t-1} + b_f), \\ h_t &= (1 - f_t) \odot h_{t-1} + f_t \odot \tilde{h}_t. \end{aligned}\]

Forward

lightrucell(inp, state)
-lightrucell(inp)

Arguments

  • inp: The input to the lightrucell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the LightRUCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.LiGRUCellType
LiGRUCell((input_size => hidden_size)::Pair;
+lightrucell(inp)

Arguments

  • inp: The input to the lightrucell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the LightRUCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.LiGRUCellType
LiGRUCell((input_size => hidden_size)::Pair;
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true)

Light gated recurrent unit. The implementation does not include the batch normalization as described in the original paper. See LiGRU for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} @@ -29,7 +29,7 @@ \tilde{h}_t &= \text{ReLU}(W_h x_t + U_h h_{t-1}), \\ h_t &= z_t \odot h_{t-1} + (1 - z_t) \odot \tilde{h}_t \end{aligned}\]

Forward

ligrucell(inp, state)
-ligrucell(inp)

Arguments

  • inp: The input to the ligrucell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the LiGRUCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.MGUCellType
MGUCell((input_size => hidden_size)::Pair;
+ligrucell(inp)

Arguments

  • inp: The input to the ligrucell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the LiGRUCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.MGUCellType
MGUCell((input_size => hidden_size)::Pair;
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true)

Minimal gated unit. See MGU for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} @@ -37,7 +37,7 @@ \tilde{h}_t &= \tanh(W_h x_t + U_h (f_t \odot h_{t-1}) + b_h), \\ h_t &= (1 - f_t) \odot h_{t-1} + f_t \odot \tilde{h}_t \end{aligned}\]

Forward

mgucell(inp, state)
-mgucell(inp)

Arguments

  • inp: The input to the mgucell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the MGUCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.NASCellType
NASCell((input_size => hidden_size);
+mgucell(inp)

Arguments

  • inp: The input to the mgucell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the MGUCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.
source
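Example

A sketch of driving a cell over a sequence step by step; run_sequence is a hypothetical helper written for illustration, not part of the package.

```julia
using Flux, RecurrentLayers

# carry the hidden state forward through a vector of time steps
function run_sequence(cell, steps)
    output, state = cell(steps[1])     # zero initial state assumed
    for x in steps[2:end]
        output, state = cell(x, state)
    end
    return output
end

mgucell = MGUCell(3 => 5)
steps = [rand(Float32, 3, 8) for _ in 1:10]   # 10 steps, batch of 8
run_sequence(mgucell, steps)                  # final hidden state, 5 x 8
```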
RecurrentLayers.NASCellType
NASCell((input_size => hidden_size);
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true)

Neural Architecture Search unit. See NAS for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} @@ -65,7 +65,7 @@ l_5 &= \tanh(l_3 + l_4) \\ h_{\text{new}} &= \tanh(c_{\text{new}} \cdot l_5) \end{aligned}\]

Forward

nascell(inp, (state, cstate))
-nascell(inp)

Arguments

  • inp: The input to the nascell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the NASCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros.

Returns

  • A tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. They are tensors of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.RHNCellType
RHNCell((input_size => hidden_size), depth=3;
+nascell(inp)

Arguments

  • inp: The input to the nascell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the NASCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. They are tensors of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.RHNCellType
RHNCell((input_size => hidden_size), depth=3;
     couple_carry::Bool = true,
     cell_kwargs...)

Recurrent highway network. See RHNCellUnit for the unit component of this layer. See RHN for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • depth: depth of the recurrence. Default is 3
  • couple_carry: couples the carry gate and the transform gate. Default true
  • init_kernel: initializer for the input to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} s_{\ell}^{[t]} &= h_{\ell}^{[t]} \odot t_{\ell}^{[t]} + s_{\ell-1}^{[t]} \odot c_{\ell}^{[t]}, \\ @@ -73,9 +73,9 @@ h_{\ell}^{[t]} &= \tanh(W_h x^{[t]}\mathbb{I}_{\ell = 1} + U_{h_{\ell}} s_{\ell-1}^{[t]} + b_{h_{\ell}}), \\ t_{\ell}^{[t]} &= \sigma(W_t x^{[t]}\mathbb{I}_{\ell = 1} + U_{t_{\ell}} s_{\ell-1}^{[t]} + b_{t_{\ell}}), \\ c_{\ell}^{[t]} &= \sigma(W_c x^{[t]}\mathbb{I}_{\ell = 1} + U_{c_{\ell}} s_{\ell-1}^{[t]} + b_{c_{\ell}}) -\end{aligned}\]

Forward

rnncell(inp, [state])
source
RecurrentLayers.RHNCellUnitType
RHNCellUnit((input_size => hidden_size)::Pair;
+\end{aligned}\]

Forward

rnncell(inp, [state])
source
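Example

A sketch assuming the documented constructor; the (output, state) return convention of the other cells on this page is assumed to hold here as well.

```julia
using Flux, RecurrentLayers

# depth is positional, couple_carry is a keyword, per the signature above
rhncell = RHNCell(3 => 5, 4; couple_carry = false)

inp = rand(Float32, 3, 8)
output, state = rhncell(inp)           # zero initial state assumed
output, state = rhncell(inp, state)
```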
RecurrentLayers.RHNCellUnitType
RHNCellUnit((input_size => hidden_size)::Pair;
     init_kernel = glorot_uniform,
-    bias = true)
source
RecurrentLayers.MUT1CellType
MUT1Cell((input_size => hidden_size);
+    bias = true)
source
RecurrentLayers.MUT1CellType
MUT1Cell((input_size => hidden_size);
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true)

Mutated unit 1 cell. See MUT1 for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} @@ -84,7 +84,7 @@ h_{t+1} &= \tanh(U_h (r \odot h_t) + \tanh(W_h x_t) + b_h) \odot z \\ &\quad + h_t \odot (1 - z). \end{aligned}\]

Forward

mutcell(inp, state)
-mutcell(inp)

Arguments

  • inp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.

source
RecurrentLayers.MUT2CellType
MUT2Cell((input_size => hidden_size);
+mutcell(inp)

Arguments

  • inp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.

source
RecurrentLayers.MUT2CellType
MUT2Cell((input_size => hidden_size);
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true)

Mutated unit 2 cell. See MUT2 for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} @@ -93,7 +93,7 @@ h_{t+1} &= \tanh(U_h (r \odot h_t) + W_h x_t + b_h) \odot z \\ &\quad + h_t \odot (1 - z). \end{aligned}\]

Forward

mutcell(inp, state)
-mutcell(inp)

Arguments

  • inp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.

source
RecurrentLayers.MUT3CellType
MUT3Cell((input_size => hidden_size);
+mutcell(inp)

Arguments

  • inp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.

source
RecurrentLayers.MUT3CellType
MUT3Cell((input_size => hidden_size);
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true)

Mutated unit 3 cell. See MUT3 for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} @@ -102,7 +102,7 @@ h_{t+1} &= \tanh(U_h (r \odot h_t) + W_h x_t + b_h) \odot z \\ &\quad + h_t \odot (1 - z). \end{aligned}\]

Forward

mutcell(inp, state)
-mutcell(inp)

Arguments

  • inp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.

source
RecurrentLayers.SCRNCellType
SCRNCell((input_size => hidden_size)::Pair;
+mutcell(inp)

Arguments

  • inp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.

source
RecurrentLayers.SCRNCellType
SCRNCell((input_size => hidden_size)::Pair;
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true,
@@ -111,7 +111,7 @@
 h_t &= \sigma(W_h s_t + U_h h_{t-1} + b_h), \\
 y_t &= f(U_y h_t + W_y s_t)
 \end{aligned}\]

Forward

scrncell(inp, (state, cstate))
-scrncell(inp)

Arguments

  • inp: The input to the scrncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the SCRNCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros.

Returns

  • A tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. They are tensors of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.PeepholeLSTMCellType
PeepholeLSTMCell((input_size => hidden_size)::Pair;
+scrncell(inp)

Arguments

  • inp: The input to the scrncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the SCRNCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. They are tensors of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.PeepholeLSTMCellType
PeepholeLSTMCell((input_size => hidden_size)::Pair;
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true)

Peephole long short term memory cell. See PeepholeLSTM for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} @@ -121,14 +121,14 @@ c_t &= f_t \odot c_{t-1} + i_t \odot \sigma_c(W_c x_t + b_c), \\ h_t &= o_t \odot \sigma_h(c_t). \end{aligned}\]

Forward

peepholelstmcell(inp, (state, cstate))
-peepholelstmcell(inp)

Arguments

  • inp: The input to the peepholelstmcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the PeepholeLSTMCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros.

Returns

  • A tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. They are tensors of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.FastRNNCellType
FastRNNCell((input_size => hidden_size), [activation];
+peepholelstmcell(inp)

Arguments

  • inp: The input to the peepholelstmcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the PeepholeLSTMCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. They are tensors of size hidden_size or hidden_size x batch_size.
source
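Example

A brief sketch with batched input; sizes are arbitrary and only the signatures documented above are assumed.

```julia
using Flux, RecurrentLayers

plstmcell = PeepholeLSTMCell(3 => 5)
inp = rand(Float32, 3, 16)                         # input_size x batch_size

output, (state, cstate) = plstmcell(inp)           # both states start from zeros
output, (state, cstate) = plstmcell(inp, (state, cstate))
size(state), size(cstate)                          # ((5, 16), (5, 16))
```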
RecurrentLayers.FastRNNCellType
FastRNNCell((input_size => hidden_size), [activation];
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true)

Fast recurrent neural network cell. See FastRNN for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • activation: the activation function, defaults to tanh_fast
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} \tilde{h}_t &= \sigma(W_h x_t + U_h h_{t-1} + b), \\ h_t &= \alpha \tilde{h}_t + \beta h_{t-1} \end{aligned}\]

Forward

fastrnncell(inp, state)
-fastrnncell(inp)

Arguments

  • inp: The input to the fastrnncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the FastRNN. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.
source
RecurrentLayers.FastGRNNCellType
FastGRNNCell((input_size => hidden_size), [activation];
+fastrnncell(inp)

Arguments

  • inp: The input to the fastrnncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the FastRNN. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.
source
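Example

A sketch of the optional positional activation and the bias keyword; relu here is an arbitrary choice replacing the default tanh_fast.

```julia
using Flux, RecurrentLayers

fastrnncell = FastRNNCell(3 => 5, relu; bias = false)

inp = rand(Float32, 3)
output, state = fastrnncell(inp)        # zero initial state assumed
output, state = fastrnncell(inp, state)
```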
RecurrentLayers.FastGRNNCellType
FastGRNNCell((input_size => hidden_size), [activation];
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true)

Fast gated recurrent neural network cell. See FastGRNN for a layer that processes entire sequences.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • activation: the activation function, defaults to tanh_fast
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} @@ -136,4 +136,4 @@ \tilde{h}_t &= \tanh(W_h x_t + U_h h_{t-1} + b_h), \\ h_t &= \big((\zeta (1 - z_t) + \nu) \odot \tilde{h}_t\big) + z_t \odot h_{t-1} \end{aligned}\]

Forward

fastgrnncell(inp, state)
-fastgrnncell(inp)

Arguments

  • inp: The input to the fastgrnncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.
  • state: The hidden state of the FastGRNN. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • A tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.
source
+fastgrnncell(inp)

Arguments

Returns

source diff --git a/dev/api/layers/index.html b/dev/api/layers/index.html index 48f0f3e..841b9ea 100644 --- a/dev/api/layers/index.html +++ b/dev/api/layers/index.html @@ -6,24 +6,24 @@ c_t &= i_t \odot \tilde{c}_t + f_t \odot c_{t-1}, \\ h_t &= g(c_t) \end{aligned}\]

Forward

ran(inp, (state, cstate))
-ran(inp)

Arguments

Returns

source
RecurrentLayers.IndRNNType
IndRNN((input_size => hidden_size)::Pair, σ=relu;
+ran(inp)

Arguments

  • inp: The input to the ran. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the RAN. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
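Example

A sketch of whole-sequence processing with the layer API above; the dimensions are hypothetical.

```julia
using Flux, RecurrentLayers

ran = RAN(3 => 5)
inp = rand(Float32, 3, 12, 8)          # input_size x len x batch_size

states = ran(inp)                      # zero initial states assumed
size(states)                           # (5, 12, 8): one hidden state per step
```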
RecurrentLayers.IndRNNType
IndRNN((input_size => hidden_size)::Pair, σ=relu;
     kwargs...)

Independently recurrent network. See IndRNNCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • σ: activation function. Default is relu
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\mathbf{h}_{t} = \sigma(\mathbf{W} \mathbf{x}_t + \mathbf{u} \odot \mathbf{h}_{t-1} + \mathbf{b})\]

Forward

indrnn(inp, state)
-indrnn(inp)

Arguments

  • inp: The input to the indrnn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the IndRNN. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.LightRUType
LightRU((input_size => hidden_size)::Pair; kwargs...)

Light recurrent unit network. See LightRUCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} +indrnn(inp)

Arguments

  • inp: The input to the indrnn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the IndRNN. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
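Example

A sketch assuming the signature above; the activation and sizes are arbitrary.

```julia
using Flux, RecurrentLayers

indrnn = IndRNN(3 => 5, relu)
inp = rand(Float32, 3, 12, 8)          # input_size x len x batch_size

states = indrnn(inp)                   # hidden_size x len x batch_size
last_state = states[:, end, :]         # hidden state after the final step
```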
RecurrentLayers.LightRUType
LightRU((input_size => hidden_size)::Pair; kwargs...)

Light recurrent unit network. See LightRUCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} \tilde{h}_t &= \tanh(W_h x_t), \\ f_t &= \delta(W_f x_t + U_f h_{t-1} + b_f), \\ h_t &= (1 - f_t) \odot h_{t-1} + f_t \odot \tilde{h}_t. \end{aligned}\]

Forward

lightru(inp, state)
-lightru(inp)

Arguments

  • inp: The input to the lightru. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the LightRU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.LiGRUType
LiGRU((input_size => hidden_size)::Pair; kwargs...)

Light gated recurrent network. The implementation does not include the batch normalization as described in the original paper. See LiGRUCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} +lightru(inp)

Arguments

  • inp: The input to the lightru. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the LightRU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.LiGRUType
LiGRU((input_size => hidden_size)::Pair; kwargs...)

Light gated recurrent network. The implementation does not include the batch normalization as described in the original paper. See LiGRUCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} z_t &= \sigma(W_z x_t + U_z h_{t-1}), \\ \tilde{h}_t &= \text{ReLU}(W_h x_t + U_h h_{t-1}), \\ h_t &= z_t \odot h_{t-1} + (1 - z_t) \odot \tilde{h}_t \end{aligned}\]

Forward

ligru(inp, state)
-ligru(inp)

Arguments

  • inp: The input to the ligru. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the LiGRU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.MGUType
MGU((input_size => hidden_size)::Pair; kwargs...)

Minimal gated unit network. See MGUCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} +ligru(inp)

Arguments

  • inp: The input to the ligru. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the LiGRU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.MGUType
MGU((input_size => hidden_size)::Pair; kwargs...)

Minimal gated unit network. See MGUCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} f_t &= \sigma(W_f x_t + U_f h_{t-1} + b_f), \\ \tilde{h}_t &= \tanh(W_h x_t + U_h (f_t \odot h_{t-1}) + b_h), \\ h_t &= (1 - f_t) \odot h_{t-1} + f_t \odot \tilde{h}_t \end{aligned}\]

Forward

mgu(inp, state)
-mgu(inp)

Arguments

  • inp: The input to the mgu. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the MGU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.NASType
NAS((input_size => hidden_size)::Pair; kwargs...)

Neural Architecture Search unit. See NASCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} +mgu(inp)

Arguments

  • inp: The input to the mgu. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the MGU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.NASType
NAS((input_size => hidden_size)::Pair; kwargs...)

Neural Architecture Search unit. See NASCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} \text{First Layer Outputs:} & \\ o_1 &= \sigma(W_i^{(1)} x_t + W_h^{(1)} h_{t-1} + b^{(1)}), \\ o_2 &= \text{ReLU}(W_i^{(2)} x_t + W_h^{(2)} h_{t-1} + b^{(2)}), \\ @@ -48,31 +48,31 @@ l_5 &= \tanh(l_3 + l_4) \\ h_{\text{new}} &= \tanh(c_{\text{new}} \cdot l_5) \end{aligned}\]

Forward

nas(inp, (state, cstate))
-nas(inp)

Arguments

  • inp: The input to the nas. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the NAS. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.RHNType
RHN((input_size => hidden_size)::Pair, depth=3; kwargs...)

Recurrent highway network. See RHNCellUnit for the unit component of this layer. See RHNCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • depth: depth of the recurrence. Default is 3
  • couple_carry: couples the carry gate and the transform gate. Default true
  • init_kernel: initializer for the input to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} +nas(inp)

Arguments

  • inp: The input to the nas. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the NAS. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.RHNType
RHN((input_size => hidden_size)::Pair, depth=3; kwargs...)

Recurrent highway network. See RHNCellUnit for the unit component of this layer. See RHNCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • depth: depth of the recurrence. Default is 3
  • couple_carry: couples the carry gate and the transform gate. Default true
  • init_kernel: initializer for the input to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} s_{\ell}^{[t]} &= h_{\ell}^{[t]} \odot t_{\ell}^{[t]} + s_{\ell-1}^{[t]} \odot c_{\ell}^{[t]}, \\ \text{where} \\ h_{\ell}^{[t]} &= \tanh(W_h x^{[t]}\mathbb{I}_{\ell = 1} + U_{h_{\ell}} s_{\ell-1}^{[t]} + b_{h_{\ell}}), \\ t_{\ell}^{[t]} &= \sigma(W_t x^{[t]}\mathbb{I}_{\ell = 1} + U_{t_{\ell}} s_{\ell-1}^{[t]} + b_{t_{\ell}}), \\ c_{\ell}^{[t]} &= \sigma(W_c x^{[t]}\mathbb{I}_{\ell = 1} + U_{c_{\ell}} s_{\ell-1}^{[t]} + b_{c_{\ell}}) -\end{aligned}\]

source
RecurrentLayers.MUT1Type
MUT1((input_size => hidden_size); kwargs...)

Mutated unit 1 network. See MUT1Cell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} +\end{aligned}\]

source
RecurrentLayers.MUT1Type
MUT1((input_size => hidden_size); kwargs...)

Mutated unit 1 network. See MUT1Cell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} z &= \sigma(W_z x_t + b_z), \\ r &= \sigma(W_r x_t + U_r h_t + b_r), \\ h_{t+1} &= \tanh(U_h (r \odot h_t) + \tanh(W_h x_t) + b_h) \odot z \\ &\quad + h_t \odot (1 - z). \end{aligned}\]

Forward

mut(inp, state)
-mut(inp)

Arguments

  • inp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.MUT2Type
MUT2((input_size => hidden_size); kwargs...)

Mutated unit 2 network. See MUT2Cell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} +mut(inp)

Arguments

  • inp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.MUT2Type
MUT2((input_size => hidden_size); kwargs...)

Mutated unit 2 network. See MUT2Cell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} z &= \sigma(W_z x_t + U_z h_t + b_z), \\ r &= \sigma(x_t + U_r h_t + b_r), \\ h_{t+1} &= \tanh(U_h (r \odot h_t) + W_h x_t + b_h) \odot z \\ &\quad + h_t \odot (1 - z). \end{aligned}\]

Forward

mut(inp, state)
-mut(inp)

Arguments

  • inp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.MUT3Type
MUT3((input_size => hidden_size); kwargs...)

Mutated unit 3 network. See MUT3Cell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} +mut(inp)

Arguments

  • inp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.MUT3Type
MUT3((input_size => hidden_size); kwargs...)

Mutated unit 3 network. See MUT3Cell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} z &= \sigma(W_z x_t + U_z \tanh(h_t) + b_z), \\ r &= \sigma(W_r x_t + U_r h_t + b_r), \\ h_{t+1} &= \tanh(U_h (r \odot h_t) + W_h x_t + b_h) \odot z \\ &\quad + h_t \odot (1 - z). \end{aligned}\]

Forward

mut(inp, state)
-mut(inp)

Arguments

  • inp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.SCRNType
SCRN((input_size => hidden_size)::Pair;
+mut(inp)

Arguments

  • inp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.SCRNType
SCRN((input_size => hidden_size)::Pair;
     init_kernel = glorot_uniform,
     init_recurrent_kernel = glorot_uniform,
     bias = true,
@@ -81,20 +81,20 @@
 h_t &= \sigma(W_h s_t + U_h h_{t-1} + b_h), \\
 y_t &= f(U_y h_t + W_y s_t)
 \end{aligned}\]

Forward

scrn(inp, (state, cstate))
-scrn(inp)

Arguments

  • inp: The input to the scrn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the SCRN. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.PeepholeLSTMType
PeepholeLSTM((input_size => hidden_size)::Pair; kwargs...)

Peephole long short term memory network. See PeepholeLSTMCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{align} +scrn(inp)

Arguments

  • inp: The input to the scrn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the SCRN. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.PeepholeLSTMType
PeepholeLSTM((input_size => hidden_size)::Pair; kwargs...)

Peephole long short term memory network. See PeepholeLSTMCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{align} f_t &= \sigma_g(W_f x_t + U_f c_{t-1} + b_f), \\ i_t &= \sigma_g(W_i x_t + U_i c_{t-1} + b_i), \\ o_t &= \sigma_g(W_o x_t + U_o c_{t-1} + b_o), \\ c_t &= f_t \odot c_{t-1} + i_t \odot \sigma_c(W_c x_t + b_c), \\ h_t &= o_t \odot \sigma_h(c_t). \end{align}\]

Forward

peepholelstm(inp, (state, cstate))
-peepholelstm(inp)

Arguments

  • inp: The input to the peepholelstm. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the PeepholeLSTM. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.FastRNNType
FastRNN((input_size => hidden_size), [activation]; kwargs...)

Fast recurrent neural network. See FastRNNCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • activation: the activation function, defaults to tanh_fast
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} +peepholelstm(inp)

Arguments

  • inp: The input to the peepholelstm. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • (state, cstate): A tuple containing the hidden and cell states of the PeepholeLSTM. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
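Example

A sketch with an explicitly supplied initial hidden and cell state; sizes are hypothetical.

```julia
using Flux, RecurrentLayers

plstm = PeepholeLSTM(3 => 5)
inp = rand(Float32, 3, 12, 8)          # input_size x len x batch_size

state  = zeros(Float32, 5, 8)          # initial hidden state
cstate = zeros(Float32, 5, 8)          # initial cell state
states = plstm(inp, (state, cstate))   # (5, 12, 8)
```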
RecurrentLayers.FastRNNType
FastRNN((input_size => hidden_size), [activation]; kwargs...)

Fast recurrent neural network. See FastRNNCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • activation: the activation function, defaults to tanh_fast
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} \tilde{h}_t &= \sigma(W_h x_t + U_h h_{t-1} + b), \\ h_t &= \alpha \tilde{h}_t + \beta h_{t-1} \end{aligned}\]

Forward

fastrnn(inp, state)
-fastrnn(inp)

Arguments

  • inp: The input to the fastrnn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the FastRNN. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
RecurrentLayers.FastGRNNType
FastGRNN((input_size => hidden_size), [activation]; kwargs...)

Fast gated recurrent neural network. See FastGRNNCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • activation: the activation function, defaults to tanh_fast
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} +fastrnn(inp)

Arguments

  • inp: The input to the fastrnn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the FastRNN. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
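Example

A sketch showing that the layer can be differentiated like any other Flux layer; the loss below is a placeholder chosen only for illustration.

```julia
using Flux, RecurrentLayers

fastrnn = FastRNN(3 => 5)
inp = rand(Float32, 3, 12, 8)          # input_size x len x batch_size

loss(m, x) = sum(abs2, m(x))           # placeholder loss
grads = Flux.gradient(m -> loss(m, inp), fastrnn)
```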
RecurrentLayers.FastGRNNType
FastGRNN((input_size => hidden_size), [activation]; kwargs...)

Fast gated recurrent neural network. See FastGRNNCell for a layer that processes a single sequence.

Arguments

  • input_size => hidden_size: input and inner dimension of the layer
  • activation: the activation function, defaults to tanh_fast
  • init_kernel: initializer for the input to hidden weights
  • init_recurrent_kernel: initializer for the hidden to hidden weights
  • bias: include a bias or not. Default is true

Equations

\[\begin{aligned} z_t &= \sigma(W_z x_t + U_z h_{t-1} + b_z), \\ \tilde{h}_t &= \tanh(W_h x_t + U_h h_{t-1} + b_h), \\ h_t &= \big((\zeta (1 - z_t) + \nu) \odot \tilde{h}_t\big) + z_t \odot h_{t-1} \end{aligned}\]

Forward

fastgrnn(inp, state)
-fastgrnn(inp)

Arguments

  • inp: The input to the fastgrnn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.
  • state: The hidden state of the FastGRNN. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.

Returns

  • New hidden states new_states as an array of size hidden_size x len x batch_size.
source
+fastgrnn(inp)

Arguments

Returns

source diff --git a/dev/index.html b/dev/index.html index 58674d6..1b857f8 100644 --- a/dev/index.html +++ b/dev/index.html @@ -1,2 +1,2 @@ -Home · RecurrentLayers.jl

RecurrentLayers

RecurrentLayers.jl extends the recurrent layers offered by Flux.jl with implementations of bleeding-edge recurrent layers that are not commonly available in base deep learning libraries. It is designed for seamless integration with the larger Flux ecosystem, enabling researchers and practitioners to leverage the latest developments in recurrent neural networks.

Implemented layers

  • Minimal gated unit as MGUCell arxiv
  • Light gated recurrent unit as LiGRUCell arxiv
  • Independently recurrent neural networks as IndRNNCell arxiv
  • Recurrent additive networks as RANCell arxiv
  • Recurrent highway network as RHNCell arxiv
  • Light recurrent unit as LightRUCell pub
  • Neural architecture search unit as NASCell arxiv
  • Evolving recurrent neural networks as MUT1Cell, MUT2Cell, MUT3Cell pub
  • Structurally constrained recurrent neural network as SCRNCell arxiv
  • Peephole long short term memory as PeepholeLSTMCell pub
  • FastRNNCell and FastGRNNCell arxiv

Contributing

Contributions are always welcome! We specifically look for:

  • Recurrent cells you would like to see implemented
  • Benchmarks
  • Any bugs and mistakes of course!
  • Documentation, in any form: examples, how tos, docstrings
+Home · RecurrentLayers.jl

RecurrentLayers

RecurrentLayers.jl extends the recurrent layers offered by Flux.jl with implementations of bleeding-edge recurrent layers that are not commonly available in base deep learning libraries. It is designed for seamless integration with the larger Flux ecosystem, enabling researchers and practitioners to leverage the latest developments in recurrent neural networks.
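For instance, the layers can be dropped into a standard Flux model. A minimal sketch of a sequence classifier follows; the sizes, the choice of MGU, and the last-step readout are made up for illustration.

```julia
using Flux, RecurrentLayers

model = Chain(
    MGU(4 => 16),               # 4 input features, 16 hidden units
    x -> x[:, end, :],          # keep the hidden state of the last time step
    Dense(16 => 2),
    softmax
)

x = rand(Float32, 4, 20, 32)    # features x time steps x batch
y = model(x)                    # 2 x 32
```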

Implemented layers

  • Minimal gated unit as MGUCell arxiv
  • Light gated recurrent unit as LiGRUCell arxiv
  • Independently recurrent neural networks as IndRNNCell arxiv
  • Recurrent additive networks as RANCell arxiv
  • Recurrent highway network as RHNCell arxiv
  • Light recurrent unit as LightRUCell pub
  • Neural architecture search unit as NASCell arxiv
  • Evolving recurrent neural networks as MUT1Cell, MUT2Cell, MUT3Cell pub
  • Structurally constrained recurrent neural network as SCRNCell arxiv
  • Peephole long short term memory as PeepholeLSTMCell pub
  • FastRNNCell and FastGRNNCell arxiv

Contributing

Contributions are always welcome! We specifically look for:

  • Recurrent cells you would like to see implemented
  • Benchmarks
  • Any bugs and mistakes of course!
  • Documentation, in any form: examples, how tos, docstrings
diff --git a/dev/roadmap/index.html b/dev/roadmap/index.html index ce5c103..63f688e 100644 --- a/dev/roadmap/index.html +++ b/dev/roadmap/index.html @@ -1,2 +1,2 @@ -Roadmap · RecurrentLayers.jl

Roadmap

This page documents some planned work for RecurrentLayers.jl. Future work for this library includes additional cells such as:

  • FastRNNs and FastGRUs (current focus) arxiv
  • Unitary recurrent neural networks arxiv
  • Modern recurrent neural networks such as LRU and minLSTM/minGRU
  • Quasi recurrent neural networks arxiv

Additionally, some cell-independent architectures are planned that expand the capabilities of recurrent architectures and could theoretically take any cell.

An implementation of these would ideally look like, for example, FastSlow(RNNCell, input_size => hidden_size). More details on this soon!

+Roadmap · RecurrentLayers.jl

Roadmap

This page documents some planned work for RecurrentLayers.jl. Future work for this library includes additional cells such as:

  • FastRNNs and FastGRUs (current focus) arxiv
  • Unitary recurrent neural networks arxiv
  • Modern recurrent neural networks such as LRU and minLSTM/minGRU
  • Quasi recurrent neural networks arxiv

Additionally, some cell-independent architectures are planned that expand the capabilities of recurrent architectures and could theoretically take any cell.

An implementation of these would ideally look like, for example, FastSlow(RNNCell, input_size => hidden_size). More details on this soon!

Default is true\n\nEquations\n\nbeginaligned\ns_ell^t = h_ell^t odot t_ell^t + s_ell-1^t odot c_ell^t \ntextwhere \nh_ell^t = tanh(W_h x^tmathbbI_ell = 1 + U_h_ell s_ell-1^t + b_h_ell) \nt_ell^t = sigma(W_t x^tmathbbI_ell = 1 + U_t_ell s_ell-1^t + b_t_ell) \nc_ell^t = sigma(W_c x^tmathbbI_ell = 1 + U_c_ell s_ell-1^t + b_c_ell)\nendaligned\n\nForward\n\nrnncell(inp, [state])\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.RHNCellUnit","page":"Cells","title":"RecurrentLayers.RHNCellUnit","text":"RHNCellUnit((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n bias = true)\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.MUT1Cell","page":"Cells","title":"RecurrentLayers.MUT1Cell","text":"MUT1Cell((input_size => hidden_size);\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nMutated unit 1 cell. See MUT1 for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + b_z) \nr = sigma(W_r x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + tanh(W_h x_t) + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmutcell(inp, state)\nmutcell(inp)\n\nArguments\n\ninp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, \n\na tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.MUT2Cell","page":"Cells","title":"RecurrentLayers.MUT2Cell","text":"MUT2Cell((input_size => hidden_size);\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nMutated unit 2 cell. See MUT2 for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + U_z h_t + b_z) \nr = sigma(x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + W_h x_t + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmutcell(inp, state)\nmutcell(inp)\n\nArguments\n\ninp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, \n\na tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.MUT3Cell","page":"Cells","title":"RecurrentLayers.MUT3Cell","text":"MUT3Cell((input_size => hidden_size);\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nMutated unit 3 cell. 
See MUT3 for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + U_z tanh(h_t) + b_z) \nr = sigma(W_r x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + W_h x_t + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmutcell(inp, state)\nmutcell(inp)\n\nArguments\n\ninp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, \n\na tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.SCRNCell","page":"Cells","title":"RecurrentLayers.SCRNCell","text":"SCRNCell((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true,\n alpha = 0.0)\n\nStructurally contraint recurrent unit. See SCRN for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\nalpha: structural contraint. Default is 0.0\n\nEquations\n\nbeginaligned\ns_t = (1 - alpha) W_s x_t + alpha s_t-1 \nh_t = sigma(W_h s_t + U_h h_t-1 + b_h) \ny_t = f(U_y h_t + W_y s_t)\nendaligned\n\nForward\n\nscrncell(inp, (state, cstate))\nscrncell(inp)\n\nArguments\n\ninp: The input to the scrncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the SCRNCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros.\n\nReturns\n\nA tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. They are tensors of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.PeepholeLSTMCell","page":"Cells","title":"RecurrentLayers.PeepholeLSTMCell","text":"PeepholeLSTMCell((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nPeephole long short term memory cell. See PeepholeLSTM for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. 
Default is true\n\nEquations\n\nbeginaligned\nf_t = sigma_g(W_f x_t + U_f c_t-1 + b_f) \ni_t = sigma_g(W_i x_t + U_i c_t-1 + b_i) \no_t = sigma_g(W_o x_t + U_o c_t-1 + b_o) \nc_t = f_t odot c_t-1 + i_t odot sigma_c(W_c x_t + b_c) \nh_t = o_t odot sigma_h(c_t)\nendaligned\n\nForward\n\npeepholelstmcell(inp, (state, cstate))\npeepholelstmcell(inp)\n\nArguments\n\ninp: The input to the peepholelstmcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the PeepholeLSTMCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros.\n\nReturns\n\nA tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. They are tensors of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.FastRNNCell","page":"Cells","title":"RecurrentLayers.FastRNNCell","text":"FastRNNCell((input_size => hidden_size), [activation];\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nFast recurrent neural network cell. See FastRNN for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\nactivation: the activation function, defaults to tanh_fast\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntildeh_t = sigma(W_h x_t + U_h h_t-1 + b) \nh_t = alpha tildeh_t + beta h_t-1\nendaligned\n\nForward\n\nfastrnncell(inp, state)\nfastrnncell(inp)\n\nArguments\n\ninp: The input to the fastrnncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the FastRNN. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.FastGRNNCell","page":"Cells","title":"RecurrentLayers.FastGRNNCell","text":"FastGRNNCell((input_size => hidden_size), [activation];\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nFast gated recurrent neural network cell. See FastGRNN for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\nactivation: the activation function, defaults to tanh_fast\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz_t = sigma(W_z x_t + U_z h_t-1 + b_z) \ntildeh_t = tanh(W_h x_t + U_h h_t-1 + b_h) \nh_t = big((zeta (1 - z_t) + nu) odot tildeh_tbig) + z_t odot h_t-1\nendaligned\n\nForward\n\nfastgrnncell(inp, state)\nfastgrnncell(inp)\n\nArguments\n\ninp: The input to the fastgrnncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the FastGRNN. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. 
If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#Cell-wrappers","page":"Layers","title":"Cell wrappers","text":"","category":"section"},{"location":"api/layers/","page":"Layers","title":"Layers","text":"RAN\nIndRNN\nLightRU\nLiGRU\nMGU\nNAS\nRHN\nMUT1\nMUT2\nMUT3\nSCRN\nPeepholeLSTM\nFastRNN\nFastGRNN","category":"page"},{"location":"api/layers/#RecurrentLayers.RAN","page":"Layers","title":"RecurrentLayers.RAN","text":"RAN(input_size => hidden_size; kwargs...)\n\nThe RANCell, introduced in this paper, is a recurrent cell layer which provides additional memory through the use of gates.\n\nand returns both ht anf ct.\n\nSee RANCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntildec_t = W_c x_t \ni_t = sigma(W_i x_t + U_i h_t-1 + b_i) \nf_t = sigma(W_f x_t + U_f h_t-1 + b_f) \nc_t = i_t odot tildec_t + f_t odot c_t-1 \nh_t = g(c_t)\nendaligned\n\nForward\n\nran(inp, (state, cstate))\nran(inp)\n\nArguments\n\ninp: The input to the ran. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the RAN. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.IndRNN","page":"Layers","title":"RecurrentLayers.IndRNN","text":"IndRNN((input_size, hidden_size)::Pair, σ = tanh, σ=relu;\n kwargs...)\n\nIndependently recurrent network. See IndRNNCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\nσ: activation function. Default is tanh\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nmathbfh_t = sigma(mathbfW mathbfx_t + mathbfu odot mathbfh_t-1 + mathbfb)\n\nForward\n\nindrnn(inp, state)\nindrnn(inp)\n\nArguments\n\ninp: The input to the indrnn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the IndRNN. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.LightRU","page":"Layers","title":"RecurrentLayers.LightRU","text":"LightRU((input_size => hidden_size)::Pair; kwargs...)\n\nLight recurrent unit network. 
See LightRUCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntildeh_t = tanh(W_h x_t) \nf_t = delta(W_f x_t + U_f h_t-1 + b_f) \nh_t = (1 - f_t) odot h_t-1 + f_t odot tildeh_t\nendaligned\n\nForward\n\nlightru(inp, state)\nlightru(inp)\n\nArguments\n\ninp: The input to the lightru. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the LightRU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.LiGRU","page":"Layers","title":"RecurrentLayers.LiGRU","text":"LiGRU((input_size => hidden_size)::Pair; kwargs...)\n\nLight gated recurrent network. The implementation does not include the batch normalization as described in the original paper. See LiGRUCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz_t = sigma(W_z x_t + U_z h_t-1) \ntildeh_t = textReLU(W_h x_t + U_h h_t-1) \nh_t = z_t odot h_t-1 + (1 - z_t) odot tildeh_t\nendaligned\n\nForward\n\nligru(inp, state)\nligru(inp)\n\nArguments\n\ninp: The input to the ligru. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the LiGRU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.MGU","page":"Layers","title":"RecurrentLayers.MGU","text":"MGU((input_size => hidden_size)::Pair; kwargs...)\n\nMinimal gated unit network. See MGUCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nf_t = sigma(W_f x_t + U_f h_t-1 + b_f) \ntildeh_t = tanh(W_h x_t + U_h (f_t odot h_t-1) + b_h) \nh_t = (1 - f_t) odot h_t-1 + f_t odot tildeh_t\nendaligned\n\nForward\n\nmgu(inp, state)\nmgu(inp)\n\nArguments\n\ninp: The input to the mgu. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the MGU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. 
If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.NAS","page":"Layers","title":"RecurrentLayers.NAS","text":"NAS((input_size => hidden_size)::Pair; kwargs...)\n\nNeural Architecture Search unit. See NASCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntextFirst Layer Outputs \no_1 = sigma(W_i^(1) x_t + W_h^(1) h_t-1 + b^(1)) \no_2 = textReLU(W_i^(2) x_t + W_h^(2) h_t-1 + b^(2)) \no_3 = sigma(W_i^(3) x_t + W_h^(3) h_t-1 + b^(3)) \no_4 = textReLU(W_i^(4) x_t cdot W_h^(4) h_t-1) \no_5 = tanh(W_i^(5) x_t + W_h^(5) h_t-1 + b^(5)) \no_6 = sigma(W_i^(6) x_t + W_h^(6) h_t-1 + b^(6)) \no_7 = tanh(W_i^(7) x_t + W_h^(7) h_t-1 + b^(7)) \no_8 = sigma(W_i^(8) x_t + W_h^(8) h_t-1 + b^(8)) \n\ntextSecond Layer Computations \nl_1 = tanh(o_1 cdot o_2) \nl_2 = tanh(o_3 + o_4) \nl_3 = tanh(o_5 cdot o_6) \nl_4 = sigma(o_7 + o_8) \n\ntextInject Cell State \nl_1 = tanh(l_1 + c_textstate) \n\ntextFinal Layer Computations \nc_textnew = l_1 cdot l_2 \nl_5 = tanh(l_3 + l_4) \nh_textnew = tanh(c_textnew cdot l_5)\nendaligned\n\nForward\n\nnas(inp, (state, cstate))\nnas(inp)\n\nArguments\n\ninp: The input to the nas. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the NAS. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.RHN","page":"Layers","title":"RecurrentLayers.RHN","text":"RHN((input_size => hidden_size)::Pair depth=3; kwargs...)\n\nRecurrent highway network. See RHNCellUnit for a the unit component of this layer. See RHNCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ndepth: depth of the recurrence. Default is 3\ncouple_carry: couples the carry gate and the transform gate. Default true\ninit_kernel: initializer for the input to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ns_ell^t = h_ell^t odot t_ell^t + s_ell-1^t odot c_ell^t \ntextwhere \nh_ell^t = tanh(W_h x^tmathbbI_ell = 1 + U_h_ell s_ell-1^t + b_h_ell) \nt_ell^t = sigma(W_t x^tmathbbI_ell = 1 + U_t_ell s_ell-1^t + b_t_ell) \nc_ell^t = sigma(W_c x^tmathbbI_ell = 1 + U_c_ell s_ell-1^t + b_c_ell)\nendaligned\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.MUT1","page":"Layers","title":"RecurrentLayers.MUT1","text":"MUT1((input_size => hidden_size); kwargs...)\n\nMutated unit 1 network. See MUT1Cell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. 
Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + b_z) \nr = sigma(W_r x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + tanh(W_h x_t) + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmut(inp, state)\nmut(inp)\n\nArguments\n\ninp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.MUT2","page":"Layers","title":"RecurrentLayers.MUT2","text":"MUT2Cell((input_size => hidden_size); kwargs...)\n\nMutated unit 2 network. See MUT2Cell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + U_z h_t + b_z) \nr = sigma(x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + W_h x_t + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmut(inp, state)\nmut(inp)\n\nArguments\n\ninp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.MUT3","page":"Layers","title":"RecurrentLayers.MUT3","text":"MUT3((input_size => hidden_size); kwargs...)\n\nMutated unit 3 network. See MUT3Cell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + U_z tanh(h_t) + b_z) \nr = sigma(W_r x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + W_h x_t + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmut(inp, state)\nmut(inp)\n\nArguments\n\ninp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.SCRN","page":"Layers","title":"RecurrentLayers.SCRN","text":"SCRN((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true,\n alpha = 0.0)\n\nStructurally contraint recurrent unit. 
See SCRNCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\nalpha: structural contraint. Default is 0.0\n\nEquations\n\nbeginaligned\ns_t = (1 - alpha) W_s x_t + alpha s_t-1 \nh_t = sigma(W_h s_t + U_h h_t-1 + b_h) \ny_t = f(U_y h_t + W_y s_t)\nendaligned\n\nForward\n\nscrn(inp, (state, cstate))\nscrn(inp)\n\nArguments\n\ninp: The input to the scrn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the SCRN. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.PeepholeLSTM","page":"Layers","title":"RecurrentLayers.PeepholeLSTM","text":"PeepholeLSTM((input_size => hidden_size)::Pair; kwargs...)\n\nPeephole long short term memory network. See PeepholeLSTMCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginalign\nf_t = sigma_g(W_f x_t + U_f c_t-1 + b_f) \ni_t = sigma_g(W_i x_t + U_i c_t-1 + b_i) \no_t = sigma_g(W_o x_t + U_o c_t-1 + b_o) \nc_t = f_t odot c_t-1 + i_t odot sigma_c(W_c x_t + b_c) \nh_t = o_t odot sigma_h(c_t)\nendalign\n\nForward\n\npeepholelstm(inp, (state, cstate))\npeepholelstm(inp)\n\nArguments\n\ninp: The input to the peepholelstm. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the PeepholeLSTM. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.FastRNN","page":"Layers","title":"RecurrentLayers.FastRNN","text":"FastRNN((input_size => hidden_size), [activation]; kwargs...)\n\nFast recurrent neural network. See FastRNNCell for a layer that processes a single sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\nactivation: the activation function, defaults to tanh_fast\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntildeh_t = sigma(W_h x_t + U_h h_t-1 + b) \nh_t = alpha tildeh_t + beta h_t-1\nendaligned\n\nForward\n\nfastrnn(inp, state)\nfastrnn(inp)\n\nArguments\n\ninp: The input to the fastrnn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the FastRNN. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. 
If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.FastGRNN","page":"Layers","title":"RecurrentLayers.FastGRNN","text":"FastGRNN((input_size => hidden_size), [activation]; kwargs...)\n\nFast recurrent neural network. See FastGRNNCell for a layer that processes a single sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\nactivation: the activation function, defaults to tanh_fast\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz_t = sigma(W_z x_t + U_z h_t-1 + b_z) \ntildeh_t = tanh(W_h x_t + U_h h_t-1 + b_h) \nh_t = big((zeta (1 - z_t) + nu) odot tildeh_tbig) + z_t odot h_t-1\nendaligned\n\nForward\n\nfastgrnn(inp, state)\nfastgrnn(inp)\n\nArguments\n\ninp: The input to the fastgrnn. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the FastGRNN. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"roadmap/#Roadmap","page":"Roadmap","title":"Roadmap","text":"","category":"section"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"This page documents some planned work for RecurrentLayers.jl. Future work for this library includes additional cells such as:","category":"page"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"FastRNNs and FastGRUs (current focus) arxiv\nUnitary recurrent neural networks arxiv\nModern recurrent neural networks such as LRU and minLSTM/minGRU\nQuasi recurrent neural networks arxiv","category":"page"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"Additionally, some cell-independent architectures are also planned, that expand the ability of recurrent architectures and could theoretically take any cell:","category":"page"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"Clockwork rnns arxiv\nPhased rnns arxiv\nSegment rnn arxiv\nFast-Slow rnns arxiv","category":"page"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"An implementation of these ideally would be, for example FastSlow(RNNCell, input_size => hidden_size). More details on this soon!","category":"page"},{"location":"","page":"Home","title":"Home","text":"CurrentModule = RecurrentLayers","category":"page"},{"location":"#RecurrentLayers","page":"Home","title":"RecurrentLayers","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"RecurrentLayers.jl extends Flux.jl recurrent layers offering by providing implementations of bleeding edge recurrent layers not commonly available in base deep learning libraries. 
It is designed for a seamless integration with the larger Flux ecosystem, enabling researchers and practitioners to leverage the latest developments in recurrent neural networks.","category":"page"},{"location":"#Implemented-layers","page":"Home","title":"Implemented layers","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Minimal gated unit as MGUCell arxiv\nLight gated recurrent unit as LiGRUCell arxiv\nIndependently recurrent neural networks as IndRNNCell arxiv\nRecurrent addictive networks as RANCell arxiv\nRecurrent highway network as RHNCell arixv\nLight recurrent unit as LightRUCell pub\nNeural architecture search unit NASCell arxiv\nEvolving recurrent neural networks as MUT1Cell, MUT2Cell, MUT3Cell pub\nStructurally constrained recurrent neural network as SCRNCell arxiv\nPeephole long short term memory as PeepholeLSTMCell pub\nFastRNNCell and FastGRNNCell arxiv","category":"page"},{"location":"#Contributing","page":"Home","title":"Contributing","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Contributions are always welcome! We specifically look for :","category":"page"},{"location":"","page":"Home","title":"Home","text":"Recurrent cells you would like to see implemented \nBenchmarks\nAny bugs and mistakes of course!\nDocumentation, in any form: examples, how tos, docstrings ","category":"page"}] +[{"location":"api/cells/#Cells","page":"Cells","title":"Cells","text":"","category":"section"},{"location":"api/cells/","page":"Cells","title":"Cells","text":"RANCell\nIndRNNCell\nLightRUCell\nLiGRUCell\nMGUCell\nNASCell\nRHNCell\nRHNCellUnit\nMUT1Cell\nMUT2Cell\nMUT3Cell\nSCRNCell\nPeepholeLSTMCell\nFastRNNCell\nFastGRNNCell","category":"page"},{"location":"api/cells/#RecurrentLayers.RANCell","page":"Cells","title":"RecurrentLayers.RANCell","text":"RANCell((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nThe RANCell, introduced in this paper, is a recurrent cell layer which provides additional memory through the use of gates.\n\nSee RAN for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntildec_t = W_c x_t \ni_t = sigma(W_i x_t + U_i h_t-1 + b_i) \nf_t = sigma(W_f x_t + U_f h_t-1 + b_f) \nc_t = i_t odot tildec_t + f_t odot c_t-1 \nh_t = g(c_t)\nendaligned\n\nForward\n\nrancell(inp, (state, cstate))\nrancell(inp)\n\nArguments\n\ninp: The input to the rancell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the RANCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. 
They are tensors of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.IndRNNCell","page":"Cells","title":"RecurrentLayers.IndRNNCell","text":"IndRNNCell((input_size => hidden_size)::Pair, σ=relu;\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nIndependently recurrent cell. See IndRNN for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\nσ: activation function. Default is tanh\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nmathbfh_t = sigma(mathbfW mathbfx_t + mathbfu odot mathbfh_t-1 + mathbfb)\n\nForward\n\nindrnncell(inp, state)\nindrnncell(inp)\n\nArguments\n\ninp: The input to the indrnncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the IndRNNCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.LightRUCell","page":"Cells","title":"RecurrentLayers.LightRUCell","text":"LightRUCell((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nLight recurrent unit. See LightRU for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntildeh_t = tanh(W_h x_t) \nf_t = delta(W_f x_t + U_f h_t-1 + b_f) \nh_t = (1 - f_t) odot h_t-1 + f_t odot tildeh_t\nendaligned\n\nForward\n\nlightrucell(inp, state)\nlightrucell(inp)\n\nArguments\n\ninp: The input to the lightrucell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the LightRUCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.LiGRUCell","page":"Cells","title":"RecurrentLayers.LiGRUCell","text":"LiGRUCell((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nLight gated recurrent unit. The implementation does not include the batch normalization as described in the original paper. See LiGRU for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. 
Default is true\n\nEquations\n\nbeginaligned\nz_t = sigma(W_z x_t + U_z h_t-1) \ntildeh_t = textReLU(W_h x_t + U_h h_t-1) \nh_t = z_t odot h_t-1 + (1 - z_t) odot tildeh_t\nendaligned\n\nForward\n\nligrucell(inp, state)\nligrucell(inp)\n\nArguments\n\ninp: The input to the ligrucell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the LiGRUCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.MGUCell","page":"Cells","title":"RecurrentLayers.MGUCell","text":"MGUCell((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nMinimal gated unit. See MGU for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nf_t = sigma(W_f x_t + U_f h_t-1 + b_f) \ntildeh_t = tanh(W_h x_t + U_h (f_t odot h_t-1) + b_h) \nh_t = (1 - f_t) odot h_t-1 + f_t odot tildeh_t\nendaligned\n\nForward\n\nmgucell(inp, state)\nmgucell(inp)\n\nArguments\n\ninp: The input to the mgucell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the MGUCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.NASCell","page":"Cells","title":"RecurrentLayers.NASCell","text":"NASCell((input_size => hidden_size);\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nNeural Architecture Search unit. See NAS for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. 
Default is true\n\nEquations\n\nbeginaligned\ntextFirst Layer Outputs \no_1 = sigma(W_i^(1) x_t + W_h^(1) h_t-1 + b^(1)) \no_2 = textReLU(W_i^(2) x_t + W_h^(2) h_t-1 + b^(2)) \no_3 = sigma(W_i^(3) x_t + W_h^(3) h_t-1 + b^(3)) \no_4 = textReLU(W_i^(4) x_t cdot W_h^(4) h_t-1) \no_5 = tanh(W_i^(5) x_t + W_h^(5) h_t-1 + b^(5)) \no_6 = sigma(W_i^(6) x_t + W_h^(6) h_t-1 + b^(6)) \no_7 = tanh(W_i^(7) x_t + W_h^(7) h_t-1 + b^(7)) \no_8 = sigma(W_i^(8) x_t + W_h^(8) h_t-1 + b^(8)) \n\ntextSecond Layer Computations \nl_1 = tanh(o_1 cdot o_2) \nl_2 = tanh(o_3 + o_4) \nl_3 = tanh(o_5 cdot o_6) \nl_4 = sigma(o_7 + o_8) \n\ntextInject Cell State \nl_1 = tanh(l_1 + c_textstate) \n\ntextFinal Layer Computations \nc_textnew = l_1 cdot l_2 \nl_5 = tanh(l_3 + l_4) \nh_textnew = tanh(c_textnew cdot l_5)\nendaligned\n\nForward\n\nnascell(inp, (state, cstate))\nnascell(inp)\n\nArguments\n\ninp: The input to the fastrnncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the NASCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. They are tensors of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.RHNCell","page":"Cells","title":"RecurrentLayers.RHNCell","text":"RHNCell((input_size => hidden_size), depth=3;\n couple_carry::Bool = true,\n cell_kwargs...)\n\nRecurrent highway network. See RHNCellUnit for a the unit component of this layer. See RHN for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ndepth: depth of the recurrence. Default is 3\ncouple_carry: couples the carry gate and the transform gate. Default true\ninit_kernel: initializer for the input to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ns_ell^t = h_ell^t odot t_ell^t + s_ell-1^t odot c_ell^t \ntextwhere \nh_ell^t = tanh(W_h x^tmathbbI_ell = 1 + U_h_ell s_ell-1^t + b_h_ell) \nt_ell^t = sigma(W_t x^tmathbbI_ell = 1 + U_t_ell s_ell-1^t + b_t_ell) \nc_ell^t = sigma(W_c x^tmathbbI_ell = 1 + U_c_ell s_ell-1^t + b_c_ell)\nendaligned\n\nForward\n\nrnncell(inp, [state])\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.RHNCellUnit","page":"Cells","title":"RecurrentLayers.RHNCellUnit","text":"RHNCellUnit((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n bias = true)\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.MUT1Cell","page":"Cells","title":"RecurrentLayers.MUT1Cell","text":"MUT1Cell((input_size => hidden_size);\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nMutated unit 1 cell. See MUT1 for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. 
Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + b_z) \nr = sigma(W_r x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + tanh(W_h x_t) + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmutcell(inp, state)\nmutcell(inp)\n\nArguments\n\ninp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, \n\na tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.MUT2Cell","page":"Cells","title":"RecurrentLayers.MUT2Cell","text":"MUT2Cell((input_size => hidden_size);\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nMutated unit 2 cell. See MUT2 for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + U_z h_t + b_z) \nr = sigma(x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + W_h x_t + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmutcell(inp, state)\nmutcell(inp)\n\nArguments\n\ninp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, \n\na tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.MUT3Cell","page":"Cells","title":"RecurrentLayers.MUT3Cell","text":"MUT3Cell((input_size => hidden_size);\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nMutated unit 3 cell. See MUT3 for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + U_z tanh(h_t) + b_z) \nr = sigma(W_r x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + W_h x_t + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmutcell(inp, state)\nmutcell(inp)\n\nArguments\n\ninp: The input to the mutcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the MUTCell. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. 
If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, \n\na tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.SCRNCell","page":"Cells","title":"RecurrentLayers.SCRNCell","text":"SCRNCell((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true,\n alpha = 0.0)\n\nStructurally contraint recurrent unit. See SCRN for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\nalpha: structural contraint. Default is 0.0\n\nEquations\n\nbeginaligned\ns_t = (1 - alpha) W_s x_t + alpha s_t-1 \nh_t = sigma(W_h s_t + U_h h_t-1 + b_h) \ny_t = f(U_y h_t + W_y s_t)\nendaligned\n\nForward\n\nscrncell(inp, (state, cstate))\nscrncell(inp)\n\nArguments\n\ninp: The input to the scrncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the SCRNCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. They are tensors of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.PeepholeLSTMCell","page":"Cells","title":"RecurrentLayers.PeepholeLSTMCell","text":"PeepholeLSTMCell((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nPeephole long short term memory cell. See PeepholeLSTM for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nf_t = sigma_g(W_f x_t + U_f c_t-1 + b_f) \ni_t = sigma_g(W_i x_t + U_i c_t-1 + b_i) \no_t = sigma_g(W_o x_t + U_o c_t-1 + b_o) \nc_t = f_t odot c_t-1 + i_t odot sigma_c(W_c x_t + b_c) \nh_t = o_t odot sigma_h(c_t)\nendaligned\n\nForward\n\npeepholelstmcell(inp, (state, cstate))\npeepholelstmcell(inp)\n\nArguments\n\ninp: The input to the peepholelstmcell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the PeepholeLSTMCell. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where output = new_state is the new hidden state and state = (new_state, new_cstate) is the new hidden and cell state. 
They are tensors of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.FastRNNCell","page":"Cells","title":"RecurrentLayers.FastRNNCell","text":"FastRNNCell((input_size => hidden_size), [activation];\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nFast recurrent neural network cell. See FastRNN for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\nactivation: the activation function, defaults to tanh_fast\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntildeh_t = sigma(W_h x_t + U_h h_t-1 + b) \nh_t = alpha tildeh_t + beta h_t-1\nendaligned\n\nForward\n\nfastrnncell(inp, state)\nfastrnncell(inp)\n\nArguments\n\ninp: The input to the fastrnncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the FastRNN. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/cells/#RecurrentLayers.FastGRNNCell","page":"Cells","title":"RecurrentLayers.FastGRNNCell","text":"FastGRNNCell((input_size => hidden_size), [activation];\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true)\n\nFast gated recurrent neural network cell. See FastGRNN for a layer that processes entire sequences.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\nactivation: the activation function, defaults to tanh_fast\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz_t = sigma(W_z x_t + U_z h_t-1 + b_z) \ntildeh_t = tanh(W_h x_t + U_h h_t-1 + b_h) \nh_t = big((zeta (1 - z_t) + nu) odot tildeh_tbig) + z_t odot h_t-1\nendaligned\n\nForward\n\nfastgrnncell(inp, state)\nfastgrnncell(inp)\n\nArguments\n\ninp: The input to the fastgrnncell. It should be a vector of size input_size or a matrix of size input_size x batch_size.\nstate: The hidden state of the FastGRNN. It should be a vector of size hidden_size or a matrix of size hidden_size x batch_size. 
If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nA tuple (output, state), where both elements are given by the updated state new_state, a tensor of size hidden_size or hidden_size x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#Cell-wrappers","page":"Layers","title":"Cell wrappers","text":"","category":"section"},{"location":"api/layers/","page":"Layers","title":"Layers","text":"RAN\nIndRNN\nLightRU\nLiGRU\nMGU\nNAS\nRHN\nMUT1\nMUT2\nMUT3\nSCRN\nPeepholeLSTM\nFastRNN\nFastGRNN","category":"page"},{"location":"api/layers/#RecurrentLayers.RAN","page":"Layers","title":"RecurrentLayers.RAN","text":"RAN(input_size => hidden_size; kwargs...)\n\nThe RANCell, introduced in this paper, is a recurrent cell layer which provides additional memory through the use of gates, and returns both ht and ct.\n\nSee RANCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntildec_t = W_c x_t \ni_t = sigma(W_i x_t + U_i h_t-1 + b_i) \nf_t = sigma(W_f x_t + U_f h_t-1 + b_f) \nc_t = i_t odot tildec_t + f_t odot c_t-1 \nh_t = g(c_t)\nendaligned\n\nForward\n\nran(inp, (state, cstate))\nran(inp)\n\nArguments\n\ninp: The input to the ran. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the RAN. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.IndRNN","page":"Layers","title":"RecurrentLayers.IndRNN","text":"IndRNN((input_size => hidden_size)::Pair, σ=relu;\n kwargs...)\n\nIndependently recurrent network. See IndRNNCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\nσ: activation function. Default is relu\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nmathbfh_t = sigma(mathbfW mathbfx_t + mathbfu odot mathbfh_t-1 + mathbfb)\n\nForward\n\nindrnn(inp, state)\nindrnn(inp)\n\nArguments\n\ninp: The input to the indrnn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the IndRNN. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.LightRU","page":"Layers","title":"RecurrentLayers.LightRU","text":"LightRU((input_size => hidden_size)::Pair; kwargs...)\n\nLight recurrent unit network. 
See LightRUCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntildeh_t = tanh(W_h x_t) \nf_t = delta(W_f x_t + U_f h_t-1 + b_f) \nh_t = (1 - f_t) odot h_t-1 + f_t odot tildeh_t\nendaligned\n\nForward\n\nlightru(inp, state)\nlightru(inp)\n\nArguments\n\ninp: The input to the lightru. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the LightRU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.LiGRU","page":"Layers","title":"RecurrentLayers.LiGRU","text":"LiGRU((input_size => hidden_size)::Pair; kwargs...)\n\nLight gated recurrent network. The implementation does not include the batch normalization as described in the original paper. See LiGRUCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz_t = sigma(W_z x_t + U_z h_t-1) \ntildeh_t = textReLU(W_h x_t + U_h h_t-1) \nh_t = z_t odot h_t-1 + (1 - z_t) odot tildeh_t\nendaligned\n\nForward\n\nligru(inp, state)\nligru(inp)\n\nArguments\n\ninp: The input to the ligru. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the LiGRU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.MGU","page":"Layers","title":"RecurrentLayers.MGU","text":"MGU((input_size => hidden_size)::Pair; kwargs...)\n\nMinimal gated unit network. See MGUCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nf_t = sigma(W_f x_t + U_f h_t-1 + b_f) \ntildeh_t = tanh(W_h x_t + U_h (f_t odot h_t-1) + b_h) \nh_t = (1 - f_t) odot h_t-1 + f_t odot tildeh_t\nendaligned\n\nForward\n\nmgu(inp, state)\nmgu(inp)\n\nArguments\n\ninp: The input to the mgu. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the MGU. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. 
If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.NAS","page":"Layers","title":"RecurrentLayers.NAS","text":"NAS((input_size => hidden_size)::Pair; kwargs...)\n\nNeural Architecture Search unit. See NASCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntextFirst Layer Outputs \no_1 = sigma(W_i^(1) x_t + W_h^(1) h_t-1 + b^(1)) \no_2 = textReLU(W_i^(2) x_t + W_h^(2) h_t-1 + b^(2)) \no_3 = sigma(W_i^(3) x_t + W_h^(3) h_t-1 + b^(3)) \no_4 = textReLU(W_i^(4) x_t cdot W_h^(4) h_t-1) \no_5 = tanh(W_i^(5) x_t + W_h^(5) h_t-1 + b^(5)) \no_6 = sigma(W_i^(6) x_t + W_h^(6) h_t-1 + b^(6)) \no_7 = tanh(W_i^(7) x_t + W_h^(7) h_t-1 + b^(7)) \no_8 = sigma(W_i^(8) x_t + W_h^(8) h_t-1 + b^(8)) \n\ntextSecond Layer Computations \nl_1 = tanh(o_1 cdot o_2) \nl_2 = tanh(o_3 + o_4) \nl_3 = tanh(o_5 cdot o_6) \nl_4 = sigma(o_7 + o_8) \n\ntextInject Cell State \nl_1 = tanh(l_1 + c_textstate) \n\ntextFinal Layer Computations \nc_textnew = l_1 cdot l_2 \nl_5 = tanh(l_3 + l_4) \nh_textnew = tanh(c_textnew cdot l_5)\nendaligned\n\nForward\n\nnas(inp, (state, cstate))\nnas(inp)\n\nArguments\n\ninp: The input to the nas. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the NAS. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.RHN","page":"Layers","title":"RecurrentLayers.RHN","text":"RHN((input_size => hidden_size)::Pair, depth=3; kwargs...)\n\nRecurrent highway network. See RHNCellUnit for the unit component of this layer. See RHNCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ndepth: depth of the recurrence. Default is 3\ncouple_carry: couples the carry gate and the transform gate. Default is true\ninit_kernel: initializer for the input to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ns_ell^t = h_ell^t odot t_ell^t + s_ell-1^t odot c_ell^t \ntextwhere \nh_ell^t = tanh(W_h x^tmathbbI_ell = 1 + U_h_ell s_ell-1^t + b_h_ell) \nt_ell^t = sigma(W_t x^tmathbbI_ell = 1 + U_t_ell s_ell-1^t + b_t_ell) \nc_ell^t = sigma(W_c x^tmathbbI_ell = 1 + U_c_ell s_ell-1^t + b_c_ell)\nendaligned\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.MUT1","page":"Layers","title":"RecurrentLayers.MUT1","text":"MUT1((input_size => hidden_size); kwargs...)\n\nMutated unit 1 network. See MUT1Cell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. 
Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + b_z) \nr = sigma(W_r x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + tanh(W_h x_t) + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmut(inp, state)\nmut(inp)\n\nArguments\n\ninp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.MUT2","page":"Layers","title":"RecurrentLayers.MUT2","text":"MUT2((input_size => hidden_size); kwargs...)\n\nMutated unit 2 network. See MUT2Cell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + U_z h_t + b_z) \nr = sigma(x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + W_h x_t + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmut(inp, state)\nmut(inp)\n\nArguments\n\ninp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.MUT3","page":"Layers","title":"RecurrentLayers.MUT3","text":"MUT3((input_size => hidden_size); kwargs...)\n\nMutated unit 3 network. See MUT3Cell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz = sigma(W_z x_t + U_z tanh(h_t) + b_z) \nr = sigma(W_r x_t + U_r h_t + b_r) \nh_t+1 = tanh(U_h (r odot h_t) + W_h x_t + b_h) odot z \nquad + h_t odot (1 - z)\nendaligned\n\nForward\n\nmut(inp, state)\nmut(inp)\n\nArguments\n\ninp: The input to the mut. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the MUT. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.SCRN","page":"Layers","title":"RecurrentLayers.SCRN","text":"SCRN((input_size => hidden_size)::Pair;\n init_kernel = glorot_uniform,\n init_recurrent_kernel = glorot_uniform,\n bias = true,\n alpha = 0.0)\n\nStructurally constrained recurrent unit. 
See SCRNCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\nalpha: structural constraint. Default is 0.0\n\nEquations\n\nbeginaligned\ns_t = (1 - alpha) W_s x_t + alpha s_t-1 \nh_t = sigma(W_h s_t + U_h h_t-1 + b_h) \ny_t = f(U_y h_t + W_y s_t)\nendaligned\n\nForward\n\nscrn(inp, (state, cstate))\nscrn(inp)\n\nArguments\n\ninp: The input to the scrn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the SCRN. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.PeepholeLSTM","page":"Layers","title":"RecurrentLayers.PeepholeLSTM","text":"PeepholeLSTM((input_size => hidden_size)::Pair; kwargs...)\n\nPeephole long short term memory network. See PeepholeLSTMCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginalign\nf_t = sigma_g(W_f x_t + U_f c_t-1 + b_f) \ni_t = sigma_g(W_i x_t + U_i c_t-1 + b_i) \no_t = sigma_g(W_o x_t + U_o c_t-1 + b_o) \nc_t = f_t odot c_t-1 + i_t odot sigma_c(W_c x_t + b_c) \nh_t = o_t odot sigma_h(c_t)\nendalign\n\nForward\n\npeepholelstm(inp, (state, cstate))\npeepholelstm(inp)\n\nArguments\n\ninp: The input to the peepholelstm. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\n(state, cstate): A tuple containing the hidden and cell states of the PeepholeLSTM. They should be vectors of size hidden_size or matrices of size hidden_size x batch_size. If not provided, they are assumed to be vectors of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.FastRNN","page":"Layers","title":"RecurrentLayers.FastRNN","text":"FastRNN((input_size => hidden_size), [activation]; kwargs...)\n\nFast recurrent neural network. See FastRNNCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\nactivation: the activation function, defaults to tanh_fast\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\ntildeh_t = sigma(W_h x_t + U_h h_t-1 + b) \nh_t = alpha tildeh_t + beta h_t-1\nendaligned\n\nForward\n\nfastrnn(inp, state)\nfastrnn(inp)\n\nArguments\n\ninp: The input to the fastrnn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the FastRNN. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. 
If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"api/layers/#RecurrentLayers.FastGRNN","page":"Layers","title":"RecurrentLayers.FastGRNN","text":"FastGRNN((input_size => hidden_size), [activation]; kwargs...)\n\nFast gated recurrent neural network. See FastGRNNCell for a layer that processes a single sequence.\n\nArguments\n\ninput_size => hidden_size: input and inner dimension of the layer\nactivation: the activation function, defaults to tanh_fast\ninit_kernel: initializer for the input to hidden weights\ninit_recurrent_kernel: initializer for the hidden to hidden weights\nbias: include a bias or not. Default is true\n\nEquations\n\nbeginaligned\nz_t = sigma(W_z x_t + U_z h_t-1 + b_z) \ntildeh_t = tanh(W_h x_t + U_h h_t-1 + b_h) \nh_t = big((zeta (1 - z_t) + nu) odot tildeh_tbig) + z_t odot h_t-1\nendaligned\n\nForward\n\nfastgrnn(inp, state)\nfastgrnn(inp)\n\nArguments\n\ninp: The input to the fastgrnn. It should be a vector of size input_size x len or a matrix of size input_size x len x batch_size.\nstate: The hidden state of the FastGRNN. If given, it is a vector of size hidden_size or a matrix of size hidden_size x batch_size. If not provided, it is assumed to be a vector of zeros, initialized by Flux.initialstates.\n\nReturns\n\nNew hidden states new_states as an array of size hidden_size x len x batch_size.\n\n\n\n\n\n","category":"type"},{"location":"roadmap/#Roadmap","page":"Roadmap","title":"Roadmap","text":"","category":"section"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"This page documents some planned work for RecurrentLayers.jl. Future work for this library includes additional cells such as:","category":"page"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"FastRNNs and FastGRNNs (current focus) arxiv\nUnitary recurrent neural networks arxiv\nModern recurrent neural networks such as LRU and minLSTM/minGRU\nQuasi recurrent neural networks arxiv","category":"page"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"Additionally, some cell-independent architectures are planned that expand the capabilities of recurrent architectures and could theoretically take any cell:","category":"page"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"Clockwork RNNs arxiv\nPhased RNNs arxiv\nSegment RNN arxiv\nFast-Slow RNNs arxiv","category":"page"},{"location":"roadmap/","page":"Roadmap","title":"Roadmap","text":"An implementation of these would ideally look like, for example, FastSlow(RNNCell, input_size => hidden_size). More details on this soon!","category":"page"},{"location":"","page":"Home","title":"Home","text":"CurrentModule = RecurrentLayers","category":"page"},{"location":"#RecurrentLayers","page":"Home","title":"RecurrentLayers","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"RecurrentLayers.jl extends the Flux.jl recurrent layer offering by providing implementations of bleeding-edge recurrent layers not commonly available in base deep learning libraries. 
It is designed for seamless integration with the larger Flux ecosystem, enabling researchers and practitioners to leverage the latest developments in recurrent neural networks.","category":"page"},{"location":"#Implemented-layers","page":"Home","title":"Implemented layers","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Minimal gated unit as MGUCell arxiv\nLight gated recurrent unit as LiGRUCell arxiv\nIndependently recurrent neural networks as IndRNNCell arxiv\nRecurrent additive networks as RANCell arxiv\nRecurrent highway network as RHNCell arxiv\nLight recurrent unit as LightRUCell pub\nNeural architecture search unit as NASCell arxiv\nEvolving recurrent neural networks as MUT1Cell, MUT2Cell, MUT3Cell pub\nStructurally constrained recurrent neural network as SCRNCell arxiv\nPeephole long short term memory as PeepholeLSTMCell pub\nFastRNNCell and FastGRNNCell arxiv","category":"page"},{"location":"#Contributing","page":"Home","title":"Contributing","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Contributions are always welcome! We specifically look for:","category":"page"},{"location":"","page":"Home","title":"Home","text":"Recurrent cells you would like to see implemented \nBenchmarks\nAny bugs and mistakes of course!\nDocumentation, in any form: examples, how-tos, docstrings ","category":"page"}] }
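
The layer docstrings indexed above all share one calling convention: a full sequence of shape input_size x len x batch_size goes in, and the hidden states for every step come out, with an optional explicit initial state. A minimal usage sketch of that convention, assuming only the constructors and forward signatures documented above; the concrete sizes and the choice of MGU are illustrative:

```julia
using Flux, RecurrentLayers

input_size, hidden_size, len, batch_size = 4, 8, 10, 2

layer = MGU(input_size => hidden_size)            # full-sequence wrapper around MGUCell
x = rand(Float32, input_size, len, batch_size)    # input_size x len x batch_size

h = layer(x)                                      # hidden states for every time step
@assert size(h) == (hidden_size, len, batch_size)

# An explicit initial state can also be passed, as in the documented mgu(inp, state) form
h0 = zeros(Float32, hidden_size, batch_size)
h = layer(x, h0)
```

Layers that carry both a hidden and a cell state (RAN, NAS, SCRN, PeepholeLSTM) take a (state, cstate) tuple instead of a single state, as their docstrings note.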
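For the single-step cells, a corresponding sketch under the same assumptions, using the documented scrncell(inp) and scrncell(inp, (state, cstate)) forms; again, the numbers are only illustrative:

```julia
using Flux, RecurrentLayers

input_size, hidden_size, batch_size = 4, 8, 2

cell = SCRNCell(input_size => hidden_size)        # processes one time step at a time
x1 = rand(Float32, input_size, batch_size)        # input_size x batch_size

# Without an explicit state the cell starts from zeros (via Flux.initialstates)
out, (h, c) = cell(x1)
@assert size(out) == (hidden_size, batch_size)

# Carry the (hidden, context) state forward to the next step
x2 = rand(Float32, input_size, batch_size)
out2, (h2, c2) = cell(x2, (h, c))
```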