Commit

last commit for the day
lucidrains committed Jan 22, 2025
1 parent 872e7ba commit 560f6bc
Showing 3 changed files with 29 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
```diff
@@ -1,6 +1,6 @@
 [project]
 name = "titans-pytorch"
-version = "0.1.22"
+version = "0.1.23"
 description = "Titans"
 authors = [
     { name = "Phil Wang", email = "[email protected]" }
```
20 changes: 20 additions & 0 deletions tests/test_titans.py
```diff
@@ -193,3 +193,23 @@ def test_flex(
     out_non_flex, _ = attn(seq, disable_flex_attn = True)

     assert torch.allclose(out_flex, out_non_flex, atol = 1e-5)
+
+def test_assoc_scan():
+    from titans_pytorch.titans import AssocScan
+    import torch.nn.functional as F
+
+    scan = AssocScan()
+
+    gates = torch.randn(2, 1024, 512).sigmoid()
+    inputs = torch.randn(2, 1024, 512)
+
+    output = scan(gates, inputs)
+
+    gates1, gates2 = gates[:, :512], gates[:, 512:]
+    inputs1, inputs2 = inputs[:, :512], inputs[:, 512:]
+
+    first_half = scan(gates1, inputs1)
+
+    second_half = scan(gates2, inputs2, prev = first_half[:, -1])
+
+    assert torch.allclose(output[:, -1], second_half[:, -1], atol = 1e-5)
```
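The property under test: `AssocScan` computes the gated linear recurrence h_t = g_t * h_{t-1} + x_t, so scanning the second half of a sequence seeded with the first half's final state must reproduce the final state of one full scan. A minimal sequential reference for that recurrence (the `ref_scan` name and loop are illustrative, not the library's implementation):

```python
import torch

def ref_scan(gates, inputs, prev = None):
    # sequential reference for the recurrence h_t = g_t * h_{t-1} + x_t
    # gates, inputs: (batch, seq, dim); prev: optional (batch, dim) carry-in state
    h = prev if prev is not None else torch.zeros_like(inputs[:, 0])
    outputs = []
    for t in range(inputs.shape[1]):
        h = gates[:, t] * h + inputs[:, t]
        outputs.append(h)
    return torch.stack(outputs, dim = 1)

# splitting and reseeding agrees with one full pass, mirroring the test above
gates = torch.randn(2, 8, 4).sigmoid()
inputs = torch.randn(2, 8, 4)

full = ref_scan(gates, inputs)
first = ref_scan(gates[:, :4], inputs[:, :4])
second = ref_scan(gates[:, 4:], inputs[:, 4:], prev = first[:, -1])

assert torch.allclose(full[:, -1], second[:, -1], atol = 1e-6)
```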
10 changes: 8 additions & 2 deletions titans_pytorch/titans.py
```diff
@@ -324,7 +324,11 @@ def __init__(
         super().__init__()
         self.use_accelerated = use_accelerated

-    def forward(self, gates, inputs):
+    def forward(self, gates, inputs, prev = None):
+
+        if exists(prev):
+            inputs, _ = pack([prev, inputs], 'b * d')
+            gates = pad_at_dim(gates, (1, 0), value = 1., dim = -2)

         if not self.use_accelerated:
             _, outputs = associative_scan(binary_operator, (gates, inputs))
```
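Why the gate padding value is 1: the associative scan composes `(gate, value)` pairs, and a unit gate passes the prepended `prev` through as the initial hidden state. The in-repo `binary_operator` is outside this diff, but for the recurrence h_t = g_t * h_{t-1} + x_t the standard operator looks like this (a sketch, assuming that recurrence):

```python
def binary_operator(a, b):
    # compose two (gate, value) scan elements:
    # (g_a, x_a) then (g_b, x_b) gives (g_a * g_b, g_b * x_a + x_b),
    # since h -> g_b * (g_a * h + x_a) + x_b
    g_a, x_a = a
    g_b, x_b = b
    return g_a * g_b, g_b * x_a + x_b
```

Combining the prepended element with the next one gives (1, prev) then (g_1, x_1) = (g_1, g_1 * prev + x_1), i.e. the recurrence seeded at h_0 = prev, which is why `pad_at_dim` fills the new gate slot with 1.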
```diff
@@ -789,7 +793,9 @@ def forward_inference(

         # store if storage sequence cache hits the chunk size

-        if cache_store_seq.shape[-2] == self.chunk_size:
+        store_seq_cache_len = cache_store_seq.shape[-2]
+
+        if store_seq_cache_len == self.chunk_size:
             updates, _ = self.store_memories(cache_store_seq, mem_model_state)

             past_weights, past_momentum = mem_model_state
```
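For context, this inference path buffers incoming tokens in a store-sequence cache and only updates the neural memory once a full chunk has accumulated. A generic sketch of that accumulate-and-flush pattern (illustrative only; `accumulate_then_flush` and `flush` are hypothetical stand-ins, not the repo's API):

```python
import torch

def accumulate_then_flush(cache, new_token, chunk_size, flush):
    # append the incoming token to the cache along the sequence dim
    cache = torch.cat((cache, new_token), dim = -2)

    # only update (flush) once a full chunk has accumulated
    if cache.shape[-2] == chunk_size:
        flush(cache)
        cache = cache[:, 0:0]  # reset to an empty cache

    return cache
```

The change in the hunk itself is a small readability refactor: naming the cached sequence length before comparing it against the chunk size.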
