From 48743832a642d0c4bf90d4e4bd1f30b230ae178e Mon Sep 17 00:00:00 2001
From: MarcSzafraniec
Date: Wed, 18 Sep 2024 16:46:38 +0200
Subject: [PATCH] [fix] Correctly pass mask in TransformerBlock.forward in
 transformer_layers.py

The attention mask was not passed to `Attention` in
`TransformerBlock.forward`. One problem this caused: when two images were
passed to the image encoder, attention was computed across all images at
once, which used more resources and returned an incorrect result. This PR
fixes that.
---
 src/mistral_inference/transformer_layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mistral_inference/transformer_layers.py b/src/mistral_inference/transformer_layers.py
index 4ee23f5..1069ec3 100644
--- a/src/mistral_inference/transformer_layers.py
+++ b/src/mistral_inference/transformer_layers.py
@@ -162,7 +162,7 @@ def forward(
         cache: Optional[CacheView] = None,
         mask: Optional[BlockDiagonalMask] = None,
     ) -> torch.Tensor:
-        r = self.attention.forward(self.attention_norm(x), freqs_cis, cache)
+        r = self.attention.forward(x=self.attention_norm(x), freqs_cis=freqs_cis, cache=cache, mask=mask)
         h = x + r
         r = self.feed_forward.forward(self.ffn_norm(h))
         out = h + r
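
For context, below is a minimal plain-PyTorch sketch of why dropping the block-diagonal mask matters when the patch tokens of two images are concatenated into one sequence. This is not the `mistral_inference` API; the sequence lengths and dimensions are made up for illustration, and a dense boolean mask stands in for `BlockDiagonalMask`.

```python
import torch
import torch.nn.functional as F

# Hypothetical sizes: two "images" of 3 and 5 patch tokens, concatenated into one sequence.
seqlens = [3, 5]
total, dim = sum(seqlens), 8
x = torch.randn(1, total, dim)

# Block-diagonal boolean mask: token i may attend to token j
# only if both belong to the same image.
mask = torch.zeros(total, total, dtype=torch.bool)
start = 0
for n in seqlens:
    mask[start:start + n, start:start + n] = True
    start += n

# Without the mask, attention mixes tokens across both images (the bug).
out_unmasked = F.scaled_dot_product_attention(x, x, x)

# With the mask, each image attends only to its own tokens (the intended behavior).
out_masked = F.scaled_dot_product_attention(x, x, x, attn_mask=mask)

print(torch.allclose(out_unmasked, out_masked))  # False: the mask changes the result
```

The one-line change above restores this behavior by forwarding the `BlockDiagonalMask` that the caller already builds per image down to the attention call, instead of silently dropping it.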