From 48743832a642d0c4bf90d4e4bd1f30b230ae178e Mon Sep 17 00:00:00 2001
From: MarcSzafraniec
Date: Wed, 18 Sep 2024 16:46:38 +0200
Subject: [PATCH] [fix] Correctly pass mask in TransformerBlock.forward in
 transformer_layers.py

The attention mask was not passed to `Attention` in
`TransformerBlock.forward`. One problem this caused: when two images were
passed to the image encoder, attention was computed across all images at
once, which used more resources and returned an incorrect result. This PR
fixes that.
---
 src/mistral_inference/transformer_layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mistral_inference/transformer_layers.py b/src/mistral_inference/transformer_layers.py
index 4ee23f5..1069ec3 100644
--- a/src/mistral_inference/transformer_layers.py
+++ b/src/mistral_inference/transformer_layers.py
@@ -162,7 +162,7 @@ def forward(
         cache: Optional[CacheView] = None,
         mask: Optional[BlockDiagonalMask] = None,
     ) -> torch.Tensor:
-        r = self.attention.forward(self.attention_norm(x), freqs_cis, cache)
+        r = self.attention.forward(x=self.attention_norm(x), freqs_cis=freqs_cis, cache=cache, mask=mask)
         h = x + r
         r = self.feed_forward.forward(self.ffn_norm(h))
         out = h + r
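
For context, below is a minimal plain-PyTorch sketch of why dropping the block-diagonal mask matters when the patch tokens of two images are concatenated into one sequence. This is not the `mistral_inference` API; the sequence lengths and dimensions are made up for illustration, and a dense boolean mask stands in for `BlockDiagonalMask`.

```python
import torch
import torch.nn.functional as F

# Hypothetical sizes: two "images" of 3 and 5 patch tokens, concatenated into one sequence.
seqlens = [3, 5]
total, dim = sum(seqlens), 8
x = torch.randn(1, total, dim)

# Block-diagonal boolean mask: token i may attend to token j
# only if both belong to the same image.
mask = torch.zeros(total, total, dtype=torch.bool)
start = 0
for n in seqlens:
    mask[start:start + n, start:start + n] = True
    start += n

# Without the mask, attention mixes tokens across both images (the bug).
out_unmasked = F.scaled_dot_product_attention(x, x, x)

# With the mask, each image attends only to its own tokens (the intended behavior).
out_masked = F.scaled_dot_product_attention(x, x, x, attn_mask=mask)

print(torch.allclose(out_unmasked, out_masked))  # False: the mask changes the result
```

The one-line change above restores this behavior by forwarding the `BlockDiagonalMask` that the caller already builds per image down to the attention call, instead of silently dropping it.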