This repository has been archived by the owner on Dec 20, 2024. It is now read-only.

chore: remove explicit float typing
theissenhelen committed Dec 19, 2024
1 parent 2d122df commit d4510f6
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/anemoi/models/layers/attention.py
@@ -288,7 +288,7 @@ def sliding_window_mask(b, h, q_idx, kv_idx):
             self.attention = torch.compile(self.attention)
             self.is_attn_compiled = True

-        # TODO test how this impacts scaling at large model counts
+        # TODO(Cathal): test how this impacts scaling at large model counts
         torch._dynamo.config.optimize_ddp = False
         out = self.attention(query, key, value)
         torch._dynamo.config.optimize_ddp = True
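The unchanged context in this hunk compiles the attention callable once, switches torch._dynamo.config.optimize_ddp off for the compiled call, and then switches it back on. As a minimal sketch of that toggle pattern (not the repository code; the helper name call_with_optimize_ddp_disabled is hypothetical), one could restore the previous flag value in a finally block so it is not left disabled if the call raises:

# Minimal sketch, not the repository code: temporarily disable TorchDynamo's
# DDP graph-splitting optimisation around a compiled call, then restore the
# previous setting. The helper name is hypothetical.
import torch
import torch._dynamo


def call_with_optimize_ddp_disabled(fn, *args, **kwargs):
    previous = torch._dynamo.config.optimize_ddp
    torch._dynamo.config.optimize_ddp = False
    try:
        return fn(*args, **kwargs)
    finally:
        torch._dynamo.config.optimize_ddp = previous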
@@ -354,10 +354,10 @@ def get_alibi_slopes(num_heads: int) -> Tensor:
         aLiBi slopes
     """
     n = 2 ** math.floor(math.log2(num_heads))
-    slope_0 = 2.0 ** (-8.0 / n)
+    slope_0 = 2 ** (-8 / n)
     alibi_slopes = torch.pow(slope_0, torch.arange(1, 1 + n))
     if n < num_heads:
-        slope_hat_0 = 2.0 ** (-4.0 / n)
+        slope_hat_0 = 2 ** (-4 / n)
         alibi_slopes_hat = torch.pow(slope_hat_0, torch.arange(1, 1 + 2 * (num_heads - n), 2))
         alibi_slopes = torch.cat([alibi_slopes, alibi_slopes_hat])
     return alibi_slopes
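Dropping the explicit float literals is behaviour-preserving in Python 3: / is true division, so -8 / n is already a float and 2 ** (-8 / n) evaluates to exactly the same value as 2.0 ** (-8.0 / n). A minimal standalone sketch (a local copy of the post-change function, not an import from the repository) that checks this:

# Minimal sketch, not the repository code: a standalone copy of the post-change
# get_alibi_slopes plus a check that removing the explicit float literals does
# not change the computed slopes (Python 3 true division already yields floats).
import math

import torch
from torch import Tensor


def get_alibi_slopes(num_heads: int) -> Tensor:
    n = 2 ** math.floor(math.log2(num_heads))
    slope_0 = 2 ** (-8 / n)
    alibi_slopes = torch.pow(slope_0, torch.arange(1, 1 + n))
    if n < num_heads:
        slope_hat_0 = 2 ** (-4 / n)
        alibi_slopes_hat = torch.pow(slope_hat_0, torch.arange(1, 1 + 2 * (num_heads - n), 2))
        alibi_slopes = torch.cat([alibi_slopes, alibi_slopes_hat])
    return alibi_slopes


for num_heads in (4, 6, 8, 12):
    n = 2 ** math.floor(math.log2(num_heads))
    # Old (explicitly typed) and new expressions produce identical floats.
    assert 2 ** (-8 / n) == 2.0 ** (-8.0 / n)
    assert 2 ** (-4 / n) == 2.0 ** (-4.0 / n)
    print(num_heads, get_alibi_slopes(num_heads).tolist())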
