# example_norms.py (forked from cybertronai/autograd-lib)
import torch
from torch import nn
from autograd_lib import autograd_lib


def simple_model(d, num_layers):
    """Creates a simple linear network with every layer initialized to the identity."""
    layers = []
    for i in range(num_layers):
        layer = nn.Linear(d, d, bias=False)
        layer.weight.data.copy_(torch.eye(d))
        layers.append(layer)
    return torch.nn.Sequential(*layers)


def least_squares(data, targets=None):
    """Least-squares loss (like MSELoss, but with an extra 1/2 factor)."""
    if targets is None:
        targets = torch.zeros_like(data)
    err = data - targets.view(-1, data.shape[1])
    return torch.sum(err * err) / 2 / len(data)


depth = 5
width = 2
n = 3
model = simple_model(width, depth)
data = torch.ones((n, width))
targets = torch.ones((n, width))
loss_fn = least_squares

autograd_lib.register(model)

activations = {}
norms = [torch.zeros(n)]
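# How the two passes below fit together: inside the first module_hook context the
# forward pass calls save_activations with each layer's input activations A (one row
# per example); inside the second context the backward pass calls per_example_norms
# with the matching per-example output gradients B. For a bias-free linear layer,
# example i's contribution to the weight gradient is the outer product of B[i] and
# A[i], so its squared Frobenius norm factors as (A[i]*A[i]).sum() * (B[i]*B[i]).sum(),
# which is exactly what norms[0] accumulates, layer by layer.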
def save_activations(layer, A, _):
    activations[layer] = A


with autograd_lib.module_hook(save_activations):
    output = model(data)
    loss = loss_fn(output)


def per_example_norms(layer, _, B):
    A = activations[layer]
    norms[0] += (A * A).sum(dim=1) * (B * B).sum(dim=1)


with autograd_lib.module_hook(per_example_norms):
    loss.backward()

print('per-example gradient norms squared:', norms[0])
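

# Optional sanity check (not part of the original example): recompute each
# per-example squared gradient norm with one plain backward pass per example and
# compare against norms[0]. Only standard PyTorch calls are used, and the check
# assumes autograd_lib's hooks stay inactive outside a module_hook context.
# Note that loss_fn(output) above averages over the n examples (and uses zero
# targets), so example i's contribution is loss_fn on that example alone, divided
# by n. The name `expected` is illustrative.
expected = torch.zeros(n)
for i in range(n):
    model.zero_grad()
    loss_i = loss_fn(model(data[i:i + 1])) / n
    loss_i.backward()
    expected[i] = sum((p.grad ** 2).sum() for p in model.parameters())
print('recomputed with per-example backward passes:', expected)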