tdstorch.py
# https://towardsdatascience.com/understanding-pytorch-with-an-example-a-step-by-step-tutorial-81fc5f8c4e8e
import torch
import torch.optim as optim
import torch.nn as nn
import numpy as np
from torchviz import make_dot  # not used below; the linked tutorial uses it to visualize the computation graph
# Runs on the GPU when one is available, otherwise falls back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
np.random.seed(42)
x = np.random.rand(100, 1)
print(x)
y = 1 + 2 * x + .1 * np.random.randn(100, 1)
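# True parameters are a = 1 and b = 2, with Gaussian noise of std 0.1;
# the gradient descent below should recover values close to these.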
# Shuffles the indices
idx = np.arange(100)
np.random.shuffle(idx)
# Uses first 80 random indices for train
train_idx = idx[:80]
# Uses the remaining indices for validation
val_idx = idx[80:]
# Generates train and validation sets
x_train, y_train = x[train_idx], y[train_idx]
print(x_train)
x_val, y_val = x[val_idx], y[val_idx]
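# x_val / y_val are set aside here and not used in the rest of this script;
# the linked tutorial brings them in when it gets to evaluation.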
# Our data was in Numpy arrays, but we need to transform them into PyTorch's Tensors
# and then we send them to the chosen device
x_train_tensor = torch.from_numpy(x_train).float().to(device)
y_train_tensor = torch.from_numpy(y_train).float().to(device)
# Here we can see the difference - notice that .type() is more useful
# since it also tells us WHERE the tensor is (device)
print(type(x_train), type(x_train_tensor), x_train_tensor.type())
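# (Side note, an addition to the original script:) going the other way, a
# tensor has to be moved to the CPU before it can become a numpy array again.
x_train_back = x_train_tensor.cpu().numpy()
print(type(x_train_back))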
#print(x_train, x_train_tensor)
# Initializes parameters "a" and "b" randomly
np.random.seed(42)
a = np.random.randn(1)
b = np.random.randn(1)
print(a, b)
# Sets learning rate
lr = 1e-1
# Defines number of epochs
n_epochs = 1000
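# With MSE loss L = mean((y - (a + b*x))**2), the gradients work out to
#   dL/da = -2 * mean(y - yhat)
#   dL/db = -2 * mean(x * (y - yhat))
# which is exactly what the loop below computes by hand.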
for epoch in range(n_epochs):
    # Computes our model's predicted output
    yhat = a + b * x_train
    # How wrong is our model? That's the error!
    error = (y_train - yhat)
    # It is a regression, so it computes the mean squared error (MSE)
    loss = (error ** 2).mean()
    # Computes gradients for both "a" and "b" parameters
    a_grad = -2 * error.mean()
    b_grad = -2 * (x_train * error).mean()
    # Updates parameters using the gradients and the learning rate
    a = a - lr * a_grad
    b = b - lr * b_grad
print(a, b)
# Sanity Check: do we get the same results as our gradient descent?
from sklearn.linear_model import LinearRegression
linr = LinearRegression()
linr.fit(x_train, y_train)
print(linr.intercept_, linr.coef_[0])
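# A minimal sketch of where the linked tutorial goes next (an addition, not
# part of the original script): the same regression trained with PyTorch
# autograd, using the torch.optim and torch.nn imports from the top of the
# file. backward() fills in the .grad attributes that we computed by hand
# above, and the optimizer applies the update step.
torch.manual_seed(42)
a_t = torch.randn(1, requires_grad=True, dtype=torch.float, device=device)
b_t = torch.randn(1, requires_grad=True, dtype=torch.float, device=device)
optimizer = optim.SGD([a_t, b_t], lr=lr)
loss_fn = nn.MSELoss(reduction='mean')
for epoch in range(n_epochs):
    yhat = a_t + b_t * x_train_tensor
    loss = loss_fn(yhat, y_train_tensor)
    # Computes gradients for a_t and b_t, updates them, and clears the grads
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
print(a_t, b_t)  # should land close to the numpy and sklearn results above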