In [None]:
import torch
# Tensor type applied to every tensor this notebook creates (CPU, 32-bit float).
dtype = torch.FloatTensor

# Seed PyTorch's global RNG so that Restart & Run All regenerates the same
# random data and the same initial weights on every run.
SEED = 0
torch.manual_seed(SEED)

## Create an artificial dataset.

In [None]:
# Random inputs X, one column per example: 10 features (rows) x 100 examples
# (columns), drawn uniformly from [7, 17).
X = torch.rand(10, 100).type(dtype) * 10 + 7

# Ground-truth linear model: standard-normal weights w_true and a fixed bias b_true.
w_true = torch.randn(1, 10).type(dtype)
b_true = 3

# Noise-free labels from the true model; the scalar bias broadcasts over all examples.
y = torch.mm(w_true, X) + b_true
# Perturb every label with a small uniform offset in [0, 0.01).
y = y + torch.rand(100) * 0.01
# Peek at example 3: its feature column, then its label.
print(X[:, 3], y[:, 3], sep='\n')

## Linear regression with gradient descent.

In [None]:
# Create the trainable parameters: w starts random, b starts at zero.
# requires_grad_() flags each tensor in place so that loss.backward() fills in
# its .grad. Wrapping an existing tensor in torch.tensor(...) instead would make
# an extra copy and emit PyTorch's "copy construct from a tensor" UserWarning.
w = torch.randn(1, 10).type(dtype).requires_grad_()
b = torch.zeros(1, 1).type(dtype).requires_grad_()
# Row of ones, one entry per example; no gradient is needed for it.
ones = torch.ones(1, 100).type(dtype)

In [None]:
# Plain gradient descent with separate step sizes for the weights and the bias.
learning_rate1 = 0.0001 # try 0.1
learning_rate2 = 0.1
for epoch in range(30000): # try 100 first
    # Forward pass; b broadcasts across all examples.
    y_pred = w.mm(X) + b

    # Compute loss: mean squared error over all examples (no hard-coded count).
    loss = (y_pred - y).pow(2).mean()
    if epoch % 1000 == 0:
        print('Epoch', epoch, 'loss = ', loss.item())

    # Backward pass: populates w.grad and b.grad.
    loss.backward()

    # Update the parameters in place. The update must not be recorded by
    # autograd, so it runs under no_grad() rather than through legacy `.data`.
    with torch.no_grad():
        w -= learning_rate1 * w.grad
        b -= learning_rate2 * b.grad

        # backward() accumulates into .grad, so zero the gradients manually
        # before the next iteration.
        w.grad.zero_()
        b.grad.zero_()

In [None]:
# Ground-truth weights and bias, one per line, for comparison with the fit.
print(w_true, b_true, sep='\n')

In [None]:
# Weights and bias found by gradient descent, one per line.
print(w.data, b.data, sep='\n')

## Linear regression with Adam (GD with adaptive learning rate).

In [None]:
# Re-create the trainable parameters so Adam starts from a fresh initialization:
# w random, b zero.
# requires_grad_() flags each tensor in place so that loss.backward() fills in
# its .grad. Wrapping an existing tensor in torch.tensor(...) instead would make
# an extra copy and emit PyTorch's "copy construct from a tensor" UserWarning.
w = torch.randn(1, 10).type(dtype).requires_grad_()
b = torch.zeros(1, 1).type(dtype).requires_grad_()
# Row of ones, one entry per example; no gradient is needed for it.
ones = torch.ones(1, 100).type(dtype)

In [None]:
# Adam adapts the step size per parameter, so a single learning rate serves
# both the bias and the weights.
optimizer = torch.optim.Adam([b, w], lr = 0.001)

for epoch in range(30000):  # try 100 first
    # Forward pass; b broadcasts across all examples.
    y_pred = w.mm(X) + b

    # Compute loss: mean squared error over all examples (no hard-coded count).
    loss = (y_pred - y).pow(2).mean()
    if epoch % 1000 == 0:
        print('Epoch', epoch, 'loss = ', loss.item())

    # Clear gradients left over from the previous iteration, since backward()
    # accumulates into .grad.
    optimizer.zero_grad()

    # Backward pass: populates w.grad and b.grad.
    loss.backward()

    # Let the optimizer apply the parameter update.
    optimizer.step()

In [None]:
# Ground truth again, for side-by-side comparison with the Adam fit.
for true_param in (w_true, b_true):
    print(true_param)

In [None]:
# Weights and bias found by Adam.
for learned_param in (w, b):
    print(learned_param.data)

## Linear regression with [torch.nn](https://pytorch.org/docs/stable/nn.html) functions.

In [None]:
# Put examples in rows: X becomes (examples, features) and y becomes
# (examples, 1), matching how the torch.nn model below is fed.
# The cell reassigns X and y from themselves, so it is guarded on y still being
# a single row; without the guard, re-running the cell would flip both tensors
# back to the column layout. (Assumes the dataset has more than one example.)
if y.shape[0] == 1:
    X = X.transpose(0, 1)
    y = y.transpose(0, 1)

In [None]:
# Mean-squared-error criterion.
loss_fn = torch.nn.MSELoss()

# A single linear layer taking one input per column of X and producing one
# output; see https://pytorch.org/docs/stable/nn.html#linear.
model = torch.nn.Linear(in_features=X.shape[1], out_features=1)

# Adam over the layer's own parameters (its weight and bias).
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

for epoch in range(30001):  # try 100 first
    # Start each iteration with clean gradients.
    optimizer.zero_grad()

    # Forward pass and loss.
    y_pred = model(X)
    loss = loss_fn(y_pred, y)
    if not epoch % 1000:
        print(epoch, loss.data)

    # Backward pass followed by the parameter update.
    loss.backward()
    optimizer.step()

In [None]:
# Ground truth once more, for comparison with the torch.nn fit.
print(w_true, b_true, sep='\n')

In [None]:
# Show the parameters learned by the torch.nn.Linear model trained above.
# The standalone tensors w and b belong to the earlier Adam section and were
# not touched by this model's training, so print the layer's own weight and bias.
print(model.weight.data)
print(model.bias.data)

# Experiment with linear regression and multi-layer NNs for the houses dataset.