The code below implements the stochastic gradient descent approach discussed above, with Autograd computing the gradients. Note that when calling Autograd's grad function we must specify which argument we are taking the derivative with respect to.
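As a minimal sketch of how this argument selection works (the two-argument function f here is only an illustration and is not part of the example that follows), grad takes the index of the argument to differentiate with respect to as its second parameter:
# Minimal sketch: selecting the differentiation argument with Autograd
from autograd import grad

def f(a, b):
    return a*b**2

# grad(f, 1) differentiates f with respect to its second argument b (index 1)
df_db = grad(f, 1)
print(df_db(3.0, 2.0))   # 2*a*b = 12.0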
# Using Autograd to calculate gradients using SGD
# OLS example
# Use Autograd's wrapped numpy so that gradients can be traced through numpy operations
import autograd.numpy as np
import matplotlib.pyplot as plt
from autograd import grad
# Note change from previous example
def CostOLS(y, X, theta):
    return np.sum((y - X @ theta)**2)
n = 100
x = 2*np.random.rand(n,1)
y = 4+3*x+np.random.randn(n,1)
X = np.c_[np.ones((n,1)), x]
XT_X = X.T @ X
theta_linreg = np.linalg.pinv(XT_X) @ (X.T @ y)
print("Own inversion")
print(theta_linreg)
# Hessian matrix
H = (2.0/n)* XT_X
EigValues, EigVectors = np.linalg.eig(H)
print(f"Eigenvalues of Hessian Matrix:{EigValues}")
theta = np.random.randn(2,1)
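# For this quadratic cost the Hessian is constant; gradient descent with a fixed step size
# converges for eta < 2/(largest eigenvalue), so eta = 1/(largest eigenvalue) is a safe choice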
eta = 1.0/np.max(EigValues)
Niterations = 1000
# Note that we request the derivative with respect to the third argument (theta, which has index 2)
training_gradient = grad(CostOLS,2)
for iter in range(Niterations):
    gradients = (1.0/n)*training_gradient(y, X, theta)
    theta -= eta*gradients
print("theta from own gd")
print(theta)
xnew = np.array([[0],[2]])
Xnew = np.c_[np.ones((2,1)), xnew]
ypredict = Xnew.dot(theta)
ypredict2 = Xnew.dot(theta_linreg)
plt.plot(xnew, ypredict, "r-")
plt.plot(xnew, ypredict2, "b-")
plt.plot(x, y, 'ro')
plt.axis([0,2.0,0, 15.0])
plt.xlabel(r'$x$')
plt.ylabel(r'$y$')
plt.title(r'Random numbers ')
plt.show()
n_epochs = 50
M = 5 #size of each minibatch
m = int(n/M) #number of minibatches
t0, t1 = 5, 50
def learning_schedule(t):
    return t0/(t + t1)
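# The schedule decays the step size with the update count t: learning_schedule(0) = 0.1,
# learning_schedule(50) = 0.05, and about 0.005 after the last update (t = n_epochs*m - 1 = 999)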
theta = np.random.randn(2,1)
for epoch in range(n_epochs):
    # Can you figure out a better way of setting up the contributions to each batch?
    # (one possible alternative is sketched after this listing)
    for i in range(m):
        # Pick one of the m minibatches at random (start index aligned to the batch size M)
        random_index = M*np.random.randint(m)
        xi = X[random_index:random_index+M]
        yi = y[random_index:random_index+M]
        gradients = (1.0/M)*training_gradient(yi, xi, theta)
        eta = learning_schedule(epoch*m + i)
        theta = theta - eta*gradients
print("theta from own sdg")
print(theta)
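One possible answer to the question posed in the loop above is to shuffle the data once per epoch and then sweep through disjoint minibatches, so that every data point contributes exactly once per epoch. The sketch below reuses the arrays and helper functions defined in the listing above; it is one reasonable choice, not the only one.
# Sketch: disjoint minibatches drawn from a fresh shuffle each epoch
theta = np.random.randn(2,1)
for epoch in range(n_epochs):
    shuffled_indices = np.random.permutation(n)
    for i in range(m):
        batch = shuffled_indices[i*M:(i+1)*M]
        xi = X[batch]
        yi = y[batch]
        gradients = (1.0/M)*training_gradient(yi, xi, theta)
        eta = learning_schedule(epoch*m + i)
        theta = theta - eta*gradients
print("theta from shuffled minibatches")
print(theta)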