First, a simple gradient descent solution.
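
In matrix form the cost function below is the quadratic form
$$
f(\mathbf{x}) = \frac{1}{2}\mathbf{x}^T A\mathbf{x} - \mathbf{b}^T\mathbf{x},\qquad
A = \begin{bmatrix} 2 & 1 \\ 1 & 20 \end{bmatrix},\quad
\mathbf{b} = \begin{bmatrix} 5 \\ 3 \end{bmatrix},
$$
so that $\nabla f(\mathbf{x}) = A\mathbf{x}-\mathbf{b}$ and the exact minimizer is the solution of the linear system $A\mathbf{x}=\mathbf{b}$.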

import numpy as np

# The function we want to optimize
def f(x):
    return x[0]**2 + 10.0*x[1]**2 + x[0]*x[1] - 5.0*x[0] - 3.0*x[1]

# Its gradient
def df(x):
    return np.array([2*x[0] + x[1] - 5.0, x[0] + 20*x[1] - 3.0])

# Matrix A
A = np.array([[2,1],[1,20]])
# Get the eigenvalues
EigValues, EigVectors = np.linalg.eig(A)
print(f"Eigenvalues of Matrix A:{EigValues}")
# Vector b
b = np.array([[5,3]])
# Exact solution of the linear system Ax = b
exact = np.linalg.solve(A, b.T)
print("Exact solution:",exact)

# start simple gradient descent with a random guess
x = np.random.randn(2,1)

# learning rate set by the largest eigenvalue of A;
# for this convex quadratic, gradient descent converges for any eta < 2/max(EigValues)
eta = 1.0/np.max(EigValues)
Niterations = 100

# a brute force implementation of gradient descent
for iteration in range(Niterations):
    gradient = df(x)
    x -= eta*gradient

print("Absolute error for gradient descent solution:", np.abs(x-exact))