import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
import scikitplot as skplt
from sklearn.metrics import mean_squared_error
n = 100
maxdegree = 6
# Make data set.
x = np.linspace(-3, 3, n).reshape(-1, 1)
y = np.exp(-x**2) + 1.5 * np.exp(-(x-2)**2)+ np.random.normal(0, 0.1, x.shape)
error = np.zeros(maxdegree)
bias = np.zeros(maxdegree)
variance = np.zeros(maxdegree)
polydegree = np.zeros(maxdegree)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
for degree in range(1,maxdegree):
model = GradientBoostingRegressor(max_depth=degree, n_estimators=100, learning_rate=1.0)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
polydegree[degree] = degree
error[degree] = np.mean( np.mean((y_test - y_pred)**2) )
bias[degree] = np.mean( (y_test - np.mean(y_pred))**2 )
variance[degree] = np.mean( np.var(y_pred) )
print('Max depth:', degree)
print('Error:', error[degree])
print('Bias^2:', bias[degree])
print('Var:', variance[degree])
print('{} >= {} + {} = {}'.format(error[degree], bias[degree], variance[degree], bias[degree]+variance[degree]))
plt.xlim(1,maxdegree-1)
plt.plot(polydegree, error, label='Error')
plt.plot(polydegree, bias, label='bias')
plt.plot(polydegree, variance, label='Variance')
plt.legend()
save_fig("gdregression")
plt.show()