Bayesian inference and conformal prediction (prediction intervals) in nnetsauce v0.18.1
Version 0.18.1 of nnetsauce (Python version) is available on PyPI and for conda. New developments include Bayesian inference and conformal prediction. Bayesian inference is available for scikit-learn models that possess a posterior distribution (BayesianRidge, ARDRegression, and GaussianProcessRegressor). Conformal prediction is available for every regression model that follows the fit/predict API. Conformal prediction for classification will be available in future versions.

Note: in the examples, QRNN = Quasi-Randomized Neural Networks.
1 – Installation
!pip uninstall nnetsauce --yes
!pip install nnetsauce --upgrade --no-cache-dir
!pip install matplotlib==3.1.3
import os
import nnetsauce as ns
import matplotlib.pyplot as plt
import numpy as np
import warnings
from sklearn.datasets import fetch_california_housing, load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import BayesianRidge, ARDRegression, RidgeCV
from sklearn.ensemble import ExtraTreesRegressor
from time import time
2 – Useful plotting functions
warnings.filterwarnings('ignore')

split_color = 'green'
split_color2 = 'orange'
local_color = 'gray'

def plot_func(x,
              y,
              y_u=None,
              y_l=None,
              pred=None,
              shade_color="",
              method_name="",
              title=""):
    fig = plt.figure()
    plt.plot(x, y, 'k.', alpha=.3, markersize=10,
             fillstyle='full', label=u'Test set observations')
    if (y_u is not None) and (y_l is not None):
        plt.fill(np.concatenate([x, x[::-1]]),
                 np.concatenate([y_u, y_l[::-1]]),
                 alpha=.3, fc=shade_color, ec='None',
                 label=method_name + ' Prediction interval')
    if pred is not None:
        plt.plot(x, pred, 'k--', lw=2, alpha=0.9,
                 label=u'Predicted value')
    #plt.ylim([-2.5, 7])
    plt.xlabel('$X$')
    plt.ylabel('$Y$')
    plt.legend(loc='upper right')
    plt.title(title)
    plt.show()
3 – Examples of use
3 – 1 Conformalized Quasi-Randomized Neural Networks
data = fetch_california_housing()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
RidgeCV
regr1 = ns.CustomRegressor(RidgeCV())  # 5 hidden nodes, ReLU activation function
regr1.fit(X_train, y_train)

start = time()
preds1 = regr1.predict(X_test, method="splitconformal", return_pi=True, level=95)
print(f"Elapsed: {time() - start}s")

print(f"coverage_rate conformalized QRNN RidgeCV: {np.mean((preds1[1]<=y_test)*(preds1[2]>=y_test))}")

max_idx = 50
plot_func(x=range(max_idx),
          y=y_test[0:max_idx],
          y_u=preds1[2][0:max_idx],
          y_l=preds1[1][0:max_idx],
          pred=preds1[0][0:max_idx],
          shade_color=split_color2,
          title=f"conformalized QRNN RidgeCV ({max_idx} first points in test set)")
coverage_rate conformalized QRNN RidgeCV: 0.9578488372093024
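The predict call with method="splitconformal" and return_pi=True returns a tuple used above as point predictions (preds1[0]), lower bounds (preds1[1]) and upper bounds (preds1[2]). Coverage close to the nominal 95% level is what we want, but it is also worth looking at the average interval width, since two models can reach the same coverage with very different sharpness. A small follow-up check on the same preds1 tuple:

# Average width of the 95% split conformal intervals (lower = preds1[1], upper = preds1[2])
print(f"mean interval width RidgeCV: {np.mean(preds1[2] - preds1[1])}")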
Extra Trees
regr4 = ns.CustomRegressor(ExtraTreesRegressor())  # 5 hidden nodes, ReLU activation function
regr4.fit(X_train, y_train)

start = time()
preds4 = regr4.predict(X_test, method="splitconformal", return_pi=True, level=90)
print(f"Elapsed: {time() - start}s")

print(f"preds4: {preds4}")
print(f"coverage_rate conformalized QRNN ExtraTreesRegressor: {np.mean((preds4[1]<=y_test)*(preds4[2]>=y_test))}")

plot_func(x=range(max_idx),
          y=y_test[0:max_idx],
          y_u=preds4[2][0:max_idx],
          y_l=preds4[1][0:max_idx],
          pred=preds4[0][0:max_idx],
          shade_color=split_color2,
          title=f"conformalized QRNN ExtraTreesRegressor ({max_idx} first points in test set)")
preds4: (array([2.1156401, 1.11028 , 1.40237 , ..., 0.91221 , 1.94403 ,
3.1501305]), array([1.2909301, 0.28557 , 0.57766 , ..., 0.0875 , 1.11932 ,
2.3254205]), array([2.9403501, 1.93499 , 2.22708 , ..., 1.73692 , 2.76874 ,
3.9748405]))
coverage_rate conformalized QRNN ExtraTreesRegressor: 0.9011627906976745
3 – 2 Bayesian Quasi-Randomized Neural Networks
Bayesian Ridge
data = load_diabetes()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
print(X.shape)

regr = ns.CustomRegressor(BayesianRidge())

start = time()
regr.fit(X_train, y_train)
print(f"Elapsed: {time() - start}s")

preds = regr.predict(X_test, return_std=True)
print(f"coverage_rate Bayesian Ridge: {np.mean((preds[2]<=y_test)*(preds[3]>=y_test))}")
(442, 10)
coverage_rate Bayesian Ridge: 0.9775280898876404
plot_func(x=range(max_idx),
          y=y_test[0:max_idx],
          y_u=preds[3][0:max_idx],
          y_l=preds[2][0:max_idx],
          pred=preds[0][0:max_idx],
          shade_color=split_color,
          title=f"Bayesian Ridge QRNN ({max_idx} first points in test set)")
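With return_std=True, the elements of preds used above are the predictive mean (preds[0]) and the lower and upper bounds (preds[2], preds[3]). Assuming the remaining element preds[1] is the predictive standard deviation (an assumption about the return structure, mirroring scikit-learn's return_std convention, not something shown in this post), intervals at other credibility levels can be rebuilt from the Gaussian approximation. A hedged sketch:

# Assumption: preds[1] holds the predictive standard deviation (not verified here)
from scipy.stats import norm

z = norm.ppf(0.995)  # two-sided 99% interval under a Gaussian approximation
lower_99 = preds[0] - z * preds[1]
upper_99 = preds[0] + z * preds[1]
print(f"coverage_rate Bayesian Ridge at 99%: {np.mean((lower_99<=y_test)*(upper_99>=y_test))}")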
ARD Regression
regr2 = ns.CustomRegressor(ARDRegression())
start = time()
regr2.fit(X_train, y_train)
print(f"Elapsed: {time() - start}s")
preds2 = regr2.predict(X_test, return_std=True)
print(f"coverage_rate ARD Regressor: {np.mean((preds2[2]<=y_test)*(preds2[3]>=y_test))}")
coverage_rate ARD Regressor: 0.9775280898876404
plot_func(x=range(max_idx),
          y=y_test[0:max_idx],
          y_u=preds2[3][0:max_idx],
          y_l=preds2[2][0:max_idx],
          pred=preds2[0][0:max_idx],
          shade_color=split_color,
          title=f"QRNN ARD Regressor ({max_idx} first points in test set)")
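GaussianProcessRegressor, the third model with a posterior distribution mentioned in the introduction, follows the same pattern as BayesianRidge and ARDRegression. A sketch along the same lines (the kernel choice below is illustrative, not taken from this post):

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

# Illustrative kernel; any kernel with a noise term could be used here
regr3 = ns.CustomRegressor(GaussianProcessRegressor(kernel=1.0 * RBF() + WhiteKernel(),
                                                    random_state=123))
regr3.fit(X_train, y_train)
preds3 = regr3.predict(X_test, return_std=True)
print(f"coverage_rate Gaussian Process: {np.mean((preds3[2]<=y_test)*(preds3[3]>=y_test))}")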