Bayesian inference and conformal prediction (prediction intervals) in nnetsauce v0.18.1

This article was first published on T. Moudiki's Webpage - Python, and kindly contributed to python-bloggers. (You can report an issue about the content on this page here)
Want to share your content on python-bloggers? click here.



Open In Colab

Version 0.18.1 of nnetsauce (Python version) is available on PyPI and for conda. New developments include Bayesian inference and conformal prediction. Bayesian inference is available for scikit-learn models that possess a posterior distribution (BayesianRidge, ARDRegression, and GaussianProcessRegressor). Conformal prediction is available for every regression model that follows the “fit”/“predict” API. Conformal prediction for classification will be available in future versions.

Note: In examples, QRNN = Quasi-Randomized Nnetworks

1 – Installation

!pip uninstall nnetsauce --yes
!pip install nnetsauce --upgrade --no-cache-dir
!pip install matplotlib==3.1.3
import os 
import nnetsauce as ns 
import matplotlib.pyplot as plt 
import numpy as np 
import warnings
from sklearn.datasets import fetch_california_housing, load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import BayesianRidge, ARDRegression, RidgeCV
from sklearn.ensemble import ExtraTreesRegressor
from time import time 

2 – Useful plotting functions

# Suppress every warning so the example output below stays uncluttered.
warnings.filterwarnings(action="ignore")

# Shading colors for the prediction-interval bands drawn by the examples.
split_color = "green"    # band color used by the Bayesian examples
split_color2 = "orange"  # band color used by the split-conformal examples
local_color = "gray"     # not referenced by the examples shown in this post

def plot_func(x,
              y,
              y_u=None,
              y_l=None,
              pred=None,
              shade_color="",
              method_name="",
              title=""):
    """Plot observations with an optional prediction band and fitted line.

    Parameters
    ----------
    x : sequence
        Horizontal coordinates. Must support ``x[::-1]`` (the examples pass
        a ``range``).
    y : sequence
        Observed responses, drawn as semi-transparent black dots.
    y_u, y_l : sequence, optional
        Upper and lower bounds of the prediction interval. The shaded band
        is drawn only when BOTH are provided.
    pred : sequence, optional
        Point predictions, drawn as a dashed black line when provided.
    shade_color : str
        Face color of the shaded band; forwarded unchanged to ``plt.fill``.
    method_name : str
        Optional prefix for the band's legend entry.
    title : str
        Figure title.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Start a new figure so successive calls do not draw on top of each other.
    plt.figure()

    plt.plot(x, y, 'k.', alpha=.3, markersize=10,
             fillstyle='full', label=u'Test set observations')

    if (y_u is not None) and (y_l is not None):
        # Avoid a leading blank in the legend entry when no method name is set.
        if method_name:
            band_label = method_name + ' Prediction interval'
        else:
            band_label = 'Prediction interval'

        # Trace the upper bound left-to-right, then the lower bound
        # right-to-left, so the polygon encloses the interval.
        plt.fill(np.concatenate([x, x[::-1]]),
                 np.concatenate([y_u, y_l[::-1]]),
                 alpha=.3, fc=shade_color, ec='None',
                 label=band_label)

    if pred is not None:
        plt.plot(x, pred, 'k--', lw=2, alpha=0.9,
                 label=u'Predicted value')

    plt.xlabel('$X$')
    plt.ylabel('$Y$')
    plt.legend(loc='upper right')
    plt.title(title)

    plt.show()

3 – Examples of use

3 – 1 Conformalized Quasi-Randomized Nnetworks

# California housing data: features and target, with 20% held out for testing.
data = fetch_california_housing()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

3 – 1 conformalized QRNN

RidgeCV

# QRNN built on RidgeCV (original note: 5 hidden nodes, ReLU activation function).
regr1 = ns.CustomRegressor(RidgeCV())
regr1.fit(X_train, y_train)

# Only the split-conformal prediction step is timed.
start = time()
preds1 = regr1.predict(
    X_test, method="splitconformal", return_pi=True, level=95
)
print(f"Elapsed: {time() - start}s")

# preds1 is used as (point prediction, lower bound, upper bound):
# the coverage rate is the share of test responses inside [lower, upper].
covered1 = (preds1[1] <= y_test) & (preds1[2] >= y_test)
print(f"coverage_rate conformalized QRNN RidgeCV: {np.mean(covered1)}")

# Number of leading test points shown in the plots.
max_idx = 50
plot_func(
    x=range(max_idx),
    y=y_test[:max_idx],
    y_u=preds1[2][:max_idx],
    y_l=preds1[1][:max_idx],
    pred=preds1[0][:max_idx],
    shade_color=split_color2,
    title=f"conformalized QRNN RidgeCV ({max_idx} first points in test set)",
)
coverage_rate conformalized QRNN RidgeCV: 0.9578488372093024

xxx

Extra Trees

# QRNN built on Extra Trees (original note: 5 hidden nodes, ReLU activation function).
regr4 = ns.CustomRegressor(ExtraTreesRegressor())
regr4.fit(X_train, y_train)

# Only the split-conformal prediction step is timed.
start = time()
preds4 = regr4.predict(
    X_test, method="splitconformal", return_pi=True, level=90
)
print(f"Elapsed: {time() - start}s")
print(f"preds4: {preds4}")

# preds4 is used as (point prediction, lower bound, upper bound):
# the coverage rate is the share of test responses inside [lower, upper].
covered4 = (preds4[1] <= y_test) & (preds4[2] >= y_test)
print(f"coverage_rate conformalized QRNN ExtraTreesRegressor: {np.mean(covered4)}")

plot_func(
    x=range(max_idx),
    y=y_test[:max_idx],
    y_u=preds4[2][:max_idx],
    y_l=preds4[1][:max_idx],
    pred=preds4[0][:max_idx],
    shade_color=split_color2,
    title=f"conformalized QRNN ExtraTreesRegressor ({max_idx} first points in test set)",
)
preds4: (array([2.1156401, 1.11028  , 1.40237  , ..., 0.91221  , 1.94403  ,
       3.1501305]), array([1.2909301, 0.28557  , 0.57766  , ..., 0.0875   , 1.11932  ,
       2.3254205]), array([2.9403501, 1.93499  , 2.22708  , ..., 1.73692  , 2.76874  ,
       3.9748405]))
coverage_rate conformalized QRNN ExtraTreesRegressor: 0.9011627906976745

xxx

3 – 2 Bayesian Quasi-Randomized Nnetworks

Bayesian Ridge

# Diabetes data: features and target, with 20% held out for testing.
data = load_diabetes()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

print(X.shape)

# QRNN built on Bayesian Ridge; here the fitting step is the one being timed.
regr = ns.CustomRegressor(BayesianRidge())
start = time()
regr.fit(X_train, y_train)
print(f"Elapsed: {time() - start}s")

# With return_std=True, entries 2 and 3 of the result are used as the
# lower and upper interval bounds; entry 0 is the point prediction.
preds = regr.predict(X_test, return_std=True)
covered = (preds[2] <= y_test) & (preds[3] >= y_test)
print(f"coverage_rate Bayesian Ridge: {np.mean(covered)}")

(442, 10)
coverage_rate Bayesian Ridge: 0.9775280898876404
# Show the first max_idx test points with the Bayesian Ridge interval
# (entry 3 = upper bound, entry 2 = lower bound, entry 0 = prediction).
plot_func(
    x=range(max_idx),
    y=y_test[:max_idx],
    y_u=preds[3][:max_idx],
    y_l=preds[2][:max_idx],
    pred=preds[0][:max_idx],
    shade_color=split_color,
    title=f"Bayesian Ridge QRNN ({max_idx} first points in test set)",
)

xxx

ARD Regression

# QRNN built on ARD regression; the fitting step is the one being timed.
regr2 = ns.CustomRegressor(ARDRegression())
start = time()
regr2.fit(X_train, y_train)
print(f"Elapsed: {time() - start}s")

# With return_std=True, entries 2 and 3 of the result are used as the
# lower and upper interval bounds.
preds2 = regr2.predict(X_test, return_std=True)
covered2 = (preds2[2] <= y_test) & (preds2[3] >= y_test)
print(f"coverage_rate ARD Regressor: {np.mean(covered2)}")


coverage_rate ARD Regressor: 0.9775280898876404
# Show the first max_idx test points with the ARD regression interval
# (entry 3 = upper bound, entry 2 = lower bound, entry 0 = prediction).
plot_func(
    x=range(max_idx),
    y=y_test[:max_idx],
    y_u=preds2[3][:max_idx],
    y_l=preds2[2][:max_idx],
    pred=preds2[0][:max_idx],
    shade_color=split_color,
    title=f"QRNN ARD Regressor ({max_idx} first points in test set)",
)

xxx

To leave a comment for the author, please follow the link and comment on their blog: T. Moudiki's Webpage - Python.

Want to share your content on python-bloggers? click here.