Adaptive (online/streaming) learning with uncertainty quantification using Polyak averaging in learningmachine

This article was first published on T. Moudiki's Webpage - Python, and kindly contributed to python-bloggers. (You can report an issue about the content on this page here.)
Want to share your content on python-bloggers? click here.

The model presented here is a frequentist – conformalized – version of the Bayesian one presented in #152. It is implemented in learningmachine, both in Python and R, and is updated as new observations arrive, using Polyak averaging. Model explanations are given as sensitivity analyses.

1 – R version

%load_ext rpy2.ipython
%%R

# Install the required packages, preferring the Techtonique R-universe
# repository and falling back to CRAN when a package is not found there.
for (pkg in c("bayesianrvfl", "learningmachine")) {
  utils::install.packages(pkg,
                          repos = c("https://techtonique.r-universe.dev",
                                    "https://cloud.r-project.org"))
}
%%R

library(learningmachine)

# Predictors: every mtcars column except mpg; response: mpg (miles per gallon)
X <- as.matrix(mtcars[,-1])
y <- mtcars$mpg

# Reproducible 60/40 train/test split of the 32 rows
set.seed(123)
(index_train <- base::sample.int(n = nrow(X),
                                 size = floor(0.6*nrow(X)),
                                 replace = FALSE))
##  [1] 31 15 19 14  3 10 18 22 11  5 20 29 23 30  9 28  8 27  7
X_train <- X[index_train, ]
y_train <- y[index_train]
X_test <- X[-index_train, ]
y_test <- y[-index_train]
# 19 training rows, 13 test rows, 10 predictors each
dim(X_train)
## [1] 19 10
dim(X_test)
[1] 13 10
%%R

# Bayesian Random Vector Functional Link (RVFL) regressor with 5 hidden nodes
obj <- learningmachine::Regressor$new(method = "bayesianrvfl",
                                      nb_hidden = 5L)
obj$get_type()
[1] "regression"
%%R

# Select the ridge regularization parameter by Generalized Cross-Validation:
# fit over a grid of lambdas and keep the one minimizing the GCV criterion
obj_GCV <- bayesianrvfl::fit_rvfl(x = X_train, y = y_train)
(best_lambda <- obj_GCV$lambda[which.min(obj_GCV$GCV)])
[1] 12.9155
%%R

# Fit the model on the training set with the GCV-selected lambda, timing the call
t0 <- proc.time()[3]
obj$fit(X_train, y_train, reg_lambda = best_lambda)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
Elapsed:  0.01 s 
%%R

# Snapshot the current coefficients so we can measure how much the update moves them
previous_coefs <- drop(obj$model$coef)

# Next observation arriving from the "stream": the first test point
newx <- X_test[1, ]
newy <- y_test[1]

# Remaining test set after consuming that observation
new_X_test <- X_test[-1, ]
new_y_test <- y_test[-1]

# Online update of the fitted model via Polyak averaging;
# alpha weighs the new information against the existing coefficients
t0 <- proc.time()[3]
obj$update(newx, newy, method = "polyak", alpha = 0.6)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")

print(summary(previous_coefs))

# Distribution of coefficient changes induced by this single-observation update
print(summary(drop(obj$model$coef) - previous_coefs))


# Visualize the per-coefficient change (dashed red line = no change)
plot(drop(obj$model$coef) - previous_coefs, type='l')
abline(h = 0, lty=2, col="red")

Elapsed:  0.003 s 
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-0.96778 -0.51401 -0.16335 -0.05234  0.31900  0.98482 
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-0.065436 -0.002152  0.027994  0.015974  0.040033  0.058892 

(Figure: line plot of the difference between updated and previous coefficients, with a dashed red line at zero)

%%R

# Out-of-sample diagnostics after the update: R-squared, residuals, coverage
# rate of the prediction intervals, and sensitivity analyses (citests/effects)
print(obj$summary(new_X_test, y=new_y_test, show_progress=FALSE))
$R_squared
[1] 0.6692541

$R_squared_adj
[1] -2.638205

$Residuals
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-4.5014 -2.2111 -0.5532 -0.3928  1.3495  3.9206 

$Coverage_rate
[1] 100

$citests
         estimate        lower        upper      p-value signif
cyl   -41.4815528  -43.6039915  -39.3591140 1.306085e-13    ***
disp   -0.5937584   -0.7014857   -0.4860311 1.040246e-07    ***
hp     -1.0226867   -1.2175471   -0.8278263 1.719172e-07    ***
drat   84.5859637   73.2987057   95.8732217 4.178658e-09    ***
wt   -169.1047879 -189.5595154 -148.6500603 1.469605e-09    ***
qsec   22.3026258   15.1341951   29.4710566 2.772362e-05    ***
vs    113.3209911   88.3101728  138.3318093 7.599984e-07    ***
am    175.1639102  139.5755741  210.7522464 3.304560e-07    ***
gear   44.3270639   36.1456398   52.5084881 1.240722e-07    ***
carb  -59.6511203  -69.8576126  -49.4446280 5.677270e-08    ***

$effects
── Data Summary ────────────────────────
                           Values 
Name                       effects
Number of rows             12     
Number of columns          10     
_______________________           
Column type frequency:            
  numeric                  10     
________________________          
Group variables            None   

── Variable type: numeric ──────────────────────────────────────────────────────
   skim_variable     mean     sd       p0      p25      p50      p75     p100
 1 cyl            -41.5    3.34   -43.4    -43.4    -43.3    -41.7    -34.5  
 2 disp            -0.594  0.170   -0.916   -0.635   -0.505   -0.505   -0.356
 3 hp              -1.02   0.307   -1.44    -1.40    -0.877   -0.768   -0.768
 4 drat            84.6   17.8     59.5     76.7     89.5     89.5    128.   
 5 wt            -169.    32.2   -204.    -199.    -166.    -138.    -138.   
 6 qsec            22.3   11.3     13.3     13.3     17.4     29.2     40.1  
 7 vs             113.    39.4     59.6     94.4     94.4    117.     191.   
 8 am             175.    56.0    124.     124.     153.     226.     245.   
 9 gear            44.3   12.9     26.3     38.7     47.9     47.9     76.0  
10 carb           -59.7   16.1    -77.3    -74.6    -58.2    -44.4    -44.4  
   hist 
 1 ▇▁▁▁▂
 2 ▂▁▃▇▁
 3 ▅▁▁▂▇
 4 ▂▃▇▁▁
 5 ▇▁▁▁▇
 6 ▇▂▁▁▃
 7 ▁▇▃▁▂
 8 ▇▁▁▂▃
 9 ▂▃▇▁▁
10 ▇▁▁▁▇
%%R

# Point predictions and prediction-interval bounds on the remaining test set
res <- obj$predict(X = new_X_test)

# Training response augmented with the observation consumed by the update
new_y_train <- c(y_train, newy)

# Plot the series: gray = interval bounds, red = point predictions,
# blue = observed test values
plot(c(new_y_train, res$preds), type='l',
    main="",
    ylab="",
    ylim = c(min(c(res$upper, res$lower, y)),
             max(c(res$upper, res$lower, y))))
lines(c(new_y_train, res$upper), col="gray60")
lines(c(new_y_train, res$lower), col="gray60")
lines(c(new_y_train, res$preds), col = "red")
lines(c(new_y_train, new_y_test), col = "blue")
# Dashed vertical line marks the boundary between training and test samples
abline(v = length(y_train), lty=2, col="black")

(Figure: out-of-sample predictions (red) with prediction intervals (gray) against observed values (blue); the dashed vertical line marks the end of the training set)

%%R

# Second streaming observation: the next test point
newx <- X_test[2, ]
newy <- y_test[2]

# Remaining test set after consuming the first two observations
new_X_test <- X_test[-c(1, 2), ]
new_y_test <- y_test[-c(1, 2)]

# Second Polyak-averaging update, this time with a higher alpha (0.9),
# giving more weight to the incoming observation
t0 <- proc.time()[3]
obj$update(newx, newy, method = "polyak", alpha = 0.9)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")

# Diagnostics on the (smaller) remaining test set
print(obj$summary(new_X_test, y=new_y_test, show_progress=FALSE))


res <- obj$predict(X = new_X_test)

# Training response now includes both consumed test observations
new_y_train <- c(y_train, y_test[c(1, 2)])

# Same plot layout as before: gray = interval bounds, red = predictions,
# blue = observed values
plot(c(new_y_train, res$preds), type='l',
    main="",
    ylab="",
    ylim = c(min(c(res$upper, res$lower, y)),
             max(c(res$upper, res$lower, y))))
lines(c(new_y_train, res$upper), col="gray60")
lines(c(new_y_train, res$lower), col="gray60")
lines(c(new_y_train, res$preds), col = "red")
lines(c(new_y_train, new_y_test), col = "blue")
# Dashed vertical line marks the end of the original training set
abline(v = length(y_train), lty=2, col="black")
Elapsed:  0.003 s 
$R_squared
[1] 0.6426871

$R_squared_adj
[1] -Inf

$Residuals
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-4.5686 -2.4084 -1.0397 -0.3897  1.5507  4.0215 

$Coverage_rate
[1] 100

$citests
         estimate        lower        upper      p-value signif
cyl   -42.1261096  -44.5327541  -39.7194651 2.932516e-12    ***
disp   -0.6256505   -0.7347381   -0.5165629 1.613495e-07    ***
hp     -1.0139634   -1.2198651   -0.8080617 6.747693e-07    ***
drat   82.8645391   74.8033348   90.9257434 5.680663e-10    ***
wt   -170.7891742 -193.1932631 -148.3850853 1.053193e-08    ***
qsec   22.2365552   13.9564091   30.5167012 1.350094e-04    ***
vs    119.1784891   94.0163626  144.3406157 9.681321e-07    ***
am    174.2138307  134.1390652  214.2885963 2.127371e-06    ***
gear   42.7943293   36.9622907   48.6263678 1.523695e-08    ***
carb  -59.4034661  -70.5135723  -48.2933599 3.127231e-07    ***

$effects
── Data Summary ────────────────────────
                           Values 
Name                       effects
Number of rows             11     
Number of columns          10     
_______________________           
Column type frequency:            
  numeric                  10     
________________________          
Group variables            None   

── Variable type: numeric ──────────────────────────────────────────────────────
   skim_variable     mean     sd       p0      p25      p50      p75     p100
 1 cyl            -42.1    3.58   -44.1    -44.1    -44.1    -43.0    -34.9  
 2 disp            -0.626  0.162   -0.933   -0.643   -0.514   -0.514   -0.514
 3 hp              -1.01   0.306   -1.47    -1.24    -0.787   -0.787   -0.787
 4 drat            82.9   12.0     61.2     79.6     91.7     91.7     91.7  
 5 wt            -171.    33.3   -210.    -204.    -142.    -142.    -142.   
 6 qsec            22.2   12.3     13.2     13.2     13.2     30.7     41.2  
 7 vs             119.    37.5     96.0     96.0     96.0    117.     193.   
 8 am             174.    59.7    123.     123.     123.     233.     247.   
 9 gear            42.8    8.68    27.1     40.4     49.2     49.2     49.2  
10 carb           -59.4   16.5    -78.8    -76.0    -45.1    -45.1    -45.1  
   hist 
 1 ▇▁▁▁▂
 2 ▂▁▁▃▇
 3 ▃▁▁▂▇
 4 ▂▁▁▃▇
 5 ▇▁▁▁▇
 6 ▇▂▁▁▃
 7 ▇▃▁▁▂
 8 ▇▁▁▂▃
 9 ▂▁▁▃▇
10 ▇▁▁▁▇

(Figure: out-of-sample predictions with prediction intervals after the second Polyak update, same layout as the previous figure)

2 – Python version

!pip install git+https://github.com/Techtonique/learningmachine_python.git --verbose
import pandas as pd
import numpy as np
import warnings
import learningmachine as lm


# Load the mtcars dataset from a public CSV mirror
data = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv")
# Drop the response (mpg), then drop the first remaining column
# (presumably the car model name in this CSV — verify against its header)
X = data.drop("mpg", axis=1).values
X = pd.DataFrame(X).iloc[:,1:]
# NOTE(review): float16 halves memory but loses precision — the rounded
# values are visible in describe() below; confirm this is intentional
X = X.astype(np.float16)
# Restore the 10 predictor names lost by the .values round-trip
X.columns = ["cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb"]
y = data["mpg"].values
display(X.describe())
display(X.head())
display(X.dtypes)
cyldisphpdratwtqsecvsamgearcarb
count32.00000032.00000032.000032.00000032.00000032.00000032.00000032.00000032.00000032.000000
mean6.187500230.750000146.75003.5957033.21679717.8437500.4375000.4062503.6875002.812500
std1.786133123.93750068.56250.5346680.9785161.7880860.5039060.4990230.7377931.615234
min4.00000071.12500052.00002.7597661.51269514.5000000.0000000.0000003.0000001.000000
25%4.000000120.82812596.50003.0800782.58056616.8984380.0000000.0000003.0000002.000000
50%6.000000196.312500123.00003.6943363.32519517.7031250.0000000.0000004.0000002.000000
75%8.000000326.000000180.00003.9199223.61035218.9062501.0000001.0000004.0000004.000000
max8.000000472.000000335.00004.9296885.42578122.9062501.0000001.0000005.0000008.000000

cyldisphpdratwtqsecvsamgearcarb
06.0160.0110.03.9003912.61914116.4531250.01.04.04.0
16.0160.0110.03.9003912.87500017.0156250.01.04.04.0
24.0108.093.03.8496092.32031218.6093751.01.04.01.0
36.0258.0110.03.0800783.21484419.4375001.00.03.01.0
48.0360.0175.03.1503913.43945317.0156250.00.03.02.0

0
cylfloat16
dispfloat16
hpfloat16
dratfloat16
wtfloat16
qsecfloat16
vsfloat16
amfloat16
gearfloat16
carbfloat16

y.dtype
dtype('float64')
X
cyldisphpdratwtqsecvsamgearcarb
06.0160.0000110.03.9003912.61914116.4531250.01.04.04.0
16.0160.0000110.03.9003912.87500017.0156250.01.04.04.0
24.0108.000093.03.8496092.32031218.6093751.01.04.01.0
36.0258.0000110.03.0800783.21484419.4375001.00.03.01.0
48.0360.0000175.03.1503913.43945317.0156250.00.03.02.0
56.0225.0000105.02.7597663.46093820.2187501.00.03.01.0
68.0360.0000245.03.2109383.57031215.8437500.00.03.04.0
74.0146.750062.03.6894533.18945320.0000001.00.04.02.0
84.0140.750095.03.9199223.15039122.9062501.00.04.02.0
96.0167.6250123.03.9199223.43945318.2968751.00.04.04.0
106.0167.6250123.03.9199223.43945318.9062501.00.04.04.0
118.0275.7500180.03.0703124.07031217.4062500.00.03.03.0
128.0275.7500180.03.0703123.73046917.5937500.00.03.03.0
138.0275.7500180.03.0703123.77929718.0000000.00.03.03.0
148.0472.0000205.02.9296885.25000017.9843750.00.03.04.0
158.0460.0000215.03.0000005.42578117.8125000.00.03.04.0
168.0440.0000230.03.2304695.34375017.4218750.00.03.04.0
174.078.687566.04.0781252.19921919.4687501.01.04.01.0
184.075.687552.04.9296881.61523418.5156251.01.04.02.0
194.071.125065.04.2187501.83496119.9062501.01.04.01.0
204.0120.125097.03.6992192.46484420.0156251.00.03.01.0
218.0318.0000150.02.7597663.51953116.8750000.00.03.02.0
228.0304.0000150.03.1503913.43554717.2968750.00.03.02.0
238.0350.0000245.03.7304693.83984415.4062500.00.03.04.0
248.0400.0000175.03.0800783.84570317.0468750.00.03.02.0
254.079.000066.04.0781251.93457018.9062501.01.04.01.0
264.0120.312591.04.4296882.14062516.7031250.01.05.02.0
274.095.1250113.03.7695311.51269516.9062501.01.05.02.0
288.0351.0000264.04.2187503.16992214.5000000.01.05.04.0
296.0145.0000175.03.6191412.76953115.5000000.01.05.06.0
308.0301.0000335.03.5390623.57031214.6015620.01.05.08.0
314.0121.0000109.04.1093752.77929718.5937501.01.04.02.0


# Split the data into training and testing sets (80/20, reproducible seed)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Create a Bayesian RVFL regressor object (same method as the R version)
obj = lm.Regressor(method = "bayesianrvfl", nb_hidden = 5)

# Fit the model using the training data; reg_lambda reuses the
# GCV-selected value obtained in the R session earlier in the article
obj.fit(X_train, y_train, reg_lambda=12.9155)

# Print the summary of the model on the held-out test set
print(obj.summary(X_test, y=y_test, show_progress=False))
$R_squared
[1] 0.6416309

$R_squared_adj
[1] 1.537554

$Residuals
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-4.0724 -2.0122 -0.1018 -0.1941  1.4361  3.9676 

$Coverage_rate
[1] 100

$citests
         estimate       lower        upper      p-value signif
cyl   -24.5943583  -40.407994   -8.7807230 8.909365e-03     **
disp   -0.2419797   -0.370835   -0.1131245 3.711077e-03     **
hp     -1.5734483   -1.722903   -1.4239939 2.255640e-07    ***
drat  142.5646192  124.575179  160.5540599 1.217808e-06    ***
wt   -144.8871352 -158.911143 -130.8631275 2.523441e-07    ***
qsec   46.8290859   27.829411   65.8287611 9.388045e-04    ***
vs     75.0555146   30.645127  119.4659017 6.110043e-03     **
am    207.5935234  133.205572  281.9814744 4.843095e-04    ***
gear   73.6892658   60.186232   87.1922995 1.091470e-05    ***
carb  -71.2974988  -79.480400  -63.1145974 6.944475e-07    ***

$effects
── Data Summary ────────────────────────
                           Values 
Name                       effects
Number of rows             7      
Number of columns          10     
_______________________           
Column type frequency:            
  numeric                  10     
________________________          
Group variables            None   

── Variable type: numeric ──────────────────────────────────────────────────────
   skim_variable     mean     sd       p0      p25      p50      p75       p100
 1 cyl            -24.6   17.1    -38.5    -38.5    -33.4    -12.4      1.66   
 2 disp            -0.242  0.139   -0.351   -0.351   -0.285   -0.181    0.00546
 3 hp              -1.57   0.162   -1.90    -1.61    -1.48    -1.47    -1.47   
 4 drat           143.    19.5    125.     125.     141.     154.     174.     
 5 wt            -145.    15.2   -167.    -152.    -142.    -142.    -117.     
 6 qsec            46.8   20.5     14.1     35.3     55.7     62.4     62.4    
 7 vs              75.1   48.0     37.2     37.2     58.7     93.9    167.     
 8 am             208.    80.4     64.3    168.     250.     267.     267.     
 9 gear            73.7   14.6     60.6     60.6     72.7     82.1     96.9    
10 carb           -71.3    8.85   -84.4    -75.2    -69.7    -69.7    -55.2    
   hist 
 1 ▇▁▂▁▃
 2 ▇▂▁▂▂
 3 ▂▁▁▃▇
 4 ▇▂▂▂▂
 5 ▂▅▇▁▂
 6 ▃▁▁▂▇
 7 ▇▁▃▁▂
 8 ▂▂▁▂▇
 9 ▇▂▂▂▂
10 ▂▅▇▁▂
# Select the first test sample as the incoming "streaming" observation
newx = X_test.iloc[0,:]
newy = y_test[0]

# Remaining test data after consuming that observation
new_X_test = X_test[1:]
new_y_test = y_test[1:]
# Update the model with the new sample via Polyak averaging (alpha = 0.9)
obj.update(newx, newy, method="polyak", alpha=0.9)

# Print the summary of the updated model on the remaining test data
print(obj.summary(new_X_test, y=new_y_test, show_progress=False))
$R_squared
[1] 0.6051442

$R_squared_adj
[1] 1.394856

$Residuals
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-4.6214 -2.5055 -1.5003 -0.8308  1.2738  3.2794 

$Coverage_rate
[1] 100

$citests
         estimate        lower       upper      p-value signif
cyl   -30.0502823  -48.4171958  -11.683369 8.442658e-03     **
disp   -0.2958477   -0.4386085   -0.153087 3.121989e-03     **
hp     -1.6053302   -1.6789750   -1.531685 3.424156e-08    ***
drat  153.7968829  131.5239191  176.069847 1.041460e-05    ***
wt   -155.1954135 -174.4144275 -135.976399 4.804729e-06    ***
qsec   49.8967685   26.9993778   72.794159 2.504905e-03     **
vs     87.4170764   32.4599776  142.374175 9.457226e-03     **
am    214.5918910  119.8712855  309.312496 2.108825e-03     **
gear   83.1355825   65.3159018  100.955263 7.110354e-05    ***
carb  -77.1384645  -88.2087477  -66.068181 9.958425e-06    ***

$effects
── Data Summary ────────────────────────
                           Values 
Name                       effects
Number of rows             6      
Number of columns          10     
_______________________           
Column type frequency:            
  numeric                  10     
________________________          
Group variables            None   

── Variable type: numeric ──────────────────────────────────────────────────────
   skim_variable     mean      sd       p0      p25      p50      p75      p100
 1 cyl            -30.1   17.5     -42.5    -42.5    -39.9    -17.7     -4.35  
 2 disp            -0.296  0.136    -0.377   -0.377   -0.343   -0.308   -0.0269
 3 hp              -1.61   0.0702   -1.70    -1.66    -1.57    -1.57    -1.53  
 4 drat           154.    21.2     137.     137.     144.     169.     185.    
 5 wt            -155.    18.3    -182.    -160.    -154.    -154.    -125.    
 6 qsec            49.9   21.8      18.8     33.3     60.6     66.6     66.6   
 7 vs              87.4   52.4      46.7     46.7     73.7    105.     178.    
 8 am             215.    90.3      65.6    169.     266.     275.     275.    
 9 gear            83.1   17.0      70.0     70.0     75.5     94.1    109.    
10 carb           -77.1   10.5     -92.5    -79.9    -76.6    -76.6    -59.7   
   hist 
 1 ▇▁▁▁▃
 2 ▇▂▁▁▂
 3 ▅▁▁▇▂
 4 ▇▂▁▁▅
 5 ▂▂▇▁▂
 6 ▅▁▁▂▇
 7 ▇▁▅▁▂
 8 ▂▂▁▁▇
 9 ▇▂▁▂▂
10 ▂▂▇▁▂
To leave a comment for the author, please follow the link and comment on their blog: T. Moudiki's Webpage - Python .

Want to share your content on python-bloggers? click here.