learningmachine v1.1.2: for Python

Posted on April 1, 2024 by T. Moudiki in Data science | 0 Comments

This article was first published on T. Moudiki's Webpage - Python , and kindly contributed to python-bloggers. (You can report issue about the content on this page here)
Want to share your content on python-bloggers? click here.

I talked about learningmachine – a package for machine learning with uncertainty quantification and interpretatbility – last week in #131.
Here comes the Python version!

Keep in mind that learningmachine is still experimental, probably with some quirks (because achieving this level of abstraction required some effort), with no beautiful documentation, but you can already tinker it and do advanced analysis, as shown below.

0 – Install and load packages

%load_ext rpy2.ipython

%%R

utils::install.packages("c('remotes', 'ranger')")
remotes::install_github("Techtonique/learningmachine")

!pip install learningmachine --upgrade --no-cache-dir

import learningmachine as lm
import numpy as np

from sklearn.datasets import load_breast_cancer, load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from time import time

1 – Adjust classifiers

clf_list = []

fit_obj = lm.Classifier(method = "ranger", level=None, nb_hidden=None)
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=13)

start = time()
fit_obj.fit(X_train, y_train)
print("Elapsed time: ", time() - start)
preds = fit_obj.predict(X_test)
score = np.mean(preds.ravel().astype(int) == y_test)
clf_list.append((fit_obj, "ranger", fit_obj.predict_proba(X_test), score))

print(classification_report(y_test, preds.ravel().astype(int)))

Elapsed time:  2.2424495220184326
              precision    recall  f1-score   support

           0       0.83      0.94      0.88        36
           1       0.97      0.91      0.94        78

    accuracy                           0.92       114
   macro avg       0.90      0.93      0.91       114
weighted avg       0.93      0.92      0.92       114

fit_obj = lm.Classifier(method = "ranger", level=None, nb_hidden=25)
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=13)

start = time()
fit_obj.fit(X_train, y_train)
print("Elapsed time: ", time() - start)
score = np.mean(fit_obj.predict(X_test).ravel().astype(int) == y_test)
clf_list.append((fit_obj, "ranger_qrnn", fit_obj.predict_proba(X_test), score))

print(classification_report(y_test, preds.ravel().astype(int)))

Elapsed time:  0.4195363521575928
              precision    recall  f1-score   support

           0       0.83      0.94      0.88        36
           1       0.97      0.91      0.94        78

    accuracy                           0.92       114
   macro avg       0.90      0.93      0.91       114
weighted avg       0.93      0.92      0.92       114

fit_obj = lm.Classifier(method = "ranger", level=95, nb_hidden=0)
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=13)

start = time()
fit_obj.fit(X_train, y_train)
print("Elapsed time: ", time() - start)
preds = fit_obj.predict(X_test)
score = np.mean(preds.ravel().astype(int) == y_test)
clf_list.append((fit_obj, "ranger_calibrated", fit_obj.predict_proba(X_test), score))

print(classification_report(y_test, preds.ravel().astype(int)))

Elapsed time:  0.23552465438842773
              precision    recall  f1-score   support

           0       0.88      0.97      0.92        36
           1       0.99      0.94      0.96        78

    accuracy                           0.95       114
   macro avg       0.93      0.95      0.94       114
weighted avg       0.95      0.95      0.95       114

fit_obj = lm.Classifier(method = "ranger", level=95, nb_hidden=25)
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=13)

start = time()
fit_obj.fit(X_train, y_train)
print("Elapsed time: ", time() - start)
preds = fit_obj.predict(X_test)
score = np.mean(preds.ravel().astype(int) == y_test)
clf_list.append((fit_obj, "ranger_qrnn_calibrated", fit_obj.predict_proba(X_test), score))

print(classification_report(y_test, preds.ravel().astype(int)))

Elapsed time:  0.23675990104675293
              precision    recall  f1-score   support

           0       0.85      0.97      0.91        36
           1       0.99      0.92      0.95        78

    accuracy                           0.94       114
   macro avg       0.92      0.95      0.93       114
weighted avg       0.94      0.94      0.94       114

names = [clf_list[i][1] for i in range(len(clf_list))]
classifiers = [clf_list[i][0] for i in range(len(clf_list))]
scores = [clf_list[i][2] for i in range(len(clf_list))]

2 – Visualizing classifiers in 2D

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap

from sklearn.datasets import make_circles, make_classification, make_moons
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


X, y = make_classification(
    n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1
)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

datasets = [
    make_moons(noise=0.3, random_state=0),
    make_circles(noise=0.2, factor=0.5, random_state=1),
    linearly_separable,
]

figure = plt.figure(figsize=(27, 9))
i = 1
# iterate over datasets
for ds_cnt, ds in enumerate(datasets):
    # preprocess dataset, split into training and test part
    X, y = ds[0], ds[1]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=42
    )

    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5

    # just plot the dataset first
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(["#FF0000", "#0000FF"])
    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
    if ds_cnt == 0:
        ax.set_title("Input data")
    # Plot the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors="k")
    # Plot the testing points
    ax.scatter(
        X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6, edgecolors="k"
    )
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
    i += 1

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)

        clf = make_pipeline(StandardScaler(), clf)
        clf.fit(X_train, y_train)
        try:
            score = clf.score(X_test, y_test)
        except: # no scoring method available yet for prediction sets
            score = np.mean(clf.predict_proba(X_test).argmax(axis=1) == y_test)
        DecisionBoundaryDisplay.from_estimator(
            clf, X, cmap=cm, alpha=0.8, ax=ax, eps=0.5
        )

        # Plot the training points
        ax.scatter(
            X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors="k"
        )
        # Plot the testing points
        ax.scatter(
            X_test[:, 0],
            X_test[:, 1],
            c=y_test,
            cmap=cm_bright,
            edgecolors="k",
            alpha=0.6,
        )

        ax.set_xlim(x_min, x_max)
        ax.set_ylim(y_min, y_max)
        ax.set_xticks(())
        ax.set_yticks(())
        if ds_cnt == 0:
            ax.set_title(name)
        ax.text(
            x_max - 0.3,
            y_min + 0.3,
            ("%.2f" % score).lstrip("0"),
            size=15,
            horizontalalignment="right",
        )
        i += 1

plt.tight_layout()
plt.show()

xxx

To leave a comment for the author, please follow the link and comment on their blog: T. Moudiki's Webpage - Python .

Want to share your content on python-bloggers? click here.

Python-bloggers

Data science news and tutorials - contributed by Python bloggers

learningmachine v1.1.2: for Python

0 – Install and load packages

1 – Adjust classifiers

2 – Visualizing classifiers in 2D

Related