This article was first published on
T. Moudiki's Webpage - Python , and kindly contributed to python-bloggers. (You can report issue about the content on this page here)
Want to share your content on python-bloggers? click here.
Want to share your content on python-bloggers? click here.
I talked about learningmachine
– a package for machine learning with uncertainty quantification and interpretatbility – last week in #131.
Here comes the Python version!
Keep in mind that learningmachine
is still experimental, probably with some quirks (because achieving this level of abstraction required some effort), with no beautiful documentation, but you can already tinker it and do advanced analysis, as shown below.
0 – Install and load packages
%load_ext rpy2.ipython
%%R utils::install.packages("c('remotes', 'ranger')") remotes::install_github("Techtonique/learningmachine")
!pip install learningmachine --upgrade --no-cache-dir
import learningmachine as lm import numpy as np from sklearn.datasets import load_breast_cancer, load_wine from sklearn.model_selection import train_test_split from sklearn.metrics import classification_report from time import time
1 – Adjust classifiers
clf_list = []
fit_obj = lm.Classifier(method = "ranger", level=None, nb_hidden=None) dataset = load_breast_cancer() X = dataset.data y = dataset.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13) start = time() fit_obj.fit(X_train, y_train) print("Elapsed time: ", time() - start) preds = fit_obj.predict(X_test) score = np.mean(preds.ravel().astype(int) == y_test) clf_list.append((fit_obj, "ranger", fit_obj.predict_proba(X_test), score)) print(classification_report(y_test, preds.ravel().astype(int)))
Elapsed time: 2.2424495220184326 precision recall f1-score support 0 0.83 0.94 0.88 36 1 0.97 0.91 0.94 78 accuracy 0.92 114 macro avg 0.90 0.93 0.91 114 weighted avg 0.93 0.92 0.92 114
fit_obj = lm.Classifier(method = "ranger", level=None, nb_hidden=25) dataset = load_breast_cancer() X = dataset.data y = dataset.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13) start = time() fit_obj.fit(X_train, y_train) print("Elapsed time: ", time() - start) score = np.mean(fit_obj.predict(X_test).ravel().astype(int) == y_test) clf_list.append((fit_obj, "ranger_qrnn", fit_obj.predict_proba(X_test), score)) print(classification_report(y_test, preds.ravel().astype(int)))
Elapsed time: 0.4195363521575928 precision recall f1-score support 0 0.83 0.94 0.88 36 1 0.97 0.91 0.94 78 accuracy 0.92 114 macro avg 0.90 0.93 0.91 114 weighted avg 0.93 0.92 0.92 114
fit_obj = lm.Classifier(method = "ranger", level=95, nb_hidden=0) dataset = load_breast_cancer() X = dataset.data y = dataset.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13) start = time() fit_obj.fit(X_train, y_train) print("Elapsed time: ", time() - start) preds = fit_obj.predict(X_test) score = np.mean(preds.ravel().astype(int) == y_test) clf_list.append((fit_obj, "ranger_calibrated", fit_obj.predict_proba(X_test), score)) print(classification_report(y_test, preds.ravel().astype(int)))
Elapsed time: 0.23552465438842773 precision recall f1-score support 0 0.88 0.97 0.92 36 1 0.99 0.94 0.96 78 accuracy 0.95 114 macro avg 0.93 0.95 0.94 114 weighted avg 0.95 0.95 0.95 114
fit_obj = lm.Classifier(method = "ranger", level=95, nb_hidden=25) dataset = load_breast_cancer() X = dataset.data y = dataset.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13) start = time() fit_obj.fit(X_train, y_train) print("Elapsed time: ", time() - start) preds = fit_obj.predict(X_test) score = np.mean(preds.ravel().astype(int) == y_test) clf_list.append((fit_obj, "ranger_qrnn_calibrated", fit_obj.predict_proba(X_test), score)) print(classification_report(y_test, preds.ravel().astype(int)))
Elapsed time: 0.23675990104675293 precision recall f1-score support 0 0.85 0.97 0.91 36 1 0.99 0.92 0.95 78 accuracy 0.94 114 macro avg 0.92 0.95 0.93 114 weighted avg 0.94 0.94 0.94 114
names = [clf_list[i][1] for i in range(len(clf_list))] classifiers = [clf_list[i][0] for i in range(len(clf_list))] scores = [clf_list[i][2] for i in range(len(clf_list))]
2 – Visualizing classifiers in 2D
import matplotlib.pyplot as plt import numpy as np from matplotlib.colors import ListedColormap from sklearn.datasets import make_circles, make_classification, make_moons from sklearn.inspection import DecisionBoundaryDisplay from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler X, y = make_classification( n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1 ) rng = np.random.RandomState(2) X += 2 * rng.uniform(size=X.shape) linearly_separable = (X, y) datasets = [ make_moons(noise=0.3, random_state=0), make_circles(noise=0.2, factor=0.5, random_state=1), linearly_separable, ] figure = plt.figure(figsize=(27, 9)) i = 1 # iterate over datasets for ds_cnt, ds in enumerate(datasets): # preprocess dataset, split into training and test part X, y = ds[0], ds[1] X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.4, random_state=42 ) x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5 y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5 # just plot the dataset first cm = plt.cm.RdBu cm_bright = ListedColormap(["#FF0000", "#0000FF"]) ax = plt.subplot(len(datasets), len(classifiers) + 1, i) if ds_cnt == 0: ax.set_title("Input data") # Plot the training points ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors="k") # Plot the testing points ax.scatter( X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6, edgecolors="k" ) ax.set_xlim(x_min, x_max) ax.set_ylim(y_min, y_max) ax.set_xticks(()) ax.set_yticks(()) i += 1 # iterate over classifiers for name, clf in zip(names, classifiers): ax = plt.subplot(len(datasets), len(classifiers) + 1, i) clf = make_pipeline(StandardScaler(), clf) clf.fit(X_train, y_train) try: score = clf.score(X_test, y_test) except: # no scoring method available yet for prediction sets score = np.mean(clf.predict_proba(X_test).argmax(axis=1) == y_test) DecisionBoundaryDisplay.from_estimator( clf, X, cmap=cm, alpha=0.8, ax=ax, eps=0.5 ) # Plot the training points ax.scatter( X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors="k" ) # Plot the testing points ax.scatter( X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, edgecolors="k", alpha=0.6, ) ax.set_xlim(x_min, x_max) ax.set_ylim(y_min, y_max) ax.set_xticks(()) ax.set_yticks(()) if ds_cnt == 0: ax.set_title(name) ax.text( x_max - 0.3, y_min + 0.3, ("%.2f" % score).lstrip("0"), size=15, horizontalalignment="right", ) i += 1 plt.tight_layout() plt.show()
To leave a comment for the author, please follow the link and comment on their blog: T. Moudiki's Webpage - Python .
Want to share your content on python-bloggers? click here.