In [None]:
# Code source: Sebastian Curi and Andreas Krause, based on Jaques Grobler (sklearn demos).
# License: BSD 3 clause

# We start by importing some modules and running some magic commands.
%matplotlib inline
%reload_ext autoreload
%load_ext autoreload
%autoreload 2

# General math and plotting modules.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.special import erfinv

# Project files.
from utilities.util import gradient_descent
from utilities.classifiers import Logistic
from utilities.regressors import TStudent
from utilities.regularizers import L2Regularizer
from utilities.load_data import polynomial_data, linear_separable_data
from utilities import plot_helpers

# Widget and formatting modules
import IPython
import ipywidgets
from ipywidgets import interact, interactive, interact_manual, fixed
from matplotlib import rcParams
# If the figures are not displayed nicely in your browser, change the following line.
rcParams['figure.figsize'] = (10, 5)
rcParams['font.size'] = 16

# Machine Learning library. 
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn import datasets
from sklearn.linear_model import SGDRegressor, Ridge, LogisticRegression
from sklearn.model_selection import cross_val_score


In [None]:
def get_regression_dataset(dataset, X=None, n_samples=200, noise=0, w=None):
    """Generate a one-dimensional synthetic regression problem.

    Parameters
    ----------
    dataset : str
        ``'cos'``: ``cos(1.5 * pi * x)``; ``'sinc'``: ``x * sin(1.5 * pi * x)``;
        ``'linear'``: ``w[0] + w[1] * x``; ``'linear-features'``: polynomial in ``x``
        of degree ``len(w) - 1`` with coefficients ``w``.
    X : numpy.ndarray, optional
        1-D array of inputs. When omitted, ``n_samples`` standard-normal points are drawn.
    n_samples : int
        Number of inputs to draw; ignored when ``X`` is given.
    noise : float
        Scale of the Gaussian noise added to the targets.
    w : array-like, optional
        Coefficients; required by ``'linear'`` and ``'linear-features'`` only.

    Returns
    -------
    X, Y
        The inputs and noisy targets. For the two linear datasets ``X`` is returned as
        an ``(n, 1)`` column; for ``'cos'`` and ``'sinc'`` it is returned as given.

    Raises
    ------
    ValueError
        If ``dataset`` is not a known name, or a linear dataset is requested without ``w``.
    """
    known = ('cos', 'sinc', 'linear', 'linear-features')
    if dataset not in known:
        raise ValueError("Unknown regression dataset {!r}; expected one of {}".format(dataset, known))
    if w is None and dataset in ('linear', 'linear-features'):
        raise ValueError("Dataset {!r} requires the coefficient vector `w`".format(dataset))

    if X is None:
        X = np.random.randn(n_samples)

    # The noise term is always drawn (even when noise == 0) so that the global
    # random stream advances identically regardless of the noise level.
    if dataset == 'cos':
        Y = np.cos(1.5 * np.pi * X) + noise * np.random.randn(X.shape[0])

    elif dataset == 'sinc':
        Y = X * np.sin(1.5 * np.pi * X) + noise * np.random.randn(X.shape[0])

    elif dataset == 'linear':
        X = np.atleast_2d(X).T
        Phi = PolynomialFeatures(degree=1, include_bias=True).fit_transform(X)
        Y = Phi @ w[:2] + noise * np.random.randn(X.shape[0])

    else:  # 'linear-features'
        X = np.atleast_2d(X).T
        Phi = PolynomialFeatures(degree=len(w) - 1, include_bias=True).fit_transform(X)
        Y = Phi @ w + noise * np.random.randn(X.shape[0])

    return X, Y
    

def get_classification_dataset(dataset, n_samples=200, noise=0.3):
    """Build a classification problem by name.

    Parameters
    ----------
    dataset : str
        One of ``'linear'``, ``'imbalanced'``, ``'2-blobs'``, ``'3-blobs'``, ``'4-blobs'``,
        ``'circles'``, ``'moons'`` or ``'iris'``.
    n_samples : int
        Number of points to generate. Not used by ``'iris'``, which is loaded whole.
    noise : float
        Forwarded to ``linear_separable_data`` for ``'linear'`` and ``'imbalanced'``.
        The circles and moons generators use a fixed noise of 0.05 and the blob
        generators take no noise argument.

    Returns
    -------
    X, Y
        Features and labels. Iris is truncated to its first two features.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the names above.
    """
    if dataset == 'linear':
        X, Y = linear_separable_data(n_samples, noise=noise, dim=2)
        # presumably maps {-1, +1} labels to {0, 1} - confirm against linear_separable_data
        Y = (Y + 1) // 2
    elif dataset == 'imbalanced':
        X, Y = linear_separable_data(n_samples, noise=noise, dim=2, num_negative=int(n_samples * 0.2))
        Y = (Y + 1) // 2
    elif dataset in ('2-blobs', '3-blobs', '4-blobs'):
        # The leading digit of the name is the number of classes; the seed is fixed.
        X, Y = datasets.make_classification(n_classes=int(dataset[0]), n_features=2, n_informative=2,
                                            n_redundant=0, n_clusters_per_class=1,
                                            n_samples=n_samples, random_state=8)
    elif dataset == 'circles':
        X, Y = datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05)
    elif dataset == 'moons':
        X, Y = datasets.make_moons(n_samples=n_samples, noise=.05)
    elif dataset == 'iris':
        X, Y = datasets.load_iris(return_X_y=True)
        X = X[:, :2]
    else:
        raise ValueError("Unknown classification dataset {!r}".format(dataset))

    return X, Y

# Probabilistic Regression

We compare a regressor that uses a Gaussian likelihood with one that uses a Student-t likelihood with $\nu$ degrees of freedom (adjustable via the `Nu` slider below).

In [None]:
# Figure geometry and font size for the probabilistic-regression demo.
rcParams.update({'figure.figsize': (10, 6), 'font.size': 16})

def probabilistic_regression(dataset, nu, n_samples, degree, alpha, noise, noise_type):
    """Fit a ridge (Gaussian) and a Student-t regressor to the same noisy data and plot both.

    Parameters
    ----------
    dataset : str
        Dataset name understood by ``get_regression_dataset``.
    nu : float
        Forwarded to ``TStudent`` as ``nu``.
    n_samples : int
        Number of training inputs, drawn uniformly from [0, 1) and sorted.
    degree : int
        Degree of the polynomial features; ``degree + 1`` true coefficients are drawn.
    alpha : float
        L2 regularization strength used for both regressors.
    noise : float
        Scale of the noise added to the targets; also forwarded to ``TStudent`` as ``sigma``.
    noise_type : str
        ``'gaussian'`` or ``'heavy-tailed'`` (standard Cauchy). Any other value adds no noise.
    """
    np.random.seed(0)

    # DATASET
    w = np.random.randn(1 + degree)
    X = np.sort(np.random.rand(n_samples))
    _, y = get_regression_dataset(dataset, X=X, noise=0, w=w)
    # Mean of the clean targets; used below to shift the true function in the plot.
    ymean = np.mean(y)
    if noise_type == 'gaussian':
        y += noise * np.random.randn(*y.shape)
    elif noise_type == 'heavy-tailed':
        # `size=` keeps this valid for any target shape (positional unpacking only works in 1-D).
        y += noise * np.random.standard_cauchy(size=y.shape)
    y = y - np.mean(y)

    # REGRESSION
    polynomial_features = PolynomialFeatures(degree=degree, include_bias=False)
    Phi = polynomial_features.fit_transform(X[:, np.newaxis])
    Phimean = Phi.mean(axis=0)

    normal = Ridge(alpha=alpha)
    normal.fit(Phi - Phimean, y)

    # NOTE(review): the noise slider can reach 0, so sigma may be 0 here - confirm TStudent copes.
    student = TStudent(x=Phi - Phimean, y=y, nu=nu, sigma=noise)
    regularizer = L2Regularizer(alpha, include_bias=False)
    opts = {'eta0': 0.1, 'n_iter': 1000, 'batch_size': min(n_samples, 64), 'n_samples': X.shape[0],
            'algorithm': 'SGD'}
    # Started from the ridge solution. The return value is not used; the fit is read back
    # through student.predict - presumably `student` is updated in place (TODO confirm).
    gradient_descent(normal.coef_, student, regularizer, opts=opts)

    # PREDICT
    X_plot = np.linspace(-1, 2, 100)
    # Reuse the transformer fitted on the training inputs rather than refitting it on the grid.
    Phi_plot = polynomial_features.transform(X_plot[:, np.newaxis]) - Phimean
    _, Y_plot = get_regression_dataset(dataset, X=X_plot, noise=0, w=w)
    Y_plot -= ymean

    # PLOTS
    plt.plot(X_plot, student.predict(Phi_plot), 'g-', label="Student")
    plt.plot(X_plot, normal.predict(Phi_plot), 'r-', label="Normal")
    plt.plot(X_plot, Y_plot, 'b--', label="True function")

    plt.scatter(X, y, edgecolor='b', s=20)
    plt.xlabel("x")
    plt.ylabel("y")
    plt.xlim((-0.5, 1.5))
    plt.ylim((-1 + np.min(Y_plot), 1 + np.max(Y_plot)))
    plt.legend(loc="upper left", ncol=4)
    plt.show()
    

# Controls for the probabilistic-regression demo. FloatLogSlider bounds are base-10
# exponents, so `nu` ranges over [1e-2, 1e4].
interact(
    probabilistic_regression,
    dataset=['cos', 'sinc', 'linear', 'linear-features'],
    nu=ipywidgets.FloatLogSlider(
        value=1, min=-2, max=4, step=0.01, readout_format='.4f',
        description='Nu:', continuous_update=False),
    n_samples=ipywidgets.IntSlider(
        value=300, min=30, max=1500, step=1,
        description='Samples:', continuous_update=False),
    degree=ipywidgets.IntSlider(
        value=1, min=1, max=15, step=1,
        description='Degree:', continuous_update=False),
    noise=ipywidgets.FloatSlider(
        value=0.1, min=0, max=1, step=0.01, readout_format='.2f',
        description='Noise level:', continuous_update=False),
    alpha=ipywidgets.BoundedFloatText(
        value=0, min=0, max=1000, step=0.0001,
        description='Reg Coef.:', continuous_update=False),
    noise_type=['gaussian', 'heavy-tailed'],
);

# Probabilistic Classification (Logistic Regression)

In [None]:
# Figure geometry and font size for the logistic-regression SGD demo.
rcParams.update({'figure.figsize': (20, 6), 'font.size': 22})

# One widget per argument of logistic_SGD. 'description_width': 'initial' lets the
# label take its natural width instead of being truncated.
num_points_w = ipywidgets.IntSlider(
    value=300, min=30, max=1500, step=1,
    description='Number of samples:',
    style={'description_width': 'initial'}, continuous_update=False)
noise_w = ipywidgets.FloatSlider(
    value=0.1, min=0, max=1, step=0.01, readout_format='.2f',
    description='Noise level:',
    style={'description_width': 'initial'}, continuous_update=False)
reg_w = ipywidgets.BoundedFloatText(
    value=0, min=0, max=1000, step=0.0001,
    description='Regularization:',
    style={'description_width': 'initial'}, continuous_update=False)
batch_size_w = ipywidgets.IntSlider(
    value=16, min=1, max=64, step=1,
    description='Batch Size:',
    style={'description_width': 'initial'}, continuous_update=False)
lr_w = ipywidgets.FloatLogSlider(
    value=0.3, min=-4, max=1, step=0.1, readout_format='.4f',
    description='Learning Rate:',
    style={'description_width': 'initial'}, continuous_update=False)
num_iter_w = ipywidgets.IntSlider(
    value=50, min=10, max=200, step=1,
    description='Num Iter:',
    style={'description_width': 'initial'}, continuous_update=False)
def logistic_SGD(dataset, num_points, noise, reg, batch_size, lr, num_iter):
    """Train a logistic classifier with mini-batch SGD and plot how it progresses.

    A training set of ``num_points`` samples and a test set of ``int(0.1 * num_points)``
    samples are drawn from ``dataset``; labels are mapped with ``2 * Y - 1`` and a column
    of ones is appended to two-feature inputs. The data draw itself is not seeded; only
    the initial weights are (seed 42).

    Parameters
    ----------
    dataset : str
        Name understood by ``get_classification_dataset``.
    num_points : int
        Number of training samples.
    noise : float
        Noise level forwarded to the dataset generator.
    reg : float
        Coefficient passed to ``L2Regularizer``.
    batch_size : int
        Mini-batch size, capped at the number of training samples.
    lr : float
        Passed to ``gradient_descent`` as ``eta0``.
    num_iter : int
        Passed to ``gradient_descent`` as ``n_iter``.
    """
    def with_bias(features):
        # Only two-feature inputs get the extra column of ones; others pass through.
        if features.shape[1] != 2:
            return features
        bias = np.ones((features.shape[0], 1))
        return np.concatenate((features, bias), axis=-1)

    # DATASET
    X, Y = get_classification_dataset(dataset, num_points, noise)
    X, Y = with_bias(X), 2 * Y - 1

    Xtest, Ytest = get_classification_dataset(dataset, int(0.1 * num_points), noise)
    Xtest, Ytest = with_bias(Xtest), 2 * Ytest - 1

    order = np.arange(0, X.shape[0], 1)
    np.random.shuffle(order)
    X, Y = X[order], Y[order]

    # REGRESSION
    classifier = Logistic(X, Y)
    classifier.load_test_data(Xtest, Ytest)
    regularizer = L2Regularizer(reg)
    np.random.seed(42)
    w0 = np.random.randn(3)

    sgd_opts = {
        'eta0': lr,
        'n_iter': num_iter,
        'batch_size': min(batch_size, X.shape[0]),
        'n_samples': X.shape[0],
        'algorithm': 'SGD',
    }

    # Interrupting the kernel while training or drawing ends the demo quietly.
    try:
        trajectory, indexes = gradient_descent(w0, classifier, regularizer, sgd_opts)

        # PLOTS
        contour_plot = plt.subplot(121)
        error_plot = plt.subplot(122)

        # (points, labels, label value, marker, fill style, legend entry)
        groups = (
            (X, Y, 1, 'ro', 'full', '+ Train'),
            (X, Y, -1, 'bs', 'full', '- Train'),
            (Xtest, Ytest, 1, 'ro', 'none', '+ Test'),
            (Xtest, Ytest, -1, 'bs', 'none', '- Test'),
        )
        for points, labels, value, marker, fill, legend in groups:
            rows = np.where(labels == value)[0]
            marker_opts = {'marker': marker, 'fillstyle': fill, 'label': legend, 'size': 8}
            plot_helpers.plot_data(points[rows, 0], points[rows, 1], fig=contour_plot, options=marker_opts)

        plot_opts = {
            'contour_opts': {'n_points': 100, 'x_label': '$x$', 'y_label': '$y$',
                             'sgd_point': True, 'n_classes': 4},
            'error_opts': {'epoch': 5, 'x_label': '$t$', 'y_label': 'error'},
        }
        plot_helpers.classification_progression(X, Y, trajectory, indexes, classifier,
                                                contour_plot=contour_plot, error_plot=error_plot,
                                                options=plot_opts)

    except KeyboardInterrupt:
        pass
# interact_manual: the demo only runs when its button is pressed.
interact_manual(
    logistic_SGD,
    dataset=['linear', 'moons', 'circles', 'imbalanced'],
    num_points=num_points_w,
    noise=noise_w,
    reg=reg_w,
    batch_size=batch_size_w,
    lr=lr_w,
    num_iter=num_iter_w,
);


# Multi-class Logistic Regression

In [None]:
# Figure geometry and font size for the multi-class demo (2 x 2 panels).
rcParams.update({'figure.figsize': (20, 15), 'font.size': 16})

def multi_class_lr(dataset):
    """Fit scikit-learn's ``LogisticRegression`` on a 2-D dataset and show four views of it.

    The panels, evaluated on a regular mesh around the data, are: the predicted class,
    the largest predicted class probability, their product, and the entropy of the
    predicted class distribution. Training points are overlaid on every panel.

    Parameters
    ----------
    dataset : str
        Name understood by ``get_classification_dataset``; 200 samples are requested.
    """
    # DATASET
    points, labels = get_classification_dataset(dataset, 200)
    points = points[:, :2]
    # Lower end of the probability colour scale: 1 / number of distinct labels.
    chance_level = 1. / len(np.unique(labels))

    # REGRESSION
    classifier = LogisticRegression().fit(points, labels)

    # PREDICT
    step = .02  # mesh resolution
    x_lo, x_hi = points[:, 0].min() - .5, points[:, 0].max() + .5
    y_lo, y_hi = points[:, 1].min() - .5, points[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_lo, x_hi, step), np.arange(y_lo, y_hi, step))
    mesh = np.c_[xx.ravel(), yy.ravel()]

    proba = classifier.predict_proba(mesh)
    predicted = classifier.predict(mesh).reshape(xx.shape)
    entropy = (-(proba * classifier.predict_log_proba(mesh)).sum(axis=1)).reshape(xx.shape)
    confidence = proba.max(axis=1).reshape(xx.shape)

    # PLOTS
    fig, axes = plt.subplots(2, 2)
    boundary_ax, confidence_ax, soft_ax, entropy_ax = axes.ravel()

    boundary_ax.set_title('Classification Boundary')
    boundary_ax.contourf(xx, yy, predicted, cmap=plt.cm.jet, alpha=0.5)

    confidence_ax.set_title('Prediction Probabilities')
    confidence_ax.contourf(xx, yy, confidence, cmap=plt.cm.cividis_r, alpha=0.5,
                           vmin=chance_level, vmax=1)
    # Separate mappable so the colorbar always spans [chance_level, 1].
    scale = plt.cm.ScalarMappable(cmap=plt.cm.cividis_r)
    scale.set_array(confidence)
    scale.set_clim(chance_level, 1.)
    plt.colorbar(scale, ax=confidence_ax)

    soft_ax.set_title('Probabilistic Boundary')
    soft_ax.contourf(xx, yy, confidence * predicted, cmap=plt.cm.jet, alpha=0.5)

    entropy_ax.set_title('Entropy')
    entropy_contours = entropy_ax.contourf(xx, yy, entropy, cmap=plt.cm.cividis_r, alpha=0.5)
    plt.colorbar(entropy_contours, ax=entropy_ax)

    # Training points on every panel; ticks are removed.
    for ax in axes.ravel():
        ax.scatter(points[:, 0], points[:, 1], c=labels, cmap=plt.cm.jet)

        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
    plt.show()
    
# Dataset picker for the multi-class demo; the first entry is the initial selection.
interact(
    multi_class_lr,
    dataset=['3-blobs', '4-blobs', 'iris', 'linear', 'imbalanced', '2-blobs', 'circles', 'moons'],
);


# Doubtful Logistic Regression


In [None]:
# Figure geometry and font size for the doubtful-classification demo (1 x 2 panels).
rcParams.update({'figure.figsize': (20, 6), 'font.size': 16})

def doubtful_logistic_regression(dataset, min_prob):
    """Logistic regression that abstains when its most likely class is not probable enough.

    Predicted class labels are doubled (0, 2, 4, ...) and the value 1 is used for mesh
    points where the largest class probability is below ``min_prob``.

    Parameters
    ----------
    dataset : str
        Name understood by ``get_classification_dataset``; 200 samples are requested.
    min_prob : float
        Minimum probability of the predicted class required to commit to it.
    """
    np.random.seed(42)

    # DATASET
    X, y = get_classification_dataset(dataset, 200)
    X = X[:, :2]
    # Lower end of the probability colour scale: 1 / number of distinct labels.
    chance_level = 1. / len(np.unique(y))

    # REGRESSION
    model = LogisticRegression().fit(X, y)

    # PREDICT
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = .02  # step size in the mesh
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    xy = np.c_[xx.ravel(), yy.ravel()]
    P = model.predict_proba(xy).max(axis=1)
    C = 2 * model.predict(xy)

    # DOUBTFUL STEP: not confident enough -> the reserved "doubt" value.
    C[np.where(P < min_prob)[0]] = 1

    C = C.reshape(xx.shape)
    P = P.reshape(xx.shape)

    # PLOTS
    fig, axes = plt.subplots(1, 2)
    axes[0].set_title('Classification Boundary')
    axes[0].contourf(xx, yy, C, cmap=plt.cm.jet, alpha=0.5)

    axes[1].set_title('Probability')
    # vmin/vmax make the filled contours use the same scale as the colorbar below.
    axes[1].contourf(xx, yy, P, cmap=plt.cm.cividis_r, alpha=0.5, vmin=chance_level, vmax=1)
    m = plt.cm.ScalarMappable(cmap=plt.cm.cividis_r)
    m.set_array(P)
    m.set_clim(chance_level, 1.)
    plt.colorbar(m, ax=axes[1])

    # Plot also the training points
    for ax in axes:
        ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.jet)

        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
    plt.show()
  
    
# Dataset picker and confidence threshold for the doubtful-classification demo.
interact(
    doubtful_logistic_regression,
    dataset=['linear', 'imbalanced', '2-blobs', '3-blobs', '4-blobs', 'circles', 'moons', 'iris'],
    min_prob=ipywidgets.FloatSlider(
        value=0.75, min=0.25, max=1, step=0.01, continuous_update=False),
);

# Cost Sensitive Classification (Logistic Regression)


In [None]:
# Figure geometry and font size for the cost-sensitive classification demo (2 x 2 panels).
rcParams.update({'figure.figsize': (20, 15), 'font.size': 16})

def cost_sensitive_logistic_regression(dataset, cost_ratio):
    """Binary logistic regression in which class 0 is weighted by ``cost_ratio``.

    Four panels are drawn on a mesh around the data: the predicted class, the largest
    predicted class probability, their product, and the predictive entropy.

    Parameters
    ----------
    dataset : str
        Name understood by ``get_classification_dataset``; 200 samples are requested.
    cost_ratio : float
        Weight given to class 0 through ``class_weight``; class 1 is left unspecified.
    """
    np.random.seed(0)
    class_weight = {0: cost_ratio}

    # DATASET
    X, y = get_classification_dataset(dataset, 200)
    X = X[:, :2]
    chance_level = 1. / len(np.unique(y))

    # REGRESSION
    model = LogisticRegression(class_weight=class_weight).fit(X, y)

    # PREDICT
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = .02  # step size in the mesh
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    xy = np.c_[xx.ravel(), yy.ravel()]
    P = model.predict_proba(xy)
    # Labels are kept in {0, 1} so they match the [0, 1] colour limits used below;
    # doubling them pushed P * C for class 1 into [1, 2] and saturated that panel.
    C = model.predict(xy)
    H = -(model.predict_log_proba(xy) * P).sum(axis=1)
    P = P.max(axis=1)

    C = C.reshape(xx.shape)
    P = P.reshape(xx.shape)
    H = H.reshape(xx.shape)

    # PLOTS
    fig, axes = plt.subplots(2, 2)
    axes[0, 0].set_title('Classification Boundary')
    axes[0, 0].contourf(xx, yy, C, cmap=plt.cm.jet, alpha=0.5, vmin=0, vmax=1)

    axes[0, 1].set_title('Prediction Probabilities')
    axes[0, 1].contourf(xx, yy, P, cmap=plt.cm.cividis_r, alpha=0.5, vmin=chance_level, vmax=1)
    # Separate mappable so the colorbar always spans [chance_level, 1].
    m = plt.cm.ScalarMappable(cmap=plt.cm.cividis_r)
    m.set_array(P)
    m.set_clim(chance_level, 1.)
    plt.colorbar(m, ax=axes[0, 1])

    axes[1, 0].set_title('Probabilistic Boundary')
    axes[1, 0].contourf(xx, yy, P * C, cmap=plt.cm.jet, alpha=0.5, vmin=0, vmax=1)

    axes[1, 1].set_title('Entropy')
    cf = axes[1, 1].contourf(xx, yy, H, cmap=plt.cm.cividis_r, alpha=0.5)
    plt.colorbar(cf, ax=axes[1, 1])

    # Plot also the training points
    for row in axes:
        for ax in row:
            ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.jet, vmin=0, vmax=1)

            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_xticks(())
            ax.set_yticks(())
    plt.show()

# Dataset picker and class-0 weight (log scale, 1e-3 .. 1e4) for the cost-sensitive demo.
interact(
    cost_sensitive_logistic_regression,
    dataset=['linear', 'imbalanced', '2-blobs', 'circles', 'moons'],
    cost_ratio=ipywidgets.FloatLogSlider(
        value=1, min=-3, max=4, step=0.1, continuous_update=False),
);


# Cost-Sensitive Linear Regression

In [None]:
# Figure geometry and font size for the cost-sensitive regression demo.
rcParams.update({'figure.figsize': (10, 6), 'font.size': 16})

def cost_sensitive_linear_regression(dataset, over_estimation_cost_ratio, degree, alpha, n_samples, noise):
    """Plot a ridge fit next to a copy shifted to account for asymmetric error costs.

    The shifted prediction is the ridge prediction plus
    ``noise * sqrt(2) * erfinv(2 * ratio - 1)`` with
    ``ratio = 1 / (1 + over_estimation_cost_ratio)``, i.e. the ``ratio``-quantile of a
    zero-mean Gaussian with standard deviation ``noise``.

    Parameters
    ----------
    dataset : str
        Name understood by ``get_regression_dataset``.
    over_estimation_cost_ratio : float
        Cost ratio entering ``ratio`` as above.
    degree : int
        Degree of the polynomial features (bias column included).
    alpha : float
        Ridge regularization strength.
    n_samples : int
        Number of training inputs, drawn uniformly from [0, 1) and sorted.
    noise : float
        Scale of the Gaussian noise on the training targets.
    """
    np.random.seed(42)
    quantile_level = 1 / (1 + over_estimation_cost_ratio)

    def design_matrix(inputs):
        # Polynomial features of a 1-D input array, bias column included.
        column = np.atleast_2d(inputs).T
        return PolynomialFeatures(degree=degree, include_bias=True).fit_transform(column)

    # DATASET
    true_w = np.array([1, 0.2, -0.3, 4])
    X = np.sort(np.random.rand(n_samples))
    # The noise-free targets are not used, but the call still draws from the global
    # random stream, so it is kept to leave the noisy sample below unchanged.
    get_regression_dataset(dataset, n_samples=200, X=X, noise=0, w=true_w)
    _, y = get_regression_dataset(dataset, n_samples=200, X=X, noise=noise, w=true_w)

    # REGRESSION
    ridge = Ridge(alpha=alpha, fit_intercept=False).fit(design_matrix(X), y)
    fitted_w = ridge.coef_

    # PREDICT
    X_test = np.linspace(-1, 2, 100)
    _, f_test = get_regression_dataset(dataset, n_samples=200, X=X_test, noise=0, w=true_w)
    y_equal = design_matrix(X_test) @ fitted_w

    # COST SENSITIVITY
    shift = noise * np.sqrt(2) * erfinv(2 * quantile_level - 1)
    y_sensitive = y_equal + shift

    # PLOT
    plt.plot(X, y, '*')
    for curve, name in ((y_sensitive, 'Cost Sensitive'),
                        (y_equal, 'Linear Regression'),
                        (f_test, 'True Function')):
        plt.plot(X_test, curve, label=name)
    plt.legend(loc='upper left', ncol=4)

    plt.ylim(-2, 2)
    
# Controls for the cost-sensitive regression demo; the ratio slider is logarithmic
# (1e-3 .. 1e3).
interact(
    cost_sensitive_linear_regression,
    dataset=['cos', 'sinc', 'linear', 'linear-features'],
    over_estimation_cost_ratio=ipywidgets.FloatLogSlider(
        value=0.1, min=-3, max=3, step=0.1, readout_format='.4f',
        description='Ratio:', continuous_update=False),
    n_samples=ipywidgets.IntSlider(
        value=30, min=30, max=1500, step=1,
        description='N Samples:', continuous_update=False),
    degree=ipywidgets.IntSlider(
        value=1, min=1, max=9, step=1,
        description='Poly Degree:', continuous_update=False),
    alpha=ipywidgets.BoundedFloatText(
        value=0, min=0, max=1000, step=0.0001,
        description='Reg Coef.:', continuous_update=False),
    noise=ipywidgets.FloatSlider(
        value=0.3, min=0, max=1, step=0.01, readout_format='.2f',
        description='Noise level:', continuous_update=False),
);

# Uncertainty Sampling in Logistic Regression

In [None]:
rcParams['figure.figsize'] = (16, 5)
rcParams['font.size'] = 16

# Indices of the points labelled so far. uncertainty_sampling's callbacks rebind and
# mutate this through `global`. It must be a set (`{}` would be an empty dict, on
# which `.add` fails).
queried_set = set()
def uncertainty_sampling(dataset, criterion, noise):
    """Interactive uncertainty-sampling demo built on logistic regression.

    A pool of 200 points is drawn from ``dataset``. Starting from one randomly chosen
    labelled point per class, the "Query new point" button picks the unlabelled point
    the current model is least certain about, and "Update Model" refits on all labelled
    points. "Restart" draws a new initial labelled set.

    The labelled indices and the current model are kept in the module-level globals
    ``queried_set`` and ``model``, which the button callbacks rebind.

    Parameters
    ----------
    dataset : str
        Name understood by ``get_classification_dataset``.
    criterion : str
        ``'max-entropy'`` (query the point of highest predictive entropy) or
        ``'min-probability'`` (query the point whose predicted class is least probable).
    noise : float
        Noise level forwarded to the dataset generator.
    """
    query_button = ipywidgets.Button(description="Query new point")
    update_button = ipywidgets.Button(description="Update Model")
    restart_button = ipywidgets.Button(description="Restart")
    X, Y = get_classification_dataset(dataset, 200, noise=noise)
    # Upper colour limit shared by all class-coloured artists (number of labels minus one).
    max_label = len(np.unique(Y)) - 1
    X = X[:, :2]

    index_set = set(range(X.shape[0]))

    def plot(model, X, Y, queried_set, next_idx=None, display_query=True):
        """Draw the model and the pool, then show the button for the next step."""
        queried_idx = list(queried_set)
        non_queried_idx = list(index_set.difference(queried_set))

        qX, qY = X[queried_idx], Y[queried_idx]
        nX, nY = X[non_queried_idx], Y[non_queried_idx]

        # Model prediction contours.
        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        h = .02
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
        xy = np.c_[xx.ravel(), yy.ravel()]
        proba = model.predict_proba(xy)  # evaluated once, reused for P and H
        P = proba.max(axis=1).reshape(xx.shape)
        C = model.predict(xy).reshape(xx.shape)
        H = -(proba * model.predict_log_proba(xy)).sum(axis=1).reshape(xx.shape)

        # PLOTS
        fig, axes = plt.subplots(1, 2)
        axes[0].set_title('Classification Boundary')
        axes[0].contourf(xx, yy, C, cmap=plt.cm.jet, alpha=0.5, vmin=0, vmax=max_label)

        if criterion == 'max-entropy':
            axes[1].set_title('Entropy')
            axes[1].contourf(xx, yy, H, cmap=plt.cm.cividis_r, alpha=0.5)
            m = plt.cm.ScalarMappable(cmap=plt.cm.cividis_r)
            m.set_array(H)
            cbar = plt.colorbar(m, ax=axes[1])
            cbar.set_label('Predicted Entropy', rotation=270, labelpad=20)

        elif criterion == 'min-probability':
            axes[1].set_title('Probability')
            axes[1].contourf(xx, yy, P, cmap=plt.cm.cividis_r, alpha=0.5)
            m = plt.cm.ScalarMappable(cmap=plt.cm.cividis_r)
            m.set_array(P)
            cbar = plt.colorbar(m, ax=axes[1])
            cbar.set_label('Predicted Probability', rotation=270, labelpad=20)

        # Labelled points are large, unlabelled ones small and faint, the candidate a star.
        for ax in axes:
            ax.scatter(qX[:, 0], qX[:, 1], c=qY, marker='o', s=200, cmap=plt.cm.jet, vmin=0, vmax=max_label)
            ax.scatter(nX[:, 0], nX[:, 1], c=nY, marker='o', alpha=0.3, s=20, cmap=plt.cm.jet, vmin=0, vmax=max_label)

            if next_idx is not None:
                ax.scatter(X[[next_idx], 0], X[[next_idx], 1], c=Y[[next_idx]], s=400, marker='*',
                           cmap=plt.cm.jet, vmin=0, vmax=max_label)

            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_xticks(())
            ax.set_yticks(())

        # Replace the previous output with the new figure, then close it so the
        # inline backend does not render it a second time.
        IPython.display.clear_output(wait=True)
        IPython.display.display(fig)
        plt.close(fig)

        if display_query:
            IPython.display.display(query_button)
        else:
            IPython.display.display(update_button)
        IPython.display.display(restart_button)

    def update_model(b):
        """Refit on every labelled point and redraw, offering the query button next."""
        global queried_set, model

        queried_idx = list(queried_set)
        model = LogisticRegression(C=10).fit(X[queried_idx], Y[queried_idx])

        plot(model, X, Y, queried_set, next_idx=None, display_query=True)

    def restart(b):
        """Reset the labelled set to one random point per class and refit."""
        global queried_set
        queried_set = set()
        for c in np.unique(Y):
            queried_set.add(np.random.choice(np.where(Y == c)[0]))
        update_model(None)

    def append_point(b):
        """Select the most uncertain unlabelled point, show it, and mark it as labelled."""
        global queried_set, model

        proba = model.predict_proba(X)
        probs = proba.max(axis=1)
        # sum(p * log p) is the negative entropy, so its minimum is the maximum entropy.
        neg_entropy = (model.predict_log_proba(X) * proba).sum(axis=1)

        # Already-labelled points must never win the argmin.
        queried_idx = list(queried_set)
        probs[queried_idx] = float('Inf')
        neg_entropy[queried_idx] = float('Inf')

        if criterion == 'max-entropy':
            i = np.argmin(neg_entropy)
        elif criterion == 'min-probability':
            i = np.argmin(probs)
        else:
            raise ValueError("Unknown criterion {!r}".format(criterion))

        # Plot first so the candidate is still drawn as unlabelled, then record it.
        plot(model, X, Y, queried_set, i, display_query=False)
        queried_set.add(i)

    query_button.on_click(append_point)
    update_button.on_click(update_model)
    restart_button.on_click(restart)

    restart(None)
    
# Dataset, query criterion and noise level for the uncertainty-sampling demo.
interact(
    uncertainty_sampling,
    dataset=['linear', 'imbalanced', '2-blobs', '3-blobs', '4-blobs', 'iris', 'circles', 'moons'],
    criterion=['min-probability', 'max-entropy'],
    noise=ipywidgets.FloatSlider(
        value=0.25, min=0, max=1, step=0.01, readout_format='.2f',
        continuous_update=False),
);