import numpy as np
import matplotlib.pyplot as plt
from .plot_helpers import cm2, cm3, discrete_scatter


def _call_classifier_chunked(classifier_pred_or_decide, X):
    # The chunk_size is used to split the large grid arrays so that they work
    # with x86 memory models that are restricted to < 2 GB per allocation.
    # The chunk_size value used here is based on a measurement with an
    # MLPClassifier using the following parameters:
    #     MLPClassifier(solver='lbfgs', random_state=0,
    #                   hidden_layer_sizes=[1000, 1000, 1000])
    # Reducing the value trades time for memory.
    # Chunking the array is possible because the predictions for the rows are
    # independent of each other.
    # Note: an intermediate version distinguished between 32- and 64-bit
    # architectures and skipped the chunking on the latter. Testing revealed
    # that even on 64-bit architectures chunking improves performance by a
    # factor of 3-5, largely because it avoids memory swapping.
    chunk_size = 10000

    # Collect the result of each chunk in a list.
    Y_result_chunks = []

    # Call the classifier chunk by chunk.
    for x_chunk in np.array_split(X, np.arange(chunk_size, X.shape[0],
                                               chunk_size, dtype=np.int32),
                                  axis=0):
        Y_result_chunks.append(classifier_pred_or_decide(x_chunk))

    return np.concatenate(Y_result_chunks)


def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None,
                           alpha=1, cm=cm3):
    # Multiclass: color the plane by the predicted class label.
    if eps is None:
        eps = X.std() / 2.

    if ax is None:
        ax = plt.gca()

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 1000)
    yy = np.linspace(y_min, y_max, 1000)

    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]
    decision_values = classifier.predict(X_grid)
    ax.imshow(decision_values.reshape(X1.shape), extent=(x_min, x_max,
                                                         y_min, y_max),
              aspect='auto', origin='lower', alpha=alpha, cmap=cm)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())


def plot_2d_scores(classifier, X, ax=None, eps=None, alpha=1, cm="viridis",
                   function=None):
    # Binary with fill: show continuous scores (decision_function or
    # predict_proba) as an image.
    if eps is None:
        eps = X.std() / 2.

    if ax is None:
        ax = plt.gca()

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 100)
    yy = np.linspace(y_min, y_max, 100)

    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]
    if function is None:
        function = getattr(classifier, "decision_function",
                           getattr(classifier, "predict_proba"))
    else:
        function = getattr(classifier, function)
    decision_values = function(X_grid)
    if decision_values.ndim > 1 and decision_values.shape[1] > 1:
        # predict_proba: use the probability of the positive class.
        decision_values = decision_values[:, 1]
    grr = ax.imshow(decision_values.reshape(X1.shape),
                    extent=(x_min, x_max, y_min, y_max), aspect='auto',
                    origin='lower', alpha=alpha, cmap=cm)

    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
    return grr


def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
                      cm=cm2, linewidth=None, threshold=None,
                      linestyle="solid"):
    # Binary: draw the decision boundary (or filled decision regions).
    if eps is None:
        eps = X.std() / 2.

    if ax is None:
        ax = plt.gca()

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 1000)
    yy = np.linspace(y_min, y_max, 1000)

    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]
    if hasattr(classifier, "decision_function"):
        decision_values = _call_classifier_chunked(classifier.decision_function,
                                                   X_grid)
        levels = [0] if threshold is None else [threshold]
        fill_levels = [decision_values.min()] + levels + [
            decision_values.max()]
    else:
        # no decision_function: fall back to predict_proba
        decision_values = _call_classifier_chunked(classifier.predict_proba,
                                                   X_grid)[:, 1]
        levels = [.5] if threshold is None else [threshold]
        fill_levels = [0] + levels + [1]
    if fill:
        ax.contourf(X1, X2, decision_values.reshape(X1.shape),
                    levels=fill_levels, alpha=alpha, cmap=cm)
    else:
        ax.contour(X1, X2, decision_values.reshape(X1.shape), levels=levels,
                   colors="black", alpha=alpha, linewidths=linewidth,
                   linestyles=linestyle, zorder=5)

    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())


if __name__ == '__main__':
    from sklearn.datasets import make_blobs
    from sklearn.linear_model import LogisticRegression
    X, y = make_blobs(centers=2, random_state=42)
    clf = LogisticRegression(solver='lbfgs').fit(X, y)
    plot_2d_separator(clf, X, fill=True)
    discrete_scatter(X[:, 0], X[:, 1], y)
    plt.show()
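
    # Illustrative extension of the demo (an addition, not part of the
    # original script): after the first window is closed, reuse the same
    # clf, X and y with the other two helpers. plot_2d_classification shades
    # the predicted class regions, and plot_2d_scores shows the continuous
    # decision_function values.
    plt.figure()
    plot_2d_classification(clf, X, alpha=.4)
    discrete_scatter(X[:, 0], X[:, 1], y)
    plt.figure()
    plot_2d_scores(clf, X, alpha=.4)
    discrete_scatter(X[:, 0], X[:, 1], y)
    plt.show()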