Note
Go to the end to download the full example code
Default DL85Classifier on COMPAS dataset
######################################################################
# DL8.5 default classifier #
######################################################################
Model building...
Model built. Duration of building = 0.2881
Confusion Matrix below
[[543 246]
[204 450]]
Accuracy DL8.5 on training set = 0.6801
Accuracy DL8.5 on test set = 0.6881
Serialized json tree: {'feat': 2, 'left': {'feat': 1, 'left': {'feat': 0, 'left': {'value': 1, 'error': 40}, 'right': {'feat': 8, 'left': {'value': 1, 'error': 142}, 'right': {'value': 0, 'error': 35}}}, 'right': {'feat': 10, 'left': {'feat': 24, 'left': {'value': 1, 'error': 9}, 'right': {'value': 0, 'error': 61}}, 'right': {'feat': 22, 'left': {'value': 0, 'error': 64}, 'right': {'value': 1, 'error': 140}}}}, 'right': {'feat': 23, 'left': {'feat': 26, 'left': {'feat': 18, 'left': {'value': 0, 'error': 193}, 'right': {'value': 1, 'error': 8}}, 'right': {'feat': 5, 'left': {'value': 0, 'error': 251}, 'right': {'value': 1, 'error': 113}}}, 'right': {'feat': 4, 'left': {'feat': 22, 'left': {'value': 0, 'error': 191}, 'right': {'value': 1, 'error': 288}}, 'right': {'feat': 25, 'left': {'value': 1, 'error': 151}, 'right': {'value': 0, 'error': 160}}}}, 'proba': [0.5118696932940565, 0.4881303067059435]}
import time
import graphviz
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from pydl85 import DL85Classifier, Cache_Type
# Example script: fit a default DL85Classifier on the COMPAS dataset, report
# its accuracy and print the learned tree.

# Single source of truth for the dataset location (it is read twice below).
dataset_path = "../datasets/compas.csv"

# Load the numeric data, skipping the header row; the last column is used as
# the target and every other column as a feature.
dataset = np.genfromtxt(dataset_path, delimiter=',', skip_header=1)
X, y = dataset[:, :-1], dataset[:, -1]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# read the column names
with open(dataset_path, 'r') as f:
    col_names = f.readline().strip().split(',')
# Drop the last header entry: it names the target column, not a feature.
col_names = col_names[:-1]

print("######################################################################\n"
      "# DL8.5 default classifier #\n"
      "######################################################################")

clf = DL85Classifier(max_depth=4, cache_type=Cache_Type.Cache_HashCover)

# Time only the call to fit().
start = time.perf_counter()
print("Model building...")
clf.fit(X_train, y_train)
duration = time.perf_counter() - start
print("Model built. Duration of building =", round(duration, 4))

# Evaluate on the held-out rows.
y_pred = clf.predict(X_test)
print("Confusion Matrix below")
print(confusion_matrix(y_test, y_pred))
print("Accuracy DL8.5 on training set =", round(clf.accuracy_, 4))
print("Accuracy DL8.5 on test set =", round(accuracy_score(y_test, y_pred), 4))

# print the tree
print("Serialized json tree:", clf.tree_)

# Build the graphviz (dot) description of the tree, labelled with the feature
# names read from the CSV header and the two class names.
dot = clf.export_graphviz(feature_names=col_names, class_names=["No Recidivism", "Recidivism"])

# uncomment the following lines to save the tree as a png file
# graph = graphviz.Source(dot, format="png")
# graph.render("plots/compas_odt")
Total running time of the script: (0 minutes 0.411 seconds)