From 76c5f61ddaded54305c0538f842cef67ed40ae89 Mon Sep 17 00:00:00 2001
From: Tom Selier
Date: Fri, 20 Oct 2023 16:11:10 +0200
Subject: [PATCH] cleanup

---
 .../decision_tree/decision_tree.py | 100 ++++++++++--------
 1 file changed, 55 insertions(+), 45 deletions(-)

diff --git a/src/experiments/decision_tree/decision_tree.py b/src/experiments/decision_tree/decision_tree.py
index 30653c6..fef2b00 100644
--- a/src/experiments/decision_tree/decision_tree.py
+++ b/src/experiments/decision_tree/decision_tree.py
@@ -1,14 +1,14 @@
-from enum import Enum
+# models
 from sklearn import tree
-from sklearn import metrics
 from sklearn import preprocessing
 from sklearn import neighbors
 from sklearn import ensemble
 from sklearn import svm
-from matplotlib import pyplot as plt
-import pandas as pd
+
+# other
+from enum import Enum
 import numpy as np
-import random
+import time
 import csv
 
 import plots
@@ -49,58 +49,68 @@ with open(PATH, 'r') as file:
     normalized = preprocessing.normalize(data, axis=0, norm='max')
     norm = list(normalized.tolist())
 
-steps = np.linspace(0.1, 1.0, 10, dtype=np.float64)
+steps = np.linspace(1e-4, 1, 20, dtype=np.float64)
+print("Step \t seconds/step")
 
 for step in steps:
     actual = []
     predicted = []
+    time_start = time.time()
 
-    for i in range(len(norm)):
-        temp_data = norm.pop(i)
-        temp_label = labels.pop(i)
+    for j in range(3):
+        for i in range(len(norm)):
+            temp_data = norm.pop(i)
+            temp_label = labels.pop(i)
 
-        # model = tree.DecisionTreeClassifier(
-        #     class_weight=None,
-        #     min_samples_leaf=2,
-        #     max_depth=None,     # < 5 is worse, None good too
-        #     random_state=False, # No change
-        #     criterion='gini',   # MCC + 0.1
-        #     splitter='best',
-        #     ccp_alpha=0         # Pruning: Keep this 0
-        #     )
-        # model = ensemble.RandomForestClassifier(
-        #     n_estimators=20,  # higher is better, but slower (def: 100)
-        #     criterion='gini', # gini best
-        # )
-        # model = ensemble.ExtraTreesClassifier(
-        #     n_estimators=150 # higher is better, but slower (def: 100)
-        # )
-        # model = neighbors.KNeighborsClassifier(
-        #     algorithm='auto',
-        #     leaf_size=2,
-        #     n_neighbors=step,
-        # )
-        model = ensemble.BaggingClassifier(
-            n_estimators=5,
-            max_samples=.5,
-            max_features=.5,
-            bootstrap=False
-        )
-        # model = svm.SVC(decision_function_shape='ovr'
-        # )
-        model = model.fit(norm, labels)
-        result = model.predict([temp_data])
+            # model = tree.DecisionTreeClassifier(
+            #     class_weight=None,
+            #     min_samples_leaf=2,
+            #     max_depth=None,     # < 5 is worse, None good too
+            #     random_state=False, # No change
+            #     criterion='gini',   # MCC + 0.1
+            #     splitter='best',
+            #     ccp_alpha=0         # Pruning: Keep this 0
+            #     )
+            # model = ensemble.RandomForestClassifier(
+            #     n_estimators=20,  # higher is better, but slower (def: 100)
+            #     criterion='gini', # gini best
+            # )
+            # model = ensemble.ExtraTreesClassifier(
+            #     n_estimators=step # higher is better, but slower (def: 100)
+            # )
+            # model = neighbors.KNeighborsClassifier(
+            #     algorithm='auto',
+            #     leaf_size=2,
+            #     n_neighbors=step,
+            # )
+            # model = ensemble.BaggingClassifier(
+            #     n_estimators=5,
+            #     max_samples=.5,
+            #     max_features=.5,
+            #     bootstrap=False
+            # )
+            # model = svm.SVC(
+            #     C = 0.8,
+            #     kernel = "poly",
+            #     degree = 5,
+            #     coef0 = 6,
+            #     probability = False,
+            #     break_ties=True,
+            #     decision_function_shape = 'ovr'
+            # )
+            model = model.fit(norm, labels)
+            result = model.predict([temp_data])
 
-        norm.append(temp_data)
-        labels.append(temp_label)
+            norm.append(temp_data)
+            labels.append(temp_label)
 
-        actual.append(temp_label)
-        predicted.append(result[0])
+            actual.append(temp_label)
+            predicted.append(result[0])
 
     actual_list.append(actual)
     predicted_list.append(predicted)
-    print(step)
+    print("%.4f"%step, "\t", "%.2f"%(time.time()-time_start))
 
 plots.plotMetrics(actual_list, predicted_list)
 plots.plotConfusion(actual_list[0], predicted_list[0])
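
Note: in the new version of the loop every model constructor is commented out,
including the freshly tuned svm.SVC block, so the script as committed would stop
at "model = model.fit(norm, labels)" with a NameError; one of the blocks has to be
uncommented before running. For reference, below is a minimal self-contained
sketch of the same leave-one-out evaluation, written with sklearn's LeaveOneOut
splitter instead of the manual pop()/append() rotation, and assuming the SVC
settings from the patch were the configuration under test. The random data is
purely a stand-in for the CSV the script loads.

    import numpy as np
    from sklearn import preprocessing, svm
    from sklearn.model_selection import LeaveOneOut

    rng = np.random.default_rng(0)
    X = rng.random((60, 4))              # stand-in features
    y = rng.integers(0, 3, size=60)      # stand-in class labels

    # Same per-feature max-normalisation as the script.
    X = preprocessing.normalize(X, axis=0, norm='max')

    actual, predicted = [], []
    for train_idx, test_idx in LeaveOneOut().split(X):
        # SVC parameters taken from the commented-out block in the patch.
        model = svm.SVC(
            C=0.8,
            kernel="poly",
            degree=5,
            coef0=6,
            break_ties=True,
            decision_function_shape="ovr",
        )
        model.fit(X[train_idx], y[train_idx])
        actual.append(y[test_idx][0])
        predicted.append(model.predict(X[test_idx])[0])

    print("accuracy:", np.mean(np.array(actual) == np.array(predicted)))

The parameter sweep and the per-step timing from the patch are omitted here to
keep the sketch short; wrapping the loop in "for step in steps:" and timing it
with time.time() works the same way as in the committed code.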