From 76c5f61ddaded54305c0538f842cef67ed40ae89 Mon Sep 17 00:00:00 2001
From: Tom Selier
Date: Fri, 20 Oct 2023 16:11:10 +0200
Subject: [PATCH] cleanup

---
 .../decision_tree/decision_tree.py | 100 ++++++++++--------
 1 file changed, 55 insertions(+), 45 deletions(-)

diff --git a/src/experiments/decision_tree/decision_tree.py b/src/experiments/decision_tree/decision_tree.py
index 30653c6..fef2b00 100644
--- a/src/experiments/decision_tree/decision_tree.py
+++ b/src/experiments/decision_tree/decision_tree.py
@@ -1,14 +1,14 @@
-from enum import Enum
+# models
 from sklearn import tree
-from sklearn import metrics
 from sklearn import preprocessing
 from sklearn import neighbors
 from sklearn import ensemble
 from sklearn import svm
-from matplotlib import pyplot as plt
-import pandas as pd
+
+# other
+from enum import Enum
 import numpy as np
-import random
+import time
 import csv
 
 import plots
@@ -49,58 +49,68 @@ with open(PATH, 'r') as file:
     normalized = preprocessing.normalize(data, axis=0, norm='max')
     norm = list(normalized.tolist())
 
-steps = np.linspace(0.1, 1.0, 10, dtype=np.float64)
+steps = np.linspace(1e-4, 1, 20, dtype=np.float64)
+print("Step \t seconds/step")
 
 for step in steps:
     actual = []
     predicted = []
+    time_start = time.time()
 
-    for i in range(len(norm)):
-        temp_data = norm.pop(i)
-        temp_label = labels.pop(i)
+    for j in range(3):
+        for i in range(len(norm)):
+            temp_data = norm.pop(i)
+            temp_label = labels.pop(i)
 
-        # model = tree.DecisionTreeClassifier(
-        #     class_weight=None,
-        #     min_samples_leaf=2,
-        #     max_depth=None,     # < 5 is worse, None good too
-        #     random_state=False, # No change
-        #     criterion='gini',   # MCC + 0.1
-        #     splitter='best',
-        #     ccp_alpha=0         # Pruning: Keep this 0
-        #     )
-        # model = ensemble.RandomForestClassifier(
-        #     n_estimators=20,  # higher is better, but slower (def: 100)
-        #     criterion='gini', # gini best
-        # )
-        # model = ensemble.ExtraTreesClassifier(
-        #     n_estimators=150 # higher is better, but slower (def: 100)
-        # )
-        # model = neighbors.KNeighborsClassifier(
-        #     algorithm='auto',
-        #     leaf_size=2,
-        #     n_neighbors=step,
-        # )
-        model = ensemble.BaggingClassifier(
-            n_estimators=5,
-            max_samples=.5,
-            max_features=.5,
-            bootstrap=False
-        )
-        # model = svm.SVC(decision_function_shape='ovr'
-        # )
-        model = model.fit(norm, labels)
-        result = model.predict([temp_data])
+            # model = tree.DecisionTreeClassifier(
+            #     class_weight=None,
+            #     min_samples_leaf=2,
+            #     max_depth=None,     # < 5 is worse, None good too
+            #     random_state=False, # No change
+            #     criterion='gini',   # MCC + 0.1
+            #     splitter='best',
+            #     ccp_alpha=0         # Pruning: Keep this 0
+            #     )
+            # model = ensemble.RandomForestClassifier(
+            #     n_estimators=20,  # higher is better, but slower (def: 100)
+            #     criterion='gini', # gini best
+            # )
+            # model = ensemble.ExtraTreesClassifier(
+            #     n_estimators=step # higher is better, but slower (def: 100)
+            # )
+            # model = neighbors.KNeighborsClassifier(
+            #     algorithm='auto',
+            #     leaf_size=2,
+            #     n_neighbors=step,
+            # )
+            # model = ensemble.BaggingClassifier(
+            #     n_estimators=5,
+            #     max_samples=.5,
+            #     max_features=.5,
+            #     bootstrap=False
+            # )
+            # model = svm.SVC(
+            #     C = 0.8,
+            #     kernel = "poly",
+            #     degree = 5,
+            #     coef0 = 6,
+            #     probability = False,
+            #     break_ties=True,
+            #     decision_function_shape = 'ovr'
+            # )
+            model = model.fit(norm, labels)
+            result = model.predict([temp_data])
 
-        norm.append(temp_data)
-        labels.append(temp_label)
+            norm.append(temp_data)
+            labels.append(temp_label)
 
-        actual.append(temp_label)
-        predicted.append(result[0])
+            actual.append(temp_label)
+            predicted.append(result[0])
 
     actual_list.append(actual)
     predicted_list.append(predicted)
-    print(step)
+    print("%.4f"%step, "\t", "%.2f"%(time.time()-time_start))
 
 plots.plotMetrics(actual_list, predicted_list)
 plots.plotConfusion(actual_list[0], predicted_list[0])
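
Note: in the new version of the loop every model constructor is commented out,
including the freshly tuned svm.SVC block, so the script as committed would stop
at "model = model.fit(norm, labels)" with a NameError; one of the blocks has to be
uncommented before running. For reference, below is a minimal self-contained
sketch of the same leave-one-out evaluation, written with sklearn's LeaveOneOut
splitter instead of the manual pop()/append() rotation, and assuming the SVC
settings from the patch were the configuration under test. The random data is
purely a stand-in for the CSV the script loads.

    import numpy as np
    from sklearn import preprocessing, svm
    from sklearn.model_selection import LeaveOneOut

    rng = np.random.default_rng(0)
    X = rng.random((60, 4))              # stand-in features
    y = rng.integers(0, 3, size=60)      # stand-in class labels

    # Same per-feature max-normalisation as the script.
    X = preprocessing.normalize(X, axis=0, norm='max')

    actual, predicted = [], []
    for train_idx, test_idx in LeaveOneOut().split(X):
        # SVC parameters taken from the commented-out block in the patch.
        model = svm.SVC(
            C=0.8,
            kernel="poly",
            degree=5,
            coef0=6,
            break_ties=True,
            decision_function_shape="ovr",
        )
        model.fit(X[train_idx], y[train_idx])
        actual.append(y[test_idx][0])
        predicted.append(model.predict(X[test_idx])[0])

    print("accuracy:", np.mean(np.array(actual) == np.array(predicted)))

The parameter sweep and the per-step timing from the patch are omitted here to
keep the sketch short; wrapping the loop in "for step in steps:" and timing it
with time.time() works the same way as in the committed code.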