From f11a5a82e23d025090b725b5ddc81453fb2f6f70 Mon Sep 17 00:00:00 2001 From: Tom Selier Date: Sat, 21 Oct 2023 20:38:07 +0200 Subject: [PATCH] the worky version --- .../decision_tree/decision_tree.py | 15 +++--- src/helpers/test/decision_tree.py | 54 ++++++++++--------- src/suite.py | 6 ++- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/src/experiments/decision_tree/decision_tree.py b/src/experiments/decision_tree/decision_tree.py index fef2b00..398bed9 100644 --- a/src/experiments/decision_tree/decision_tree.py +++ b/src/experiments/decision_tree/decision_tree.py @@ -46,10 +46,10 @@ with open(PATH, 'r') as file: i += 1 # Werkt niet met genormaliseerde data - normalized = preprocessing.normalize(data, axis=0, norm='max') + normalized = preprocessing.normalize(data, axis=0, norm='') norm = list(normalized.tolist()) -steps = np.linspace(1e-4, 1, 20, dtype=np.float64) +steps = np.linspace(1, 12, 4, dtype=np.int64) print("Step \t seconds/step") for step in steps: @@ -78,11 +78,12 @@ for step in steps: # model = ensemble.ExtraTreesClassifier( # n_estimators=step # higher is better, but slower (def: 100) # ) - # model = neighbors.KNeighborsClassifier( - # algorithm='auto', - # leaf_size=2, - # n_neighbors=step, - # ) + model = neighbors.KNeighborsClassifier( + algorithm='auto', + leaf_size=2, + n_neighbors=step, + ) + # model = ensemble.BaggingClassifier( # n_estimators=5, # max_samples=.5, diff --git a/src/helpers/test/decision_tree.py b/src/helpers/test/decision_tree.py index 9cbd08d..4bede1d 100644 --- a/src/helpers/test/decision_tree.py +++ b/src/helpers/test/decision_tree.py @@ -1,15 +1,22 @@ from enum import Enum -from sklearn.preprocessing import maxabs_scale, MaxAbsScaler from sklearn.ensemble import RandomForestClassifier from joblib import dump, load from sklearn import tree +import numpy as np import csv import argparse import os parser = argparse.ArgumentParser(prog='DecisionTree CLI') parser.add_argument('-i', '--input', help='Input CSV file', required=True) -parser.add_argument('-o', '--output', help='Output model file', required=True) +parser.add_argument('-o', '--output', help='Output model folder', required=True) +parser.add_argument( + '-m', + '--model', + help='Chosen model (\'dectree\', \'randforest\' or \'extratree\')', + required=True + ) +parser.add_argument('-s', '--scaler', help='Scaler preprocesser', required=True) class Tree(Enum): ACCASIA = 0 @@ -52,19 +59,18 @@ class CVSuiteTestTree: i += 1 # normalize data - if self.scaler is not None: - norm = self.scaler.fit(data) - for row in norm: - print(len(row)) - else: - raise EnvironmentError("No scaler found") + #TODO: Arne help # train model - self.train(norm, labels, output) + self.train(data, labels, output) def addScaler(self, path) -> None: self.scaler = load(path) + if self.scaler is None: + print("Scaler failed to load!") + exit() + def train(self, data, labels, output) -> None: print("You called the parent class, doofus") @@ -75,7 +81,7 @@ class CVSuiteTestTree: def predict(self, data) -> None | int: if self.model is not None: - return self.model.predict([data]) + return self.model.predict(data) else: return None @@ -110,18 +116,16 @@ class CVSuiteTestExtraTrees(CVSuiteTestTree): if __name__ == "__main__": args = parser.parse_args() - test = CVSuiteTestRandomForest() - test.trainCSV(args.input, args.output) - test = CVSuiteTestDecisionTree( - "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\\models\\randomforest.joblib" - ) - path = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\\dataset\\csv\\result-2023-10-21T09.59.24.csv" - file = open(path, 'r') - reader = csv.reader(file, delimiter=',') - matrix = list(reader) - file.close() - - data = [float(x) for x in matrix[2][2:]] - norm = maxabs_scale(data) - - print(test.predict(norm)) \ No newline at end of file + + if args.model == 'dectree': + test = CVSuiteTestDecisionTree() + elif args.model == 'randforest': + test = CVSuiteTestRandomForest() + elif args.model == 'extratree': + test = CVSuiteTestExtraTrees() + else: + print("Model not found!") + exit() + + test.addScaler(args.scaler) + test.trainCSV(args.input, args.output) \ No newline at end of file diff --git a/src/suite.py b/src/suite.py index c6f0939..c2b2629 100644 --- a/src/suite.py +++ b/src/suite.py @@ -347,8 +347,12 @@ class CVSuite: print(C_WARN, "KNN Model not configured!") if self.test_dectree is not None: - print(self.test_dectree.predict(data)) + result = self.test_dectree.predict(data) output.insert("end", "Decision Tree Result:\n") + output.insert("end", f"\t{Tree(result).name}\n") + + print(C_DBUG, "Decision Tree Result:") + print("\t\t result: \t{}".format(result)) else: print(C_WARN, "Decison Tree Model not configured!")