diff --git a/README.md b/README.md index f74e256..09b90da 100644 --- a/README.md +++ b/README.md @@ -84,15 +84,30 @@ $ python ./src/suite.py 1. Open CVSuite and select the desired training set 2. Press 'Run analysis for entire dataset(!)', this will export a CSV file with all preprocessed data in the `./out` directory - Based on your system configuration, this might take a while -3. Run the CVSuiteTestKNN CLI tool: +3. Run the CVSuiteTestKNN CLI tool; the following arguments are required: + - `-i` Input CSV file + - `-o` Output folder, likely `./out/models` ```sh $ python ./src/helpers/test/knn.py -i ./out/result-(date/time).csv -o ./out/models/ ``` -4. Edit your `config.json` to include the newly created model +4. The script generates two files: a fitted scaler to use with other models (`.pkl` file) and the model itself (`.yaml` file) +5. Edit your `config.json` to include the newly created model +### Train and export a Decision tree model > :memo: **Please note:**
> The KNN Training script also generates the scaler required to make the decision tree model +1. Run the CVSuiteTestTree CLI tool using the following arguments: + - `-i` Input CSV file + - `-o` Output folder, likely `./out/models` + - `-m` Model to train: `dectree`, `randforest` or `extratree` + - `-s` Scaler file to use (`.pkl` file) +```sh +python ./src/helpers/test/decision_tree.py -i ./out/result-(date/time).csv -o ./out/models/ -m 'dectree' -s ./out/models/scale_(date/time).pkl +``` +2. The script generates one `.pkl` file based on the chosen model +3. Edit your `config.json` to include the newly created model + --- Arne van Iterson
diff --git a/src/helpers/test/__init__.py b/src/helpers/test/__init__.py new file mode 100644 index 0000000..5d1b4ab --- /dev/null +++ b/src/helpers/test/__init__.py @@ -0,0 +1,2 @@ +from .decision_tree import * +from .knn import * \ No newline at end of file diff --git a/src/helpers/test/decision_tree.py b/src/helpers/test/decision_tree.py index 05209e9..e88bd16 100644 --- a/src/helpers/test/decision_tree.py +++ b/src/helpers/test/decision_tree.py @@ -6,14 +6,24 @@ import numpy as np import csv import argparse import os +import sys -from ..tags import Tree +try: + # Perform relative import if included from CVSuite + from ..tags import Tree + from ..logger import C_DONE +except ImportError: + # This solution is hot garbage but I refuse to spend any more time on it + directory = os.path.dirname(os.path.realpath(__file__)) + sys.path.append(os.path.join(directory, '..')) + from tags import Tree + from logger import C_DONE parser = argparse.ArgumentParser(prog='DecisionTree CLI') parser.add_argument('-i', '--input', help='Input CSV file', required=True) parser.add_argument('-o', '--output', help='Output model folder', required=True) parser.add_argument('-m', '--model', help='Chosen model (\'dectree\', \'randforest\' or \'extratree\')', required=True) -parser.add_argument('-s', '--scaler', help='Scaler preprocesser', required=True) +parser.add_argument('-s', '--scaler', help='Scaler preprocessor', required=True) class CVSuiteTestTree: def __init__(self, model_path = None): @@ -83,7 +93,7 @@ class CVSuiteTestDecisionTree(CVSuiteTestTree): ccp_alpha=0 ) self.model.fit(data, labels) - self.save(output, 'decisiontree.joblib') + self.save(output, 'decisiontree.pkl') class CVSuiteTestRandomForest(CVSuiteTestTree): def train(self, data, labels, output) -> None: @@ -92,13 +102,13 @@ class CVSuiteTestRandomForest(CVSuiteTestTree): criterion='gini', ) self.model.fit(data, labels) - self.save(output, 'randomforest.joblib') + self.save(output, 'randomforest.pkl') class 
CVSuiteTestExtraTrees(CVSuiteTestTree): def train(self, data, labels, output) -> None: self.model = tree.ExtraTreeClassifier() self.model.fit(data, labels) - self.save(output, 'extratrees.joblib') + self.save(output, 'extratrees.pkl') if __name__ == "__main__": args = parser.parse_args() @@ -114,4 +124,5 @@ if __name__ == "__main__": exit() test.addScaler(args.scaler) - test.trainCSV(args.input, args.output) \ No newline at end of file + test.trainCSV(args.input, args.output) + print(C_DONE + "Model trained successfully!") \ No newline at end of file diff --git a/src/helpers/test/knn.py b/src/helpers/test/knn.py index 6620578..f8a3683 100644 --- a/src/helpers/test/knn.py +++ b/src/helpers/test/knn.py @@ -8,9 +8,18 @@ import yaml import joblib import datetime import os +import sys -from ..logger import C_DBUG -from ..tags import Tree +try: + # Perform relative import if included from CVSuite + from ..tags import Tree + from ..logger import C_DONE, C_DBUG +except ImportError: + # This solution is hot garbage but I refuse to spend any more time on it + directory = os.path.dirname(os.path.realpath(__file__)) + sys.path.append(os.path.join(directory, '..')) + from tags import Tree + from logger import C_DONE, C_DBUG parser = argparse.ArgumentParser(prog='KNN Train CLI') parser.add_argument('-i', '--input', help='Input CSV file', required=True) @@ -38,7 +47,7 @@ class CVSuiteTestKNN: file.close() header = data.pop(0) - print("CSV tags: ", header) + # print("CSV tags: ", header) # Get classifier tags tags_int = [] @@ -97,8 +106,8 @@ class CVSuiteTestKNN: if self.trained: raise EnvironmentError("Model already trained!") else: - print(data) - print(data.shape) + # print(data) + # print(data.shape) self.knn.train(data, cv.ml.ROW_SAMPLE, tags) # Save it @@ -116,4 +125,5 @@ class CVSuiteTestKNN: if __name__ == "__main__": args = parser.parse_args() test = CVSuiteTestKNN() - test.trainCSV(args.input, args.output) \ No newline at end of file + test.trainCSV(args.input, 
args.output) + print(C_DONE + "Model trained successfully!") \ No newline at end of file