"Fixed" imports for CLI tools

This commit is contained in:
Arne van Iterson 2023-10-22 14:51:19 +02:00
parent 49fd6576a0
commit 4dcd021490
4 changed files with 52 additions and 14 deletions

View File

@ -84,15 +84,30 @@ $ python ./src/suite.py
1. Open CVSuite and select the desired training set 1. Open CVSuite and select the desired training set
2. Press 'Run analysis for entire dataset(!)', this will export a CSV file with all preprocessed data in the `./out` directory 2. Press 'Run analysis for entire dataset(!)', this will export a CSV file with all preprocessed data in the `./out` directory
- Based on your system configuration, this might take a while - Based on your system configuration, this might take a while
3. Run the CVSuiteTestKNN CLI tool: 3. Run the CVSuiteTestKNN CLI tool; the following arguments are required:
- `-i` Input CSV file
- `-o` Output folder, likely `./out/models`
```sh ```sh
$ python ./src/helpers/test/knn.py -i ./out/result-(date/time).csv -o ./out/models/ $ python ./src/helpers/test/knn.py -i ./out/result-(date/time).csv -o ./out/models/
``` ```
4. Edit your `config.json` to include the newly created model 4. The script generates two files: a fitted scaler to use with other models (`.pkl` file) and the model itself (`.yaml` file)
5. Edit your `config.json` to include the newly created model
### Train and export a Decision tree model
> :memo: **Please note:**<br> > :memo: **Please note:**<br>
> The KNN Training script also generates the scaler required to make the decision tree model > The KNN Training script also generates the scaler required to make the decision tree model
1. Run the CVSuiteTestTree CLI Tool using the following arguments:
- `-i` Input CSV file
- `-o` Output folder, likely `./out/models`
- `-m` Model to train; `dectree`, `randforest` or `extratree`
- `-s` Scaler file to use (`.pkl` file)
```sh
$ python ./src/helpers/test/decision_tree.py -i ./out/result-(date/time).csv -o ./out/models/ -m 'dectree' -s ./out/models/scale_(date/time).pkl
```
2. The script generates one `.pkl` file based on the chosen model
3. Edit your `config.json` to include the newly created model
--- ---
Arne van Iterson<br> Arne van Iterson<br>

View File

@ -0,0 +1,2 @@
from .decision_tree import *
from .knn import *

View File

@ -6,14 +6,24 @@ import numpy as np
import csv import csv
import argparse import argparse
import os import os
import sys
from ..tags import Tree try:
# Perform relative import if included from CVSuite
from ..tags import Tree
from ..logger import C_DONE
except ImportError:
# HACK: the relative imports fail when this file is run directly as a CLI script, so add the parent directory to sys.path and import the modules by absolute name
directory = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(directory, '..'))
from tags import Tree
from logger import C_DONE
parser = argparse.ArgumentParser(prog='DecisionTree CLI') parser = argparse.ArgumentParser(prog='DecisionTree CLI')
parser.add_argument('-i', '--input', help='Input CSV file', required=True) parser.add_argument('-i', '--input', help='Input CSV file', required=True)
parser.add_argument('-o', '--output', help='Output model folder', required=True) parser.add_argument('-o', '--output', help='Output model folder', required=True)
parser.add_argument('-m', '--model', help='Chosen model (\'dectree\', \'randforest\' or \'extratree\')', required=True) parser.add_argument('-m', '--model', help='Chosen model (\'dectree\', \'randforest\' or \'extratree\')', required=True)
parser.add_argument('-s', '--scaler', help='Scaler preprocesser', required=True) parser.add_argument('-s', '--scaler', help='Scaler preprocessor', required=True)
class CVSuiteTestTree: class CVSuiteTestTree:
def __init__(self, model_path = None): def __init__(self, model_path = None):
@ -83,7 +93,7 @@ class CVSuiteTestDecisionTree(CVSuiteTestTree):
ccp_alpha=0 ccp_alpha=0
) )
self.model.fit(data, labels) self.model.fit(data, labels)
self.save(output, 'decisiontree.joblib') self.save(output, 'decisiontree.pkl')
class CVSuiteTestRandomForest(CVSuiteTestTree): class CVSuiteTestRandomForest(CVSuiteTestTree):
def train(self, data, labels, output) -> None: def train(self, data, labels, output) -> None:
@ -92,13 +102,13 @@ class CVSuiteTestRandomForest(CVSuiteTestTree):
criterion='gini', criterion='gini',
) )
self.model.fit(data, labels) self.model.fit(data, labels)
self.save(output, 'randomforest.joblib') self.save(output, 'randomforest.pkl')
class CVSuiteTestExtraTrees(CVSuiteTestTree): class CVSuiteTestExtraTrees(CVSuiteTestTree):
def train(self, data, labels, output) -> None: def train(self, data, labels, output) -> None:
self.model = tree.ExtraTreeClassifier() self.model = tree.ExtraTreeClassifier()
self.model.fit(data, labels) self.model.fit(data, labels)
self.save(output, 'extratrees.joblib') self.save(output, 'extratrees.pkl')
if __name__ == "__main__": if __name__ == "__main__":
args = parser.parse_args() args = parser.parse_args()
@ -115,3 +125,4 @@ if __name__ == "__main__":
test.addScaler(args.scaler) test.addScaler(args.scaler)
test.trainCSV(args.input, args.output) test.trainCSV(args.input, args.output)
print(C_DONE + "Model trained successfully!")

View File

@ -8,9 +8,18 @@ import yaml
import joblib import joblib
import datetime import datetime
import os import os
import sys
from ..logger import C_DBUG try:
from ..tags import Tree # Perform relative import if included from CVSuite
from ..tags import Tree
from ..logger import C_DONE, C_DBUG
except ImportError:
# HACK: the relative imports fail when this file is run directly as a CLI script, so add the parent directory to sys.path and import the modules by absolute name
directory = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(directory, '..'))
from tags import Tree
from logger import C_DONE, C_DBUG
parser = argparse.ArgumentParser(prog='KNN Train CLI') parser = argparse.ArgumentParser(prog='KNN Train CLI')
parser.add_argument('-i', '--input', help='Input CSV file', required=True) parser.add_argument('-i', '--input', help='Input CSV file', required=True)
@ -38,7 +47,7 @@ class CVSuiteTestKNN:
file.close() file.close()
header = data.pop(0) header = data.pop(0)
print("CSV tags: ", header) # print("CSV tags: ", header)
# Get classifier tags # Get classifier tags
tags_int = [] tags_int = []
@ -97,8 +106,8 @@ class CVSuiteTestKNN:
if self.trained: if self.trained:
raise EnvironmentError("Model already trained!") raise EnvironmentError("Model already trained!")
else: else:
print(data) # print(data)
print(data.shape) # print(data.shape)
self.knn.train(data, cv.ml.ROW_SAMPLE, tags) self.knn.train(data, cv.ml.ROW_SAMPLE, tags)
# Save it # Save it
@ -117,3 +126,4 @@ if __name__ == "__main__":
args = parser.parse_args() args = parser.parse_args()
test = CVSuiteTestKNN() test = CVSuiteTestKNN()
test.trainCSV(args.input, args.output) test.trainCSV(args.input, args.output)
print(C_DONE + "Model trained successfully!")