"Fixed" imports for CLI tools
This commit is contained in:
parent
49fd6576a0
commit
4dcd021490
19
README.md
19
README.md
@ -84,15 +84,30 @@ $ python ./src/suite.py
|
|||||||
1. Open CVSuite and select the desired training set
|
1. Open CVSuite and select the desired training set
|
||||||
2. Press 'Run analysis for entire dataset(!)', this will export a CSV file with all preprocessed data in the `./out` directory
|
2. Press 'Run analysis for entire dataset(!)', this will export a CSV file with all preprocessed data in the `./out` directory
|
||||||
- Based on your system configuration, this might take a while
|
- Based on your system configuration, this might take a while
|
||||||
3. Run the CVSuiteTestKNN CLI tool:
|
3. Run the CVSuiteTestKNN CLI tool; the following arguments are required:
|
||||||
|
- `-i` Input CSV file
|
||||||
|
- `-o` Output folder, likely `./out/models`
|
||||||
```sh
|
```sh
|
||||||
$ python ./src/helpers/test/knn.py -i ./out/result-(date/time).csv -o ./out/models/
|
$ python ./src/helpers/test/knn.py -i ./out/result-(date/time).csv -o ./out/models/
|
||||||
```
|
```
|
||||||
4. Edit your `config.json` to include the newly created model
|
4. The script generates two files: a fitted scaler to use with other models (`.pkl` file) and the model itself (`.yaml` file)
|
||||||
|
5. Edit your `config.json` to include the newly created model
|
||||||
|
|
||||||
|
### Train and export a Decision tree model
|
||||||
> :memo: **Please note:**<br>
|
> :memo: **Please note:**<br>
|
||||||
> The KNN Training script also generates the scaler required to make the decision tree model
|
> The KNN Training script also generates the scaler required to make the decision tree model
|
||||||
|
|
||||||
|
1. Run the CVSuiteTestTree CLI Tool using the following arguments:
|
||||||
|
- `-i` Input CSV file
|
||||||
|
- `-o` Output folder, likely `./out/models`
|
||||||
|
- `-m` Model to train: `dectree`, `randforest` or `extratree`
|
||||||
|
- `-s` Scaler file to use (`.pkl` file)
|
||||||
|
```sh
|
||||||
|
$ python ./src/helpers/test/decision_tree.py -i ./out/result-(date/time).csv -o ./out/models/ -m 'dectree' -s ./out/models/scale_(date/time).pkl
|
||||||
|
```
|
||||||
|
2. The script generates one `.pkl` file based on the chosen model
|
||||||
|
3. Edit your `config.json` to include the newly created model
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
Arne van Iterson<br>
|
Arne van Iterson<br>
|
||||||
|
2
src/helpers/test/__init__.py
Normal file
2
src/helpers/test/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
from .decision_tree import *
|
||||||
|
from .knn import *
|
@ -6,14 +6,24 @@ import numpy as np
|
|||||||
import csv
|
import csv
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
from ..tags import Tree
|
try:
|
||||||
|
# Perform relative import if included from CVSuite
|
||||||
|
from ..tags import Tree
|
||||||
|
from ..logger import C_DONE
|
||||||
|
except ImportError:
|
||||||
|
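# HACK: fallback when this file is run directly as a script (the relative import fails): append the parent directory to sys.path so `tags` and `logger` can be imported as top-level modules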
|
||||||
|
directory = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
sys.path.append(os.path.join(directory, '..'))
|
||||||
|
from tags import Tree
|
||||||
|
from logger import C_DONE
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(prog='DecisionTree CLI')
|
parser = argparse.ArgumentParser(prog='DecisionTree CLI')
|
||||||
parser.add_argument('-i', '--input', help='Input CSV file', required=True)
|
parser.add_argument('-i', '--input', help='Input CSV file', required=True)
|
||||||
parser.add_argument('-o', '--output', help='Output model folder', required=True)
|
parser.add_argument('-o', '--output', help='Output model folder', required=True)
|
||||||
parser.add_argument('-m', '--model', help='Chosen model (\'dectree\', \'randforest\' or \'extratree\')', required=True)
|
parser.add_argument('-m', '--model', help='Chosen model (\'dectree\', \'randforest\' or \'extratree\')', required=True)
|
||||||
parser.add_argument('-s', '--scaler', help='Scaler preprocesser', required=True)
|
parser.add_argument('-s', '--scaler', help='Scaler preprocessor', required=True)
|
||||||
|
|
||||||
class CVSuiteTestTree:
|
class CVSuiteTestTree:
|
||||||
def __init__(self, model_path = None):
|
def __init__(self, model_path = None):
|
||||||
@ -83,7 +93,7 @@ class CVSuiteTestDecisionTree(CVSuiteTestTree):
|
|||||||
ccp_alpha=0
|
ccp_alpha=0
|
||||||
)
|
)
|
||||||
self.model.fit(data, labels)
|
self.model.fit(data, labels)
|
||||||
self.save(output, 'decisiontree.joblib')
|
self.save(output, 'decisiontree.pkl')
|
||||||
|
|
||||||
class CVSuiteTestRandomForest(CVSuiteTestTree):
|
class CVSuiteTestRandomForest(CVSuiteTestTree):
|
||||||
def train(self, data, labels, output) -> None:
|
def train(self, data, labels, output) -> None:
|
||||||
@ -92,13 +102,13 @@ class CVSuiteTestRandomForest(CVSuiteTestTree):
|
|||||||
criterion='gini',
|
criterion='gini',
|
||||||
)
|
)
|
||||||
self.model.fit(data, labels)
|
self.model.fit(data, labels)
|
||||||
self.save(output, 'randomforest.joblib')
|
self.save(output, 'randomforest.pkl')
|
||||||
|
|
||||||
class CVSuiteTestExtraTrees(CVSuiteTestTree):
|
class CVSuiteTestExtraTrees(CVSuiteTestTree):
|
||||||
def train(self, data, labels, output) -> None:
|
def train(self, data, labels, output) -> None:
|
||||||
self.model = tree.ExtraTreeClassifier()
|
self.model = tree.ExtraTreeClassifier()
|
||||||
self.model.fit(data, labels)
|
self.model.fit(data, labels)
|
||||||
self.save(output, 'extratrees.joblib')
|
self.save(output, 'extratrees.pkl')
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@ -115,3 +125,4 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
test.addScaler(args.scaler)
|
test.addScaler(args.scaler)
|
||||||
test.trainCSV(args.input, args.output)
|
test.trainCSV(args.input, args.output)
|
||||||
|
print(C_DONE + "Model trained successfully!")
|
@ -8,9 +8,18 @@ import yaml
|
|||||||
import joblib
|
import joblib
|
||||||
import datetime
|
import datetime
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
from ..logger import C_DBUG
|
try:
|
||||||
from ..tags import Tree
|
# Perform relative import if included from CVSuite
|
||||||
|
from ..tags import Tree
|
||||||
|
from ..logger import C_DONE, C_DBUG
|
||||||
|
except ImportError:
|
||||||
|
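# HACK: fallback when this file is run directly as a script (the relative import fails): append the parent directory to sys.path so `tags` and `logger` can be imported as top-level modules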
|
||||||
|
directory = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
sys.path.append(os.path.join(directory, '..'))
|
||||||
|
from tags import Tree
|
||||||
|
from logger import C_DONE, C_DBUG
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(prog='KNN Train CLI')
|
parser = argparse.ArgumentParser(prog='KNN Train CLI')
|
||||||
parser.add_argument('-i', '--input', help='Input CSV file', required=True)
|
parser.add_argument('-i', '--input', help='Input CSV file', required=True)
|
||||||
@ -38,7 +47,7 @@ class CVSuiteTestKNN:
|
|||||||
file.close()
|
file.close()
|
||||||
|
|
||||||
header = data.pop(0)
|
header = data.pop(0)
|
||||||
print("CSV tags: ", header)
|
# print("CSV tags: ", header)
|
||||||
|
|
||||||
# Get classifier tags
|
# Get classifier tags
|
||||||
tags_int = []
|
tags_int = []
|
||||||
@ -97,8 +106,8 @@ class CVSuiteTestKNN:
|
|||||||
if self.trained:
|
if self.trained:
|
||||||
raise EnvironmentError("Model already trained!")
|
raise EnvironmentError("Model already trained!")
|
||||||
else:
|
else:
|
||||||
print(data)
|
# print(data)
|
||||||
print(data.shape)
|
# print(data.shape)
|
||||||
self.knn.train(data, cv.ml.ROW_SAMPLE, tags)
|
self.knn.train(data, cv.ml.ROW_SAMPLE, tags)
|
||||||
|
|
||||||
# Save it
|
# Save it
|
||||||
@ -117,3 +126,4 @@ if __name__ == "__main__":
|
|||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
test = CVSuiteTestKNN()
|
test = CVSuiteTestKNN()
|
||||||
test.trainCSV(args.input, args.output)
|
test.trainCSV(args.input, args.output)
|
||||||
|
print(C_DONE + "Model trained successfully!")
|
Loading…
Reference in New Issue
Block a user