From e16cf5a67069ceb03ec3708c306ebc1a6de9c50a Mon Sep 17 00:00:00 2001 From: Arne van Iterson Date: Sat, 21 Oct 2023 13:00:48 +0200 Subject: [PATCH] Prepare for KNN test in CVSuite --- src/experiments/knn/knn.py | 55 ++++++++++---------- src/helpers/gui/main.ui | 1 + src/helpers/test/knn.py | 103 +++++++++++++++++++++++++++++++++++++ src/suite.py | 19 ++++++- 4 files changed, 151 insertions(+), 27 deletions(-) create mode 100644 src/helpers/test/knn.py diff --git a/src/experiments/knn/knn.py b/src/experiments/knn/knn.py index d20a4e7..01f012f 100644 --- a/src/experiments/knn/knn.py +++ b/src/experiments/knn/knn.py @@ -3,9 +3,9 @@ import numpy as np import matplotlib.pyplot as plt import seaborn as sns import csv -from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler from enum import Enum import random +from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, matthews_corrcoef class Tree(Enum): @@ -19,7 +19,8 @@ class Tree(Enum): PLATAAN = 7 # Open file -file = open('dataset\\csv\\result-2023-10-14T16.13.30.csv', "r") +# file = open('dataset\\csv\\result-2023-10-14T16.13.30.csv', "r") +file = open('./out/result-2023-10-10T15.08.36.csv', "r") data = list(csv.reader(file, delimiter=",")) file.close() @@ -32,7 +33,7 @@ tags_int = [] for row in data: tree = row.pop(0) - row.pop(1) # TODO: Doe dit niet + # photoId = row.pop(1) id = Tree[tree.upper()] # print("Tree name =", tree, " id =", id.value) @@ -62,26 +63,26 @@ for idx, col in enumerate(data[0]): column = np.array(column).reshape(len(column)) # DEBUG Print resulting column - print("NORM", header[idx + 1], "\n", column) + # print("NORM", header[idx + 1], "\n", column) # Replace original data array data[:, idx] = column # # Get a random number for testing # validateId = random.randint(0, tags_len - 1) -tag_true = [] -tag_predict = [] +# tag_true = [] +# 
tag_predict = [] -print(tags_len) +# print(tags_len) -for validateId in range(0, tags_len - 1): - # Remove object from train set - validateTag = tags_int[validateId] - validateObj =np.array([data[validateId]]) - np.delete(tags_int, validateId) - np.delete(data, validateTag) +# for validateId in range(0, tags_len - 1): +# # Remove object from train set +# validateTag = tags_int[validateId] +# validateObj =np.array([data[validateId]]) +# np.delete(tags_int, validateId) +# np.delete(data, validateTag) - tag_true.append(validateTag) +# tag_true.append(validateTag) # print(validateTag, validateObj) @@ -89,28 +90,30 @@ for validateId in range(0, tags_len - 1): print(tags_int) print(data.dtype, type(data), tags_int.dtype, type(tags_int)) knn.train(data, cv.ml.ROW_SAMPLE, tags_int) + + knn.save('./out/models/knn_nosift.pkl') # print (data) # print('--------------------') # print (validateObj) - ret, results, neighbours ,dist = knn.findNearest(validateObj, 3) - tag_predict.append(results[0][0]) + # ret, results, neighbours ,dist = knn.findNearest(validateObj, 3) + # tag_predict.append(results[0][0]) # print( "result: {}\n".format(results) ) # print( "neighbours: {}\n".format(neighbours) ) # print( "distance: {}\n".format(dist) ) -# Create a heatmap -sns.heatmap(confusion_matrix(tag_true, tag_predict), annot=True) -plt.title( "Confusion Matrix KNN" ) -plt.show() +# # Create a heatmap +# sns.heatmap(confusion_matrix(tag_true, tag_predict), annot=True) +# plt.title( "Confusion Matrix KNN" ) +# plt.show() # Score -print("Accuracy score", accuracy_score(tag_true, tag_predict)) -print("Precision score (macro)", precision_score(tag_true, tag_predict, average='macro')) -print("Precision score (micro)", precision_score(tag_true, tag_predict, average='micro')) -print("Recall score (macro)", recall_score(tag_true, tag_predict, average='macro')) -print("Recall score (micro)", recall_score(tag_true, tag_predict, average='micro')) -print("MCC", matthews_corrcoef(tag_true, tag_predict)) \ 
"""Train, save and query the OpenCV k-nearest-neighbours model used by CVSuite.

Run as a script to train a model from a CSV file exported by CVSuite::

    python knn.py -i <input.csv> -o <output model file>
"""
import argparse
import csv
from enum import Enum

import numpy as np

# OpenCV (cv2) and scikit-learn are imported inside the methods that need
# them, so this module (and the Tree enum) can be imported without them and
# scikit-learn is only required when a model is actually trained from CSV.

parser = argparse.ArgumentParser(prog='KNN Train CLI')
parser.add_argument('-i', '--input', help='Input CSV file', required=True)
parser.add_argument('-o', '--output', help='Output model file', required=True)


class Tree(Enum):
    """Integer class label for every tree tag.

    The member names are looked up with ``Tree[tag.upper()]`` in
    ``CVSuiteTestKNN.trainCSV``, so they have to match the (upper-cased) tags
    in the first CSV column exactly.
    """

    ACCASIA = 0
    BERK = 1
    EIK = 2
    ELS = 3
    ESDOORN = 4
    ES = 5
    LINDE = 6
    PLATAAN = 7


class CVSuiteTestKNN:
    """Thin wrapper around an OpenCV ``KNearest`` model.

    Attributes are ``knn`` (the OpenCV model object) and ``trained`` (True
    once the model has been loaded from disk or trained by this instance).
    """

    def __init__(self, model=None):
        """Create an untrained model, or load a stored one.

        Args:
            model: Path of a model file written by ``train``. When None an
                empty, untrained model is created instead.
        """
        import cv2 as cv

        if model is None:
            self.knn = cv.ml.KNearest_create()
            self.trained = False
        else:
            self.knn = cv.ml.KNearest_load(model)
            self.trained = True

    def trainCSV(self, path, output):
        """Read CVSuite CSV data, normalise it and train the model with it.

        The first row must be the CSV header. In every following row the
        first column is the tree tag (a ``Tree`` member name, matched
        case-insensitively); all remaining columns are parsed as float32
        features and scaled per column with ``MaxAbsScaler``.

        NOTE(review): an earlier description of this function also mentioned
        a photoId column, but only the tag column is stripped here -- confirm
        that the input files contain no photoId column.

        Args:
            path: Path of the CSV file to read.
            output: Path the trained model is saved to.

        Raises:
            ValueError: If the file is empty or holds no data rows.
            KeyError: If a tag is not a member of ``Tree``.
            RuntimeError: If this model has already been trained.
        """
        from sklearn.preprocessing import MaxAbsScaler

        # newline='' is what the csv module asks for when reading files.
        with open(path, mode='r', newline='') as file:
            rows = list(csv.reader(file, delimiter=","))

        if not rows:
            raise ValueError(f"{path} is empty")

        header, samples = rows[0], rows[1:]
        print("CSV tags: ", header)

        if not samples:
            raise ValueError(f"{path} contains a header but no data rows")

        # Class labels and features are passed on as int32 / float32 arrays.
        tags = np.array(
            [Tree[row[0].upper()].value for row in samples], dtype=np.int32
        )
        features = np.array([row[1:] for row in samples], dtype=np.float32)

        # MaxAbsScaler scales every feature column independently, so one call
        # on the whole matrix equals scaling the columns one at a time.
        features = MaxAbsScaler().fit_transform(features)
        features = features.astype(np.float32, copy=False)

        # Pass data to train function
        self.train(features, tags, output)

    def train(self, data, tags, output):
        """Train the model on already normalised data and save it.

        This function should not be run from within the suite.

        Args:
            data: Normalised samples, one sample per row.
            tags: Class label of every row in ``data``.
            output: Path the trained model is saved to.

        Raises:
            RuntimeError: If the model was loaded from disk or has been
                trained before.
        """
        if self.trained:
            raise RuntimeError("Model already trained!")

        import cv2 as cv

        self.knn.train(data, cv.ml.ROW_SAMPLE, tags)
        self.knn.save(output)
        # Remember the training so the guard above also covers a second call.
        self.trained = True

    def predict(self, data):
        """Return the result of the OpenCV model's ``predict`` for ``data``.

        NOTE(review): presumably ``data`` has to be normalised the same way
        as the training data -- confirm against the caller.
        """
        return self.knn.predict(data)


if __name__ == "__main__":
    args = parser.parse_args()
    test = CVSuiteTestKNN()
    test.trainCSV(args.input, args.output)
def runTest(self, event=None):
    """Replace the contents of the "testdata" widget with the test output.

    The widget is made editable only while it is being rewritten and is put
    back into its disabled state afterwards.

    Args:
        event: Not used by this method; it defaults to None so the method
            can also be called without arguments.
    """
    test_view = self.builder.get_object("testdata")

    # Unlock the widget, clear it, write the new text, lock it again.
    test_view.configure(state="normal")
    test_view.delete(1.0, "end")
    test_view.insert("end", "test\n")
    test_view.configure(state="disabled")