Tom Selier 2023-10-20 11:09:38 +02:00
parent 3f384138b2
commit 8850f957ae
3 changed files with 151 additions and 104 deletions

View File

@@ -26,8 +26,12 @@ for element in data[1:]:
    element.append(idx)
    print(element[1], new_element[1])
for element in data:
    print(len(element))
with open(OUTPUT_DIR + "combined.csv", 'w') as file:
    for element in data:
        for idx in element:
        for idx in element[:-1]:
            file.write(str(idx) + ',')
        file.write(str(element[-1]))
        file.write('\n')
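The nested loop above writes one comma-separated row per element while avoiding a trailing comma. A minimal sketch of the same output using the standard library's csv.writer (assuming, as above, that data is a list of rows and OUTPUT_DIR is defined earlier in this script):

import csv

# csv.writer inserts the delimiters and line endings itself,
# so no manual trailing-comma handling is needed (sketch, not part of this commit)
with open(OUTPUT_DIR + "combined.csv", 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(data)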

View File

@@ -10,9 +10,9 @@ import pandas as pd
import numpy as np
import random
import csv
import plots
# SIFT_PATH = "..\\algorithms\\data\\sift.csv"
SIFT_PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\datacsv\\result-2023-10-13T14.46.23.csv"
PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\\dataset\\csv\\combined.csv"
class Tree(Enum):
    ACCASIA = 0
@@ -24,36 +24,24 @@ class Tree(Enum):
    LINDE = 6
    PLATAAN = 7
def roc_auc_score_multiclass(actual_class, pred_class, average="macro"):
    # creating a set of all the unique classes using the actual class list
    unique_class = set(actual_class)
    roc_auc_dict = {}
    for per_class in unique_class:
        # creating a list of all the classes except the current class
        other_class = [x for x in unique_class if x != per_class]
        # marking the current class as 1 and all other classes as 0
        new_actual_class = [0 if x in other_class else 1 for x in actual_class]
        new_pred_class = [0 if x in other_class else 1 for x in pred_class]
        # using the sklearn metrics method to calculate the roc_auc_score
        roc_auc = metrics.roc_auc_score(new_actual_class, new_pred_class, average=average)
        roc_auc_dict[per_class] = roc_auc
    return roc_auc_dict
labels = []
actual_list = []
predicted_list = []
i = 0
with open(SIFT_PATH, 'r') as file:
with open(PATH, 'r') as file:
    reader = csv.reader(file, delimiter=',')
    matrix = list(reader)
    data = [[] for x in range(len(matrix)-1)]
    # Load all but the headers
    for row in matrix[1:]:
        ## append data to lists
        labels.append(Tree[row[0].upper()].value)
        for element in row[1:]:
        # append all but ID and tree
        for element in row[2:]:
            data[i].append(float(element))
        i += 1
@@ -61,30 +49,17 @@ with open(SIFT_PATH, 'r') as file:
normalized = preprocessing.normalize(data, axis=0, norm='max')
norm = normalized.tolist()  # tolist() already returns a plain list
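preprocessing.normalize with axis=0 and norm='max' divides every feature column by its maximum absolute value, so each feature lands in [-1, 1]. A rough NumPy equivalent, for illustration only (assuming data is the numeric matrix built above):

# rough equivalent of preprocessing.normalize(data, axis=0, norm='max')
arr = np.asarray(data, dtype=np.float64)
col_max = np.abs(arr).max(axis=0)   # per-column maximum absolute value
col_max[col_max == 0] = 1.0         # guard against all-zero columns
manual_norm = (arr / col_max).tolist()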
steps = np.linspace(0, 9, 10, dtype=np.int64)
# steps = np.linspace(1, 100, 10, dtype=np.int64)
# steps = np.linspace(0, 1, 11, dtype=np.float64)
accuracy = []
precision = []
recall = []
roc = []
phi = []
steps = np.linspace(0.1, 1.0, 10, dtype=np.float64)
for step in steps:
    actual = []
    predicted = []
    # weights = {}
    # for idx, element in enumerate(Tree):
    #     # print(idx, element)
    #     weights[idx] = 0.1
    # weights[5] = 1
    for i in range(len(norm)):
        temp_data = norm.pop(i)
        temp_label = labels.pop(i)
        # model = tree.DecisionTreeClassifier(
        #     # class_weight=weights,
        #     class_weight=None,
        #     min_samples_leaf=2,
        #     max_depth=None,  # < 5 is worse, None good too
@@ -98,21 +73,23 @@ for step in steps:
        #     criterion='gini', # gini best
        # )
        # model = ensemble.ExtraTreesClassifier(
        #     n_estimators=150  # higher is better, but slower (def: 100)
        # )
        model = neighbors.KNeighborsClassifier(
            algorithm='auto',
            leaf_size=2,
            n_neighbors=1,
            n_jobs=-1
        # model = neighbors.KNeighborsClassifier(
        #     algorithm='auto',
        #     leaf_size=2,
        #     n_neighbors=step,
        # )
        model = ensemble.BaggingClassifier(
            n_estimators=5,
            max_samples=.5,
            max_features=.5,
            bootstrap=False
        )
        # model = ensemble.BaggingClassifier(
        # )
        # model = svm.SVC(decision_function_shape='ovr'
        # )
        model = model.fit(norm, labels)
        result = model.predict([temp_data])
        # features = model.feature_importances_
        # note: no `del model` here -- deleting it each pass would make the
        # hasattr(model, "feature_importances_") check after the loop raise a NameError
        norm.append(temp_data)
        labels.append(temp_label)
@@ -120,63 +97,12 @@ for step in steps:
        actual.append(temp_label)
        predicted.append(result[0])
    accuracy.append(metrics.accuracy_score(actual, predicted))
    precision.append(metrics.precision_score(actual, predicted, average='macro'))
    recall.append(metrics.recall_score(actual, predicted, average='macro'))
    roc.append(roc_auc_score_multiclass(actual, predicted))
    phi.append(metrics.matthews_corrcoef(actual, predicted))
    actual_list.append(actual)
    predicted_list.append(predicted)
    print(step)
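The pop/fit/predict/append pattern above is hand-rolled leave-one-out cross-validation: each sample is held out once, the model is fit on the remainder, and the held-out sample is predicted. A hedged sketch of the same evaluation with scikit-learn's built-ins (assuming norm and labels are the lists built earlier and the sklearn modules imported at the top of this script):

from sklearn.model_selection import LeaveOneOut, cross_val_predict

# one prediction per sample, each from a model fit on all other samples
loo_model = ensemble.BaggingClassifier(n_estimators=5, max_samples=.5,
                                       max_features=.5, bootstrap=False)
loo_pred = cross_val_predict(loo_model, norm, labels, cv=LeaveOneOut(), n_jobs=-1)
print(metrics.accuracy_score(labels, loo_pred))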
# Feature importance
# plt.bar(matrix[0][1:], features)
# fig, ax = plt.subplots()
# ax.set_title("Feature Importance")
# ax.barh(matrix[0][1:], features)
# plt.show()
# Scores
# https://www.evidentlyai.com/classification-metrics/multi-class-metrics
# For all: higher is better
fig, axs = plt.subplots(2, 2)
fig.set_size_inches(12.5, 10)
axs[0, 0].plot(steps, accuracy)
axs[0, 0].set_title("Accuracy: $\mu$: %f"%np.mean(accuracy))
axs[0, 0].grid()
axs[0, 0].set_ylim(0, 1)
axs[0, 1].plot(steps, precision)
axs[0, 1].set_title("Precision $\mu$: %f"%np.mean(precision))
axs[0, 1].grid()
axs[0, 1].set_ylim(0, 1)
axs[1, 0].plot(steps, recall)
axs[1, 0].set_title("Recall $\mu$: %f"%np.mean(recall))
axs[1, 0].grid()
axs[1, 0].set_ylim(0, 1)
df = pd.DataFrame(roc)
for i in range(8):
    axs[1, 1].plot(steps, df[i], label=Tree(i).name)
axs[1, 1].set_title("ROC AUC")
axs[1, 1].legend()
axs[1, 1].grid()
axs[1, 1].set_ylim(0, 1)
plt.show()
# Confusion matrix
c_matrix = metrics.confusion_matrix(actual, predicted)
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=c_matrix)
cm_display.plot()
plt.show(block=False)
# MCC
# 1 perfect prediction
# 0 random prediction
# -1 opposite prediction
plt.plot(steps, phi)
plt.title("Matthews Correlation Coefficient $\mu$: %f"%np.mean(phi))
plt.grid()
plt.ylim(-1, 1)
plt.show()
plots.plotMetrics(actual_list, predicted_list)
plots.plotConfusion(actual_list[0], predicted_list[0])
if hasattr(model, "feature_importances_"):
    plots.plotFeatures(matrix[0][2:], model.feature_importances_)

View File

@@ -0,0 +1,117 @@
from sklearn import metrics
from matplotlib import pyplot as plt
from numpy import linspace
from enum import Enum
import pandas as pd
class Tree(Enum):
    ACCASIA = 0
    BERK = 1
    EIK = 2
    ELS = 3
    ESDOORN = 4
    ES = 5
    LINDE = 6
    PLATAAN = 7

def roc_auc_score_multiclass(actual_class, pred_class, average="macro"):
    # creating a set of all the unique classes using the actual class list
    unique_class = set(actual_class)
    roc_auc_dict = {}
    for per_class in unique_class:
        # creating a list of all the classes except the current class
        other_class = [x for x in unique_class if x != per_class]
        # marking the current class as 1 and all other classes as 0
        new_actual_class = [0 if x in other_class else 1 for x in actual_class]
        new_pred_class = [0 if x in other_class else 1 for x in pred_class]
        # using the sklearn metrics method to calculate the roc_auc_score
        roc_auc = metrics.roc_auc_score(new_actual_class, new_pred_class, average=average)
        roc_auc_dict[per_class] = roc_auc
    return roc_auc_dict
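roc_auc_score_multiclass computes a one-vs-rest ROC AUC per class by binarizing the labels: the current class becomes 1, everything else 0. A small example with made-up labels for three classes:

# hypothetical labels, three classes
actual    = [0, 0, 1, 1, 2, 2]
predicted = [0, 1, 1, 1, 2, 0]
print(roc_auc_score_multiclass(actual, predicted))
# -> {0: ..., 1: ..., 2: ...}, one one-vs-rest AUC per class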
def plotMetrics(true_list, predict_list, stepsize=1) -> None:
    '''
    Plots accuracy, precision, recall, per-class ROC AUC and MCC
    for a series of classification runs.

    Arguments:
        true_list ([[]]): list of lists with the true labels, one list per run
        predict_list ([[]]): list of lists with the predicted labels
        stepsize (int): x-axis step between consecutive runs (default: 1)
    '''
    # Source:
    # https://www.evidentlyai.com/classification-metrics/multi-class-metrics

    ## Load data ##
    accuracy = []
    precision = []
    recall = []
    roc = []
    mcc = []
    # one x-axis point per run; stepsize only stretches the axis, so the
    # number of points always matches the number of metric values
    steps = linspace(0, (len(true_list) - 1) * stepsize, len(true_list))
    for true, predict in zip(true_list, predict_list):
        assert len(true) == len(predict)
        accuracy.append(metrics.accuracy_score(true, predict))
        precision.append(metrics.precision_score(true, predict, average="macro"))
        recall.append(metrics.recall_score(true, predict, average="macro"))
        roc.append(roc_auc_score_multiclass(true, predict))
        mcc.append(metrics.matthews_corrcoef(true, predict))
    ## Plots ##
    fig, axs = plt.subplots(3, 2)

    # Accuracy
    axs[0, 0].plot(steps, accuracy)
    axs[0, 0].set_title("Accuracy")
    axs[0, 0].set_ylim(0, 1)
    axs[0, 0].grid()

    # Precision
    axs[0, 1].plot(steps, precision)
    axs[0, 1].set_title("Precision")
    axs[0, 1].set_ylim(0, 1)
    axs[0, 1].grid()

    # Recall
    axs[1, 0].plot(steps, recall)
    axs[1, 0].set_title("Recall")
    axs[1, 0].set_ylim(0, 1)
    axs[1, 0].grid()

    # ROC
    df = pd.DataFrame(roc)
    for i in range(8):
        axs[1, 1].plot(steps, df[i], label=Tree(i).name)
    axs[1, 1].set_title("ROC AUC")
    axs[1, 1].legend()
    axs[1, 1].grid()
    axs[1, 1].set_ylim(0, 1)

    # MCC
    axs[2, 0].plot(steps, mcc)
    axs[2, 0].set_title("MCC")
    axs[2, 0].grid()
    axs[2, 0].set_ylim(-1, 1)

    plt.show()
    return
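A minimal usage sketch: each inner list is one run (for example, one leave-one-out pass), and plotMetrics draws one point per run. The made-up runs below cover all eight classes, since the ROC panel indexes Tree classes 0 through 7:

# two hypothetical runs of true and predicted labels
true_runs    = [list(range(8)), list(range(8))]
predict_runs = [[0, 1, 2, 3, 4, 5, 6, 6], [0, 1, 2, 3, 4, 5, 5, 7]]
plotMetrics(true_runs, predict_runs)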
def plotConfusion(actual, predicted) -> None:
    '''
    Plots a confusion matrix for one run of true vs. predicted labels.
    '''
    matrix = metrics.confusion_matrix(actual, predicted)
    plot = metrics.ConfusionMatrixDisplay(confusion_matrix=matrix)
    plot.plot()
    plt.show()
    return
def plotFeatures(names, features) -> None:
    '''
    Plots a horizontal bar chart of feature importances.
    '''
    fig, ax = plt.subplots()
    ax.set_title("Feature Importance")
    ax.barh(names, features)
    plt.show()
    return
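plotFeatures only pairs names with importances, so it works with any estimator exposing feature_importances_. A hedged sketch with a small ExtraTreesClassifier on made-up data (the feature names here are hypothetical):

from sklearn import ensemble

# toy data: 10 samples, 3 named features
X = [[i, i % 3, i * 0.5] for i in range(10)]
y = [0, 1] * 5
model = ensemble.ExtraTreesClassifier(n_estimators=10).fit(X, y)
plotFeatures(["hue", "area", "perimeter"], model.feature_importances_)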