changes
This commit is contained in:
parent
3f384138b2
commit
8850f957ae
@ -26,8 +26,12 @@ for element in data[1:]:
|
|||||||
element.append(idx)
|
element.append(idx)
|
||||||
print(element[1], new_element[1])
|
print(element[1], new_element[1])
|
||||||
|
|
||||||
|
for element in data:
|
||||||
|
print(len(element))
|
||||||
|
|
||||||
with open(OUTPUT_DIR + "combined.csv", 'w') as file:
|
with open(OUTPUT_DIR + "combined.csv", 'w') as file:
|
||||||
for element in data:
|
for element in data:
|
||||||
for idx in element:
|
for idx in element[:-1]:
|
||||||
file.write(str(idx) + ',')
|
file.write(str(idx) + ',')
|
||||||
|
file.write(str(element[-1]))
|
||||||
file.write('\n')
|
file.write('\n')
|
@ -10,9 +10,9 @@ import pandas as pd
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import random
|
import random
|
||||||
import csv
|
import csv
|
||||||
|
import plots
|
||||||
|
|
||||||
# SIFT_PATH = "..\\algorithms\\data\\sift.csv"
|
PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\\dataset\\csv\\combined.csv"
|
||||||
SIFT_PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\datacsv\\result-2023-10-13T14.46.23.csv"
|
|
||||||
|
|
||||||
class Tree(Enum):
|
class Tree(Enum):
|
||||||
ACCASIA = 0
|
ACCASIA = 0
|
||||||
@ -24,36 +24,24 @@ class Tree(Enum):
|
|||||||
LINDE = 6
|
LINDE = 6
|
||||||
PLATAAN = 7
|
PLATAAN = 7
|
||||||
|
|
||||||
def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
|
|
||||||
#creating a set of all the unique classes using the actual class list
|
|
||||||
unique_class = set(actual_class)
|
|
||||||
roc_auc_dict = {}
|
|
||||||
for per_class in unique_class:
|
|
||||||
|
|
||||||
#creating a list of all the classes except the current class
|
|
||||||
other_class = [x for x in unique_class if x != per_class]
|
|
||||||
|
|
||||||
#marking the current class as 1 and all other classes as 0
|
|
||||||
new_actual_class = [0 if x in other_class else 1 for x in actual_class]
|
|
||||||
new_pred_class = [0 if x in other_class else 1 for x in pred_class]
|
|
||||||
|
|
||||||
#using the sklearn metrics method to calculate the roc_auc_score
|
|
||||||
roc_auc = metrics.roc_auc_score(new_actual_class, new_pred_class, average = average)
|
|
||||||
roc_auc_dict[per_class] = roc_auc
|
|
||||||
|
|
||||||
return roc_auc_dict
|
|
||||||
|
|
||||||
labels = []
|
labels = []
|
||||||
|
actual_list = []
|
||||||
|
predicted_list = []
|
||||||
i = 0
|
i = 0
|
||||||
|
|
||||||
with open(SIFT_PATH, 'r') as file:
|
with open(PATH, 'r') as file:
|
||||||
reader = csv.reader(file, delimiter= ',')
|
reader = csv.reader(file, delimiter= ',')
|
||||||
matrix = list(reader)
|
matrix = list(reader)
|
||||||
data = [[] for x in range(len(matrix)-1)]
|
data = [[] for x in range(len(matrix)-1)]
|
||||||
|
|
||||||
|
# Load all but the headers
|
||||||
for row in matrix[1:]:
|
for row in matrix[1:]:
|
||||||
|
|
||||||
## append data to lists
|
## append data to lists
|
||||||
labels.append(Tree[row[0].upper()].value)
|
labels.append(Tree[row[0].upper()].value)
|
||||||
for element in row[1:]:
|
|
||||||
|
# append all but ID and tree
|
||||||
|
for element in row[2:]:
|
||||||
data[i].append(float(element))
|
data[i].append(float(element))
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
@ -61,30 +49,17 @@ with open(SIFT_PATH, 'r') as file:
|
|||||||
normalized = preprocessing.normalize(data, axis=0, norm='max')
|
normalized = preprocessing.normalize(data, axis=0, norm='max')
|
||||||
norm = list(normalized.tolist())
|
norm = list(normalized.tolist())
|
||||||
|
|
||||||
steps = np.linspace(0, 9, 10, dtype=np.int64)
|
steps = np.linspace(0.1, 1.0, 10, dtype=np.float64)
|
||||||
# steps = np.linspace(1, 100, 10, dtype=np.int64)
|
|
||||||
# steps = np.linspace(0, 1, 11, dtype=np.float64)
|
|
||||||
accuracy = []
|
|
||||||
precision = []
|
|
||||||
recall = []
|
|
||||||
roc = []
|
|
||||||
phi = []
|
|
||||||
|
|
||||||
for step in steps:
|
for step in steps:
|
||||||
actual = []
|
actual = []
|
||||||
predicted = []
|
predicted = []
|
||||||
# weights = {}
|
|
||||||
# for idx, element in enumerate(Tree):
|
|
||||||
# # print(idx, element)
|
|
||||||
# weights[idx] = 0.1
|
|
||||||
# weights[5] = 1
|
|
||||||
|
|
||||||
for i in range(len(norm)):
|
for i in range(len(norm)):
|
||||||
temp_data = norm.pop(i)
|
temp_data = norm.pop(i)
|
||||||
temp_label = labels.pop(i)
|
temp_label = labels.pop(i)
|
||||||
|
|
||||||
# model = tree.DecisionTreeClassifier(
|
# model = tree.DecisionTreeClassifier(
|
||||||
# # class_weight=weights,
|
|
||||||
# class_weight=None,
|
# class_weight=None,
|
||||||
# min_samples_leaf=2,
|
# min_samples_leaf=2,
|
||||||
# max_depth=None, # < 5 is worse, None good too
|
# max_depth=None, # < 5 is worse, None good too
|
||||||
@ -98,21 +73,23 @@ for step in steps:
|
|||||||
# criterion='gini', # gini best
|
# criterion='gini', # gini best
|
||||||
# )
|
# )
|
||||||
# model = ensemble.ExtraTreesClassifier(
|
# model = ensemble.ExtraTreesClassifier(
|
||||||
|
# n_estimators=150 # higher is better, but slower (def: 100)
|
||||||
# )
|
# )
|
||||||
model = neighbors.KNeighborsClassifier(
|
# model = neighbors.KNeighborsClassifier(
|
||||||
algorithm='auto',
|
# algorithm='auto',
|
||||||
leaf_size=2,
|
# leaf_size=2,
|
||||||
n_neighbors=1,
|
# n_neighbors=step,
|
||||||
n_jobs=-1
|
# )
|
||||||
|
model = ensemble.BaggingClassifier(
|
||||||
|
n_estimators=5,
|
||||||
|
max_samples=.5,
|
||||||
|
max_features=.5,
|
||||||
|
bootstrap=False
|
||||||
)
|
)
|
||||||
# model = ensemble.BaggingClassifier(
|
|
||||||
# )
|
|
||||||
# model = svm.SVC(decision_function_shape='ovr'
|
# model = svm.SVC(decision_function_shape='ovr'
|
||||||
# )
|
# )
|
||||||
model = model.fit(norm, labels)
|
model = model.fit(norm, labels)
|
||||||
result = model.predict([temp_data])
|
result = model.predict([temp_data])
|
||||||
# features = model.feature_importances_
|
|
||||||
del model
|
|
||||||
|
|
||||||
norm.append(temp_data)
|
norm.append(temp_data)
|
||||||
labels.append(temp_label)
|
labels.append(temp_label)
|
||||||
@ -120,63 +97,12 @@ for step in steps:
|
|||||||
actual.append(temp_label)
|
actual.append(temp_label)
|
||||||
predicted.append(result[0])
|
predicted.append(result[0])
|
||||||
|
|
||||||
accuracy.append(metrics.accuracy_score(actual, predicted))
|
actual_list.append(actual)
|
||||||
precision.append(metrics.precision_score(actual, predicted, average='macro'))
|
predicted_list.append(predicted)
|
||||||
recall.append(metrics.recall_score(actual, predicted, average='macro'))
|
|
||||||
roc.append(roc_auc_score_multiclass(actual, predicted))
|
|
||||||
phi.append(metrics.matthews_corrcoef(actual, predicted))
|
|
||||||
|
|
||||||
print(step)
|
print(step)
|
||||||
|
|
||||||
# Feature importance
|
plots.plotMetrics(actual_list, predicted_list)
|
||||||
# plt.bar(matrix[0][1:], features)
|
plots.plotConfusion(actual_list[0], predicted_list[0])
|
||||||
# fig, ax = plt.subplots()
|
if (hasattr(model, "feature_importances_")):
|
||||||
# ax.set_title("Feature Importance")
|
plots.plotFeatures(matrix[0][2:], model.feature_importances_)
|
||||||
# ax.barh(matrix[0][1:], features)
|
|
||||||
# plt.show()
|
|
||||||
|
|
||||||
# Scores
|
|
||||||
# https://www.evidentlyai.com/classification-metrics/multi-class-metrics
|
|
||||||
# For all: higher is better
|
|
||||||
fig, axs = plt.subplots(2, 2)
|
|
||||||
fig.set_size_inches(12.5, 10)
|
|
||||||
|
|
||||||
axs[0, 0].plot(steps, accuracy)
|
|
||||||
axs[0, 0].set_title("Accuracy: $\mu$: %f"%np.mean(accuracy))
|
|
||||||
axs[0, 0].grid()
|
|
||||||
axs[0, 0].set_ylim(0, 1)
|
|
||||||
|
|
||||||
axs[0, 1].plot(steps, precision)
|
|
||||||
axs[0, 1].set_title("Precision $\mu$: %f"%np.mean(precision))
|
|
||||||
axs[0, 1].grid()
|
|
||||||
axs[0, 1].set_ylim(0, 1)
|
|
||||||
|
|
||||||
axs[1, 0].plot(steps, recall)
|
|
||||||
axs[1, 0].set_title("Recall $\mu$: %f"%np.mean(recall))
|
|
||||||
axs[1, 0].grid()
|
|
||||||
axs[1, 0].set_ylim(0, 1)
|
|
||||||
|
|
||||||
df = pd.DataFrame(roc)
|
|
||||||
for i in range(8):
|
|
||||||
axs[1, 1].plot(steps, df[i], label=Tree(i).name)
|
|
||||||
axs[1, 1].set_title("ROC AUC")
|
|
||||||
axs[1, 1].legend()
|
|
||||||
axs[1, 1].grid()
|
|
||||||
axs[1, 1].set_ylim(0, 1)
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
# Confusion matrix
|
|
||||||
c_matrix = metrics.confusion_matrix(actual, predicted)
|
|
||||||
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=c_matrix)
|
|
||||||
cm_display.plot()
|
|
||||||
plt.show(block=False)
|
|
||||||
|
|
||||||
# MCC
|
|
||||||
# 1 perfect prediction
|
|
||||||
# 0 random prediction
|
|
||||||
# -1 opposite prediction
|
|
||||||
plt.plot(steps, phi)
|
|
||||||
plt.title("Matthews Correlation Coefficient $\mu$: %f"%np.mean(phi))
|
|
||||||
plt.grid()
|
|
||||||
plt.ylim(-1, 1)
|
|
||||||
plt.show()
|
|
117
src/experiments/decision_tree/plots.py
Normal file
117
src/experiments/decision_tree/plots.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
from sklearn import metrics
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
from numpy import linspace
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
class Tree(Enum):
    """Tree classes and the integer label assigned to each of them.

    The integer values run from 0 to 7 in declaration order; the member
    names are used as legend labels when plotting per-class scores.
    """

    # Members receive consecutive values 0..7 in the order listed here.
    ACCASIA, BERK, EIK, ELS, ESDOORN, ES, LINDE, PLATAAN = range(8)
|
||||||
|
|
||||||
|
def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
    '''
    Computes a one-vs-rest ROC AUC score for every class in the true labels.

    Each class that occurs in ``actual_class`` is scored on its own: labels
    equal to that class are marked 1, all other labels are marked 0, and the
    two binarised sequences are passed to ``metrics.roc_auc_score``.

    Arguments:
        actual_class (iterable): True labels (must be hashable)
        pred_class (iterable): Predicted labels, in the same order
        average (str): Forwarded unchanged to ``metrics.roc_auc_score`` (def: "macro")

    Returns:
        dict: Maps every class found in ``actual_class`` to its ROC AUC score
    '''
    roc_auc_dict = {}

    # Only classes present in the true labels get a score.
    for per_class in set(actual_class):
        # Binarise by comparing against the current class directly. Testing
        # membership in "all other true classes" instead would turn a predicted
        # label that never occurs in actual_class into a positive for every class.
        new_actual_class = [int(label == per_class) for label in actual_class]
        new_pred_class = [int(label == per_class) for label in pred_class]

        roc_auc_dict[per_class] = metrics.roc_auc_score(
            new_actual_class, new_pred_class, average=average
        )

    return roc_auc_dict
|
||||||
|
|
||||||
|
def _draw_metric(axis, steps, values, title, y_min=0):
    '''Plots one metric series on ``axis`` with a title, a grid and a fixed y-range up to 1.'''
    axis.plot(steps, values)
    axis.set_title(title)
    axis.set_ylim(y_min, 1)
    axis.grid()


def plotMetrics(true_list, predict_list, stepsize = 1) -> None:
    '''
    Creates fancy plots for model metrics.

    For every (true, predicted) pair the accuracy, macro precision, macro
    recall, per-class one-vs-rest ROC AUC and Matthews correlation
    coefficient are computed. Each metric is then plotted against the run
    index in a 3x2 grid of subplots and the figure is shown.

    Arguments:
        true_list ([[]]): List of lists with true tags
        predict_list ([[]]): List of lists with predicted tags
        stepsize (int): Defines x-axis step of the graphs (def: 1)

    Raises:
        ValueError: If the two outer lists, or any pair of inner lists,
            differ in length
    '''
    # Source
    # https://www.evidentlyai.com/classification-metrics/multi-class-metrics

    if len(true_list) != len(predict_list):
        raise ValueError("true_list and predict_list must have the same length")

    ## Load data ##
    accuracy = []
    precision = []
    recall = []
    roc = []
    mcc = []

    # One x value per run, spaced by stepsize. This always matches the number
    # of metric values, whereas deriving the point count from
    # len(true_list) / stepsize does not whenever stepsize != 1.
    steps = [index * stepsize for index in range(len(true_list))]

    for true, predict in zip(true_list, predict_list):
        if len(true) != len(predict):
            raise ValueError("true and predicted tag lists must have the same length")
        accuracy.append(metrics.accuracy_score(true, predict))
        precision.append(metrics.precision_score(true, predict, average="macro"))
        recall.append(metrics.recall_score(true, predict, average="macro"))
        roc.append(roc_auc_score_multiclass(true, predict))
        mcc.append(metrics.matthews_corrcoef(true, predict))

    ## Plots ##
    fig, axs = plt.subplots(3, 2)

    _draw_metric(axs[0, 0], steps, accuracy, "Accuracy")
    _draw_metric(axs[0, 1], steps, precision, "Precision")
    _draw_metric(axs[1, 0], steps, recall, "Recall")

    # ROC: one line per class. The frame only has a column for a class that
    # occurred in the true tags of at least one run, so skip the others
    # instead of failing on a missing column.
    df = pd.DataFrame(roc)
    for member in Tree:
        if member.value in df.columns:
            axs[1, 1].plot(steps, df[member.value], label=member.name)
    axs[1, 1].set_title("ROC AUC")
    axs[1, 1].legend()
    axs[1, 1].grid()
    axs[1, 1].set_ylim(0, 1)

    # MCC ranges from -1 to 1, unlike the other scores.
    _draw_metric(axs[2, 0], steps, mcc, "MCC", y_min=-1)

    plt.show()
|
||||||
|
|
||||||
|
def plotConfusion(actual, predicted) -> None:
    '''
    Shows the confusion matrix of the predicted tags against the true tags.

    Arguments:
        actual ([]): List with true tags
        predicted ([]): List with predicted tags
    '''
    display = metrics.ConfusionMatrixDisplay(
        confusion_matrix=metrics.confusion_matrix(actual, predicted)
    )
    display.plot()

    plt.show()
|
||||||
|
|
||||||
|
def plotFeatures(names, features) -> None:
    '''
    Shows a horizontal bar chart titled "Feature Importance".

    Arguments:
        names ([]): Bar labels, one per feature
        features ([]): Bar lengths, in the same order as ``names``
    '''
    _, axis = plt.subplots()
    axis.barh(names, features)
    axis.set_title("Feature Importance")
    plt.show()
|
Loading…
Reference in New Issue
Block a user