changes
This commit is contained in:
parent
3f384138b2
commit
8850f957ae
@ -26,8 +26,12 @@ for element in data[1:]:
|
||||
element.append(idx)
|
||||
print(element[1], new_element[1])
|
||||
|
||||
for element in data:
|
||||
print(len(element))
|
||||
|
||||
with open(OUTPUT_DIR + "combined.csv", 'w') as file:
|
||||
for element in data:
|
||||
for idx in element:
|
||||
for idx in element[:-1]:
|
||||
file.write(str(idx) + ',')
|
||||
file.write(str(element[-1]))
|
||||
file.write('\n')
|
@ -10,9 +10,9 @@ import pandas as pd
|
||||
import numpy as np
|
||||
import random
|
||||
import csv
|
||||
import plots
|
||||
|
||||
# SIFT_PATH = "..\\algorithms\\data\\sift.csv"
|
||||
SIFT_PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\datacsv\\result-2023-10-13T14.46.23.csv"
|
||||
PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\\dataset\\csv\\combined.csv"
|
||||
|
||||
class Tree(Enum):
|
||||
ACCASIA = 0
|
||||
@ -24,36 +24,24 @@ class Tree(Enum):
|
||||
LINDE = 6
|
||||
PLATAAN = 7
|
||||
|
||||
def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
    """Compute a one-vs-rest ROC AUC score for every class in ``actual_class``.

    For each distinct label found in ``actual_class`` both label sequences are
    binarised (1 for that label, 0 for anything else) and scored with
    ``metrics.roc_auc_score``.

    Arguments:
        actual_class: Sequence of true labels.
        pred_class: Sequence of predicted labels, aligned with actual_class.
        average: Forwarded unchanged to ``metrics.roc_auc_score`` (def: "macro").

    Returns:
        dict mapping each label that occurs in ``actual_class`` to its score.
    """
    # NOTE(review): roc_auc_score is expected to reject a binarised ground
    # truth that holds a single class - confirm for the sklearn version in use.
    roc_auc_dict = {}

    # Only labels that occur in the ground truth get a score.
    for per_class in set(actual_class):
        # Compare against the current class directly. Testing membership in
        # "all other true classes" would mark a predicted label that never
        # occurs in actual_class as a hit for every class.
        new_actual_class = [1 if x == per_class else 0 for x in actual_class]
        new_pred_class = [1 if x == per_class else 0 for x in pred_class]

        roc_auc_dict[per_class] = metrics.roc_auc_score(
            new_actual_class, new_pred_class, average=average
        )

    return roc_auc_dict
|
||||
|
||||
labels = []
|
||||
actual_list = []
|
||||
predicted_list = []
|
||||
i = 0
|
||||
|
||||
with open(SIFT_PATH, 'r') as file:
|
||||
with open(PATH, 'r') as file:
|
||||
reader = csv.reader(file, delimiter= ',')
|
||||
matrix = list(reader)
|
||||
data = [[] for x in range(len(matrix)-1)]
|
||||
|
||||
# Load all but the headers
|
||||
for row in matrix[1:]:
|
||||
|
||||
## append data to lists
|
||||
labels.append(Tree[row[0].upper()].value)
|
||||
for element in row[1:]:
|
||||
|
||||
# append all but ID and tree
|
||||
for element in row[2:]:
|
||||
data[i].append(float(element))
|
||||
i += 1
|
||||
|
||||
@ -61,30 +49,17 @@ with open(SIFT_PATH, 'r') as file:
|
||||
normalized = preprocessing.normalize(data, axis=0, norm='max')
|
||||
norm = list(normalized.tolist())
|
||||
|
||||
steps = np.linspace(0, 9, 10, dtype=np.int64)
|
||||
# steps = np.linspace(1, 100, 10, dtype=np.int64)
|
||||
# steps = np.linspace(0, 1, 11, dtype=np.float64)
|
||||
accuracy = []
|
||||
precision = []
|
||||
recall = []
|
||||
roc = []
|
||||
phi = []
|
||||
steps = np.linspace(0.1, 1.0, 10, dtype=np.float64)
|
||||
|
||||
for step in steps:
|
||||
actual = []
|
||||
predicted = []
|
||||
# weights = {}
|
||||
# for idx, element in enumerate(Tree):
|
||||
# # print(idx, element)
|
||||
# weights[idx] = 0.1
|
||||
# weights[5] = 1
|
||||
|
||||
for i in range(len(norm)):
|
||||
temp_data = norm.pop(i)
|
||||
temp_label = labels.pop(i)
|
||||
|
||||
# model = tree.DecisionTreeClassifier(
|
||||
# # class_weight=weights,
|
||||
# class_weight=None,
|
||||
# min_samples_leaf=2,
|
||||
# max_depth=None, # < 5 is worse, None good too
|
||||
@ -98,21 +73,23 @@ for step in steps:
|
||||
# criterion='gini', # gini best
|
||||
# )
|
||||
# model = ensemble.ExtraTreesClassifier(
|
||||
# n_estimators=150 # higher is better, but slower (def: 100)
|
||||
# )
|
||||
model = neighbors.KNeighborsClassifier(
|
||||
algorithm='auto',
|
||||
leaf_size=2,
|
||||
n_neighbors=1,
|
||||
n_jobs=-1
|
||||
# model = neighbors.KNeighborsClassifier(
|
||||
# algorithm='auto',
|
||||
# leaf_size=2,
|
||||
# n_neighbors=step,
|
||||
# )
|
||||
model = ensemble.BaggingClassifier(
|
||||
n_estimators=5,
|
||||
max_samples=.5,
|
||||
max_features=.5,
|
||||
bootstrap=False
|
||||
)
|
||||
# model = ensemble.BaggingClassifier(
|
||||
# )
|
||||
# model = svm.SVC(decision_function_shape='ovr'
|
||||
# )
|
||||
model = model.fit(norm, labels)
|
||||
result = model.predict([temp_data])
|
||||
# features = model.feature_importances_
|
||||
del model
|
||||
|
||||
norm.append(temp_data)
|
||||
labels.append(temp_label)
|
||||
@ -120,63 +97,12 @@ for step in steps:
|
||||
actual.append(temp_label)
|
||||
predicted.append(result[0])
|
||||
|
||||
accuracy.append(metrics.accuracy_score(actual, predicted))
|
||||
precision.append(metrics.precision_score(actual, predicted, average='macro'))
|
||||
recall.append(metrics.recall_score(actual, predicted, average='macro'))
|
||||
roc.append(roc_auc_score_multiclass(actual, predicted))
|
||||
phi.append(metrics.matthews_corrcoef(actual, predicted))
|
||||
actual_list.append(actual)
|
||||
predicted_list.append(predicted)
|
||||
|
||||
print(step)
|
||||
|
||||
# Feature importance
|
||||
# plt.bar(matrix[0][1:], features)
|
||||
# fig, ax = plt.subplots()
|
||||
# ax.set_title("Feature Importance")
|
||||
# ax.barh(matrix[0][1:], features)
|
||||
# plt.show()
|
||||
|
||||
# Scores
|
||||
# https://www.evidentlyai.com/classification-metrics/multi-class-metrics
|
||||
# For all: higher is better
|
||||
fig, axs = plt.subplots(2, 2)
|
||||
fig.set_size_inches(12.5, 10)
|
||||
|
||||
axs[0, 0].plot(steps, accuracy)
|
||||
axs[0, 0].set_title("Accuracy: $\mu$: %f"%np.mean(accuracy))
|
||||
axs[0, 0].grid()
|
||||
axs[0, 0].set_ylim(0, 1)
|
||||
|
||||
axs[0, 1].plot(steps, precision)
|
||||
axs[0, 1].set_title("Precision $\mu$: %f"%np.mean(precision))
|
||||
axs[0, 1].grid()
|
||||
axs[0, 1].set_ylim(0, 1)
|
||||
|
||||
axs[1, 0].plot(steps, recall)
|
||||
axs[1, 0].set_title("Recall $\mu$: %f"%np.mean(recall))
|
||||
axs[1, 0].grid()
|
||||
axs[1, 0].set_ylim(0, 1)
|
||||
|
||||
df = pd.DataFrame(roc)
|
||||
for i in range(8):
|
||||
axs[1, 1].plot(steps, df[i], label=Tree(i).name)
|
||||
axs[1, 1].set_title("ROC AUC")
|
||||
axs[1, 1].legend()
|
||||
axs[1, 1].grid()
|
||||
axs[1, 1].set_ylim(0, 1)
|
||||
plt.show()
|
||||
|
||||
# Confusion matrix
|
||||
c_matrix = metrics.confusion_matrix(actual, predicted)
|
||||
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=c_matrix)
|
||||
cm_display.plot()
|
||||
plt.show(block=False)
|
||||
|
||||
# MCC
|
||||
# 1 perfect prediction
|
||||
# 0 random prediction
|
||||
# -1 opposite prediction
|
||||
plt.plot(steps, phi)
|
||||
plt.title("Matthews Correlation Coefficient $\mu$: %f"%np.mean(phi))
|
||||
plt.grid()
|
||||
plt.ylim(-1, 1)
|
||||
plt.show()
|
||||
plots.plotMetrics(actual_list, predicted_list)
|
||||
plots.plotConfusion(actual_list[0], predicted_list[0])
|
||||
if (hasattr(model, "feature_importances_")):
|
||||
plots.plotFeatures(matrix[0][2:], model.feature_importances_)
|
117
src/experiments/decision_tree/plots.py
Normal file
117
src/experiments/decision_tree/plots.py
Normal file
@ -0,0 +1,117 @@
|
||||
from sklearn import metrics
|
||||
from matplotlib import pyplot as plt
|
||||
from numpy import linspace
|
||||
from enum import Enum
|
||||
|
||||
import pandas as pd
|
||||
|
||||
class Tree(Enum):
    '''Tree classes, each paired with the integer tag used for it.'''
    # NOTE(review): member names look like Dutch tree names; they are kept
    # verbatim because they are looked up by name/value elsewhere - confirm
    # before renaming any of them.
    ACCASIA = 0
    BERK = 1
    EIK = 2
    ELS = 3
    ESDOORN = 4
    ES = 5
    LINDE = 6
    PLATAAN = 7
|
||||
|
||||
def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
    """Compute a one-vs-rest ROC AUC score for every class in ``actual_class``.

    For each distinct label found in ``actual_class`` both label sequences are
    binarised (1 for that label, 0 for anything else) and scored with
    ``metrics.roc_auc_score``.

    Arguments:
        actual_class: Sequence of true labels.
        pred_class: Sequence of predicted labels, aligned with actual_class.
        average: Forwarded unchanged to ``metrics.roc_auc_score`` (def: "macro").

    Returns:
        dict mapping each label that occurs in ``actual_class`` to its score.
    """
    # NOTE(review): roc_auc_score is expected to reject a binarised ground
    # truth that holds a single class - confirm for the sklearn version in use.
    roc_auc_dict = {}

    # Only labels that occur in the ground truth get a score.
    for per_class in set(actual_class):
        # Compare against the current class directly. Testing membership in
        # "all other true classes" would mark a predicted label that never
        # occurs in actual_class as a hit for every class.
        new_actual_class = [1 if x == per_class else 0 for x in actual_class]
        new_pred_class = [1 if x == per_class else 0 for x in pred_class]

        roc_auc_dict[per_class] = metrics.roc_auc_score(
            new_actual_class, new_pred_class, average=average
        )

    return roc_auc_dict
|
||||
|
||||
def _plot_curve(ax, steps, values, title, ylim=(0, 1)):
    # Draw one metric curve on ax with the title, y-range and grid that all
    # single-curve panels share.
    ax.plot(steps, values)
    ax.set_title(title)
    ax.set_ylim(*ylim)
    ax.grid()


def plotMetrics(true_list, predict_list, stepsize = 1) -> None:
    '''
    Creates fancy plots for model metrics.

    One figure with a 3x2 grid is shown: accuracy, precision, recall,
    per-class ROC AUC and MCC, each plotted against the run index scaled
    by stepsize.

    Arguments:
        true_list ([[]]): List of lists with true tags
        predict_list ([[]]): List of lists with predicted tags
        stepsize (int): Spacing between consecutive x-axis positions (def: 1)
    '''
    # Source
    # https://www.evidentlyai.com/classification-metrics/multi-class-metrics

    ## Load data ##
    accuracy = []
    precision = []
    recall = []
    roc = []
    mcc = []

    # One x position per run. The number of points must equal the number of
    # runs, otherwise every plot call below fails on mismatched lengths; the
    # stepsize only stretches the axis.
    run_count = len(true_list)
    steps = linspace(0, (run_count - 1) * stepsize, run_count)

    for true, predict in zip(true_list, predict_list):
        assert len(true) == len(predict), "true and predicted tags differ in length"
        accuracy.append(metrics.accuracy_score(true, predict))
        precision.append(metrics.precision_score(true, predict, average="macro"))
        recall.append(metrics.recall_score(true, predict, average="macro"))
        roc.append(roc_auc_score_multiclass(true, predict))
        mcc.append(metrics.matthews_corrcoef(true, predict))

    ## Plots ##
    fig, axs = plt.subplots(3, 2)

    _plot_curve(axs[0, 0], steps, accuracy, "Accuracy")
    _plot_curve(axs[0, 1], steps, precision, "Precision")
    _plot_curve(axs[1, 0], steps, recall, "Recall")

    # ROC: one curve per class. Each run contributes a {class: score} dict, so
    # the frame's columns are exactly the classes that were scored; iterating
    # them avoids a KeyError when a class is absent from the true tags.
    df = pd.DataFrame(roc)
    class_ids = sorted(df.columns)
    for class_id in class_ids:
        axs[1, 1].plot(steps, df[class_id], label=Tree(class_id).name)
    axs[1, 1].set_title("ROC AUC")
    if class_ids:
        # A legend without any labelled curve only produces a warning.
        axs[1, 1].legend()
    axs[1, 1].grid()
    axs[1, 1].set_ylim(0, 1)

    # MCC ranges from -1 to 1, unlike the other metrics.
    _plot_curve(axs[2, 0], steps, mcc, "MCC", ylim=(-1, 1))

    plt.show()
|
||||
|
||||
def plotConfusion(actual, predicted) -> None:
    '''
    Shows the confusion matrix for one set of predictions.

    Arguments:
        actual ([]): List with true tags
        predicted ([]): List with predicted tags
    '''
    # Build the matrix and hand it straight to the display helper.
    display = metrics.ConfusionMatrixDisplay(
        confusion_matrix=metrics.confusion_matrix(actual, predicted)
    )
    display.plot()

    plt.show()
|
||||
|
||||
def plotFeatures(names, features) -> None:
    '''
    Shows a horizontal bar chart titled "Feature Importance".

    Arguments:
        names: Bar labels, one per feature
        features: Bar lengths, one per entry in names
    '''
    # The figure handle itself is not needed, only its axes.
    _, axis = plt.subplots()
    axis.barh(names, features)
    axis.set_title("Feature Importance")
    plt.show()
|
Loading…
Reference in New Issue
Block a user