Added Matthews correlation coefficient (MCC)

This commit is contained in:
Tom Selier 2023-10-13 21:54:50 +02:00
parent 311070d1a6
commit c6c9b50e9d
2 changed files with 79 additions and 42 deletions

View File

@ -2,15 +2,17 @@ from enum import Enum
from sklearn import tree
from sklearn import metrics
from sklearn import preprocessing
import sklearn
from sklearn import neighbors
from sklearn import ensemble
from sklearn import svm
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import random
import csv
SIFT_PATH = "..\\algorithms\\data\\sift.csv"
# SIFT_PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\datacsv\\result-2023-10-13T14.46.23.csv"
# SIFT_PATH = "..\\algorithms\\data\\sift.csv"
# Absolute path to the SIFT feature CSV read by this script.
# Every backslash is written as "\\": a bare "\d" is an invalid escape sequence
# that Python only tolerates with a DeprecationWarning/SyntaxWarning.
SIFT_PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\\datacsv\\result-2023-10-13T14.46.23.csv"
class Tree(Enum):
ACCASIA = 0
@ -22,11 +24,7 @@ class Tree(Enum):
LINDE = 6
PLATAAN = 7
# [[tree1_data],[tree2_data]]
# [tree1_label, tree2_label]
def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
#creating a set of all the unique classes using the actual class list
unique_class = set(actual_class)
roc_auc_dict = {}
@ -45,10 +43,8 @@ def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
return roc_auc_dict
labels = []
i = 0
done = False
with open(SIFT_PATH, 'r') as file:
reader = csv.reader(file, delimiter= ',')
@ -66,33 +62,51 @@ with open(SIFT_PATH, 'r') as file:
normalized = preprocessing.normalize(data, axis=0, norm='max')
norm = list(normalized.tolist())
steps = np.linspace(2, 20, 10, dtype=np.int64)
steps = np.linspace(1, 50, 2, dtype=np.int64)
# steps = np.linspace(1, 100, 10, dtype=np.int64)
# steps = np.linspace(0, 0.2, 11, dtype=np.float64)
accuracy = []
precision = []
recall = []
roc = []
phi = []
for step in steps:
actual = []
predicted = []
for i in range(100):
test_index = random.randint(1, 101)
temp_data = data.pop(test_index)
temp_label = labels.pop(test_index)
del dec_tree
for i in range(len(norm)):
temp_data = norm.pop(i)
temp_label = labels.pop(i)
dec_tree = tree.DecisionTreeClassifier(
min_samples_leaf=2,
max_depth=None,
random_state=False,
criterion='gini',
splitter='best')
dec_tree = dec_tree.fit(data, labels)
result = dec_tree.predict([matrix[test_index][1:]])
# model = tree.DecisionTreeClassifier(
# min_samples_leaf=2,
# max_depth=None, # < 5 is worse, None good too
# random_state=False, # No change
# criterion='gini', # MCC + 0.1
# splitter='best',
# ccp_alpha=0 # Pruning: Keep this 0
# )
# model = ensemble.RandomForestClassifier(
# criterion='gini', # gini best
# )
model = ensemble.ExtraTreesClassifier(
)
# model = neighbors.KNeighborsClassifier(
# algorithm='auto',
# leaf_size=step,
# n_neighbors=1,
# n_jobs=-1
# )
# model = ensemble.BaggingClassifier(
# )
# model = svm.SVC(decision_function_shape='ovr'
# )
model = model.fit(norm, labels)
result = model.predict([temp_data])
del model
# normalized_list.append(temp_data)
data.append(temp_data)
norm.append(temp_data)
labels.append(temp_label)
actual.append(temp_label)
@ -102,29 +116,51 @@ for step in steps:
precision.append(metrics.precision_score(actual, predicted, average='macro'))
recall.append(metrics.recall_score(actual, predicted, average='macro'))
roc.append(roc_auc_score_multiclass(actual, predicted))
phi.append(metrics.matthews_corrcoef(actual, predicted))
print(step)
# Scores
# https://www.evidentlyai.com/classification-metrics/multi-class-metrics
plt.plot(accuracy)
plt.title("Accuracy")
plt.show()
plt.plot(precision)
plt.title("Precision")
plt.show()
plt.plot(recall)
plt.title("Recall")
plt.show()
# For all: higher is better
# One figure with a 2x2 grid of axes; each score list is plotted against `steps`.
fig, axs = plt.subplots(2, 2)
fig.set_size_inches(12.5, 10)
# Top-left: accuracy, with the y-axis fixed to [0, 1].
axs[0, 0].plot(steps, accuracy)
axs[0, 0].set_title("Accuracy")
axs[0, 0].grid()
axs[0, 0].set_ylim(0, 1)
# Top-right: macro-averaged precision, same fixed y-range.
axs[0, 1].plot(steps, precision)
axs[0, 1].set_title("Precision")
axs[0, 1].grid()
axs[0, 1].set_ylim(0, 1)
# Bottom-left: macro-averaged recall, same fixed y-range.
axs[1, 0].plot(steps, recall)
axs[1, 0].set_title("Recall")
axs[1, 0].grid()
axs[1, 0].set_ylim(0, 1)
df = pd.DataFrame(roc)
plt.figure()
for i in range(7):
plt.plot(df[i], label=Tree(i).name)
plt.legend()
plt.show()
for i in range(8):
axs[1, 1].plot(steps, df[i], label=Tree(i).name)
axs[1, 1].set_title("ROC AUC")
axs[1, 1].legend()
axs[1, 1].grid()
axs[1, 1].set_ylim(0, 1)
# Confusion matrix
c_matrix = metrics.confusion_matrix(actual, predicted)
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=c_matrix)
cm_display.plot()
plt.show(block=False)
plt.show(block=False)
# Matthews correlation coefficient (MCC), plotted against `steps`.
# `phi` holds the metrics.matthews_corrcoef(actual, predicted) results.
#    1: perfect prediction
#    0: random prediction
#   -1: opposite prediction
plt.plot(steps, phi)
plt.title("Matthews Correlation Coefficient")
plt.grid()
# Fix the y-axis to the range described above.
plt.ylim(-1, 1)
plt.show()

View File

@ -6,7 +6,7 @@ import csv
from sklearn.preprocessing import MinMaxScaler
from enum import Enum
import random
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, matthews_corrcoef
class Tree(Enum):
ACCASIA = 0
@ -109,4 +109,5 @@ print("Accuracy score", accuracy_score(tag_true, tag_predict))
print("Precision score (macro)", precision_score(tag_true, tag_predict, average='macro'))
print("Precision score (micro)", precision_score(tag_true, tag_predict, average='micro'))
print("Recall score (macro)", recall_score(tag_true, tag_predict, average='macro'))
print("Recall score (micro)", recall_score(tag_true, tag_predict, average='micro'))
print("Recall score (micro)", recall_score(tag_true, tag_predict, average='micro'))
print("MCC", matthews_corrcoef(tag_true, tag_predict))