Added MCC (Matthews correlation coefficient) metric

This commit is contained in:
Tom Selier 2023-10-13 21:54:50 +02:00
parent 311070d1a6
commit c6c9b50e9d
2 changed files with 79 additions and 42 deletions

View File

@ -2,15 +2,17 @@ from enum import Enum
from sklearn import tree from sklearn import tree
from sklearn import metrics from sklearn import metrics
from sklearn import preprocessing from sklearn import preprocessing
import sklearn from sklearn import neighbors
from sklearn import ensemble
from sklearn import svm
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import random import random
import csv import csv
SIFT_PATH = "..\\algorithms\\data\\sift.csv" # SIFT_PATH = "..\\algorithms\\data\\sift.csv"
# SIFT_PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\datacsv\\result-2023-10-13T14.46.23.csv" SIFT_PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\datacsv\\result-2023-10-13T14.46.23.csv"
class Tree(Enum): class Tree(Enum):
ACCASIA = 0 ACCASIA = 0
@ -22,11 +24,7 @@ class Tree(Enum):
LINDE = 6 LINDE = 6
PLATAAN = 7 PLATAAN = 7
# [[tree1_data],[tree2_data]]
# [tree1_label, tree2_label]
def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"): def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
#creating a set of all the unique classes using the actual class list #creating a set of all the unique classes using the actual class list
unique_class = set(actual_class) unique_class = set(actual_class)
roc_auc_dict = {} roc_auc_dict = {}
@ -45,10 +43,8 @@ def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
return roc_auc_dict return roc_auc_dict
labels = [] labels = []
i = 0 i = 0
done = False
with open(SIFT_PATH, 'r') as file: with open(SIFT_PATH, 'r') as file:
reader = csv.reader(file, delimiter= ',') reader = csv.reader(file, delimiter= ',')
@ -66,33 +62,51 @@ with open(SIFT_PATH, 'r') as file:
normalized = preprocessing.normalize(data, axis=0, norm='max') normalized = preprocessing.normalize(data, axis=0, norm='max')
norm = list(normalized.tolist()) norm = list(normalized.tolist())
steps = np.linspace(2, 20, 10, dtype=np.int64) steps = np.linspace(1, 50, 2, dtype=np.int64)
# steps = np.linspace(1, 100, 10, dtype=np.int64)
# steps = np.linspace(0, 0.2, 11, dtype=np.float64)
accuracy = [] accuracy = []
precision = [] precision = []
recall = [] recall = []
roc = [] roc = []
phi = []
for step in steps: for step in steps:
actual = [] actual = []
predicted = [] predicted = []
for i in range(100): for i in range(len(norm)):
test_index = random.randint(1, 101) temp_data = norm.pop(i)
temp_data = data.pop(test_index) temp_label = labels.pop(i)
temp_label = labels.pop(test_index)
del dec_tree
dec_tree = tree.DecisionTreeClassifier( # model = tree.DecisionTreeClassifier(
min_samples_leaf=2, # min_samples_leaf=2,
max_depth=None, # max_depth=None, # < 5 is worse, None good too
random_state=False, # random_state=False, # No change
criterion='gini', # criterion='gini', # MCC + 0.1
splitter='best') # splitter='best',
dec_tree = dec_tree.fit(data, labels) # ccp_alpha=0 # Pruning: Keep this 0
result = dec_tree.predict([matrix[test_index][1:]]) # )
# model = ensemble.RandomForestClassifier(
# criterion='gini', # gini best
# )
model = ensemble.ExtraTreesClassifier(
)
# model = neighbors.KNeighborsClassifier(
# algorithm='auto',
# leaf_size=step,
# n_neighbors=1,
# n_jobs=-1
# )
# model = ensemble.BaggingClassifier(
# )
# model = svm.SVC(decision_function_shape='ovr'
# )
model = model.fit(norm, labels)
result = model.predict([temp_data])
del model
# normalized_list.append(temp_data) norm.append(temp_data)
data.append(temp_data)
labels.append(temp_label) labels.append(temp_label)
actual.append(temp_label) actual.append(temp_label)
@ -102,29 +116,51 @@ for step in steps:
precision.append(metrics.precision_score(actual, predicted, average='macro')) precision.append(metrics.precision_score(actual, predicted, average='macro'))
recall.append(metrics.recall_score(actual, predicted, average='macro')) recall.append(metrics.recall_score(actual, predicted, average='macro'))
roc.append(roc_auc_score_multiclass(actual, predicted)) roc.append(roc_auc_score_multiclass(actual, predicted))
phi.append(metrics.matthews_corrcoef(actual, predicted))
print(step) print(step)
# Scores # Scores
# https://www.evidentlyai.com/classification-metrics/multi-class-metrics # https://www.evidentlyai.com/classification-metrics/multi-class-metrics
plt.plot(accuracy) # For all: higher is better
plt.title("Accuracy") fig, axs = plt.subplots(2, 2)
plt.show() fig.set_size_inches(12.5, 10)
plt.plot(precision)
plt.title("Precision") axs[0, 0].plot(steps, accuracy)
plt.show() axs[0, 0].set_title("Accuracy")
plt.plot(recall) axs[0, 0].grid()
plt.title("Recall") axs[0, 0].set_ylim(0, 1)
plt.show()
axs[0, 1].plot(steps, precision)
axs[0, 1].set_title("Precision")
axs[0, 1].grid()
axs[0, 1].set_ylim(0, 1)
axs[1, 0].plot(steps, recall)
axs[1, 0].set_title("Recall")
axs[1, 0].grid()
axs[1, 0].set_ylim(0, 1)
df = pd.DataFrame(roc) df = pd.DataFrame(roc)
plt.figure() for i in range(8):
for i in range(7): axs[1, 1].plot(steps, df[i], label=Tree(i).name)
plt.plot(df[i], label=Tree(i).name) axs[1, 1].set_title("ROC AUC")
plt.legend() axs[1, 1].legend()
plt.show() axs[1, 1].grid()
axs[1, 1].set_ylim(0, 1)
# Confusion matrix # Confusion matrix
c_matrix = metrics.confusion_matrix(actual, predicted) c_matrix = metrics.confusion_matrix(actual, predicted)
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=c_matrix) cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=c_matrix)
cm_display.plot() cm_display.plot()
plt.show(block=False) plt.show(block=False)
# MCC
# 1 perfect prediction
# 0 random prediction
# -1 opposite prediction
plt.plot(steps, phi)
plt.title("Matthews Correlation Coefficient")
plt.grid()
plt.ylim(-1, 1)
plt.show()

View File

@ -6,7 +6,7 @@ import csv
from sklearn.preprocessing import MinMaxScaler from sklearn.preprocessing import MinMaxScaler
from enum import Enum from enum import Enum
import random import random
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, matthews_corrcoef
class Tree(Enum): class Tree(Enum):
ACCASIA = 0 ACCASIA = 0
@ -110,3 +110,4 @@ print("Precision score (macro)", precision_score(tag_true, tag_predict, average=
print("Precision score (micro)", precision_score(tag_true, tag_predict, average='micro')) print("Precision score (micro)", precision_score(tag_true, tag_predict, average='micro'))
print("Recall score (macro)", recall_score(tag_true, tag_predict, average='macro')) print("Recall score (macro)", recall_score(tag_true, tag_predict, average='macro'))
print("Recall score (micro)", recall_score(tag_true, tag_predict, average='micro')) print("Recall score (micro)", recall_score(tag_true, tag_predict, average='micro'))
print("MCC", matthews_corrcoef(tag_true, tag_predict))