added MCC
This commit is contained in:
parent
311070d1a6
commit
c6c9b50e9d
@ -2,15 +2,17 @@ from enum import Enum
|
||||
from sklearn import tree
|
||||
from sklearn import metrics
|
||||
from sklearn import preprocessing
|
||||
import sklearn
|
||||
from sklearn import neighbors
|
||||
from sklearn import ensemble
|
||||
from sklearn import svm
|
||||
from matplotlib import pyplot as plt
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import random
|
||||
import csv
|
||||
|
||||
SIFT_PATH = "..\\algorithms\\data\\sift.csv"
|
||||
# SIFT_PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\datacsv\\result-2023-10-13T14.46.23.csv"
|
||||
# SIFT_PATH = "..\\algorithms\\data\\sift.csv"
|
||||
# Location of the CSV read by the script. Every backslash is escaped explicitly:
# a bare "\d" is an invalid escape sequence (SyntaxWarning on Python 3.12+).
SIFT_PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\\datacsv\\result-2023-10-13T14.46.23.csv"
|
||||
|
||||
class Tree(Enum):
|
||||
ACCASIA = 0
|
||||
@ -22,11 +24,7 @@ class Tree(Enum):
|
||||
LINDE = 6
|
||||
PLATAAN = 7
|
||||
|
||||
# [[tree1_data],[tree2_data]]
|
||||
# [tree1_label, tree2_label]
|
||||
|
||||
def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
|
||||
|
||||
# Create a set of all the unique classes from the actual class list
|
||||
unique_class = set(actual_class)
|
||||
roc_auc_dict = {}
|
||||
@ -45,10 +43,8 @@ def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
|
||||
|
||||
return roc_auc_dict
|
||||
|
||||
|
||||
labels = []
|
||||
i = 0
|
||||
done = False
|
||||
|
||||
with open(SIFT_PATH, 'r') as file:
|
||||
reader = csv.reader(file, delimiter= ',')
|
||||
@ -66,33 +62,51 @@ with open(SIFT_PATH, 'r') as file:
|
||||
normalized = preprocessing.normalize(data, axis=0, norm='max')
|
||||
norm = list(normalized.tolist())
|
||||
|
||||
steps = np.linspace(2, 20, 10, dtype=np.int64)
|
||||
steps = np.linspace(1, 50, 2, dtype=np.int64)
|
||||
# steps = np.linspace(1, 100, 10, dtype=np.int64)
|
||||
# steps = np.linspace(0, 0.2, 11, dtype=np.float64)
|
||||
accuracy = []
|
||||
precision = []
|
||||
recall = []
|
||||
roc = []
|
||||
phi = []
|
||||
|
||||
for step in steps:
|
||||
actual = []
|
||||
predicted = []
|
||||
|
||||
for i in range(100):
|
||||
test_index = random.randint(1, 101)
|
||||
temp_data = data.pop(test_index)
|
||||
temp_label = labels.pop(test_index)
|
||||
del dec_tree
|
||||
for i in range(len(norm)):
|
||||
temp_data = norm.pop(i)
|
||||
temp_label = labels.pop(i)
|
||||
|
||||
dec_tree = tree.DecisionTreeClassifier(
|
||||
min_samples_leaf=2,
|
||||
max_depth=None,
|
||||
random_state=False,
|
||||
criterion='gini',
|
||||
splitter='best')
|
||||
dec_tree = dec_tree.fit(data, labels)
|
||||
result = dec_tree.predict([matrix[test_index][1:]])
|
||||
# model = tree.DecisionTreeClassifier(
|
||||
# min_samples_leaf=2,
|
||||
# max_depth=None, # < 5 is worse, None good too
|
||||
# random_state=False, # No change
|
||||
# criterion='gini', # MCC + 0.1
|
||||
# splitter='best',
|
||||
# ccp_alpha=0 # Pruning: Keep this 0
|
||||
# )
|
||||
# model = ensemble.RandomForestClassifier(
|
||||
# criterion='gini', # gini best
|
||||
# )
|
||||
model = ensemble.ExtraTreesClassifier(
|
||||
)
|
||||
# model = neighbors.KNeighborsClassifier(
|
||||
# algorithm='auto',
|
||||
# leaf_size=step,
|
||||
# n_neighbors=1,
|
||||
# n_jobs=-1
|
||||
# )
|
||||
# model = ensemble.BaggingClassifier(
|
||||
# )
|
||||
# model = svm.SVC(decision_function_shape='ovr'
|
||||
# )
|
||||
model = model.fit(norm, labels)
|
||||
result = model.predict([temp_data])
|
||||
del model
|
||||
|
||||
# normalized_list.append(temp_data)
|
||||
data.append(temp_data)
|
||||
norm.append(temp_data)
|
||||
labels.append(temp_label)
|
||||
|
||||
actual.append(temp_label)
|
||||
@ -102,29 +116,51 @@ for step in steps:
|
||||
precision.append(metrics.precision_score(actual, predicted, average='macro'))
|
||||
recall.append(metrics.recall_score(actual, predicted, average='macro'))
|
||||
roc.append(roc_auc_score_multiclass(actual, predicted))
|
||||
phi.append(metrics.matthews_corrcoef(actual, predicted))
|
||||
|
||||
print(step)
|
||||
|
||||
# Scores
|
||||
# https://www.evidentlyai.com/classification-metrics/multi-class-metrics
|
||||
plt.plot(accuracy)
|
||||
plt.title("Accuracy")
|
||||
plt.show()
|
||||
plt.plot(precision)
|
||||
plt.title("Precision")
|
||||
plt.show()
|
||||
plt.plot(recall)
|
||||
plt.title("Recall")
|
||||
plt.show()
|
||||
# For all metrics plotted below: higher is better
|
||||
fig, axs = plt.subplots(2, 2)
|
||||
fig.set_size_inches(12.5, 10)
|
||||
|
||||
axs[0, 0].plot(steps, accuracy)
|
||||
axs[0, 0].set_title("Accuracy")
|
||||
axs[0, 0].grid()
|
||||
axs[0, 0].set_ylim(0, 1)
|
||||
|
||||
axs[0, 1].plot(steps, precision)
|
||||
axs[0, 1].set_title("Precision")
|
||||
axs[0, 1].grid()
|
||||
axs[0, 1].set_ylim(0, 1)
|
||||
|
||||
axs[1, 0].plot(steps, recall)
|
||||
axs[1, 0].set_title("Recall")
|
||||
axs[1, 0].grid()
|
||||
axs[1, 0].set_ylim(0, 1)
|
||||
|
||||
df = pd.DataFrame(roc)
|
||||
plt.figure()
|
||||
for i in range(7):
|
||||
plt.plot(df[i], label=Tree(i).name)
|
||||
plt.legend()
|
||||
plt.show()
|
||||
for i in range(8):
|
||||
axs[1, 1].plot(steps, df[i], label=Tree(i).name)
|
||||
axs[1, 1].set_title("ROC AUC")
|
||||
axs[1, 1].legend()
|
||||
axs[1, 1].grid()
|
||||
axs[1, 1].set_ylim(0, 1)
|
||||
|
||||
# Confusion matrix
|
||||
c_matrix = metrics.confusion_matrix(actual, predicted)
|
||||
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=c_matrix)
|
||||
cm_display.plot()
|
||||
plt.show(block=False)
|
||||
plt.show(block=False)
|
||||
|
||||
# MCC (Matthews correlation coefficient), ranges from -1 to 1:
|
||||
# 1 perfect prediction
|
||||
# 0 random prediction
|
||||
# -1 opposite prediction
|
||||
plt.plot(steps, phi)
|
||||
plt.title("Matthews Correlation Coefficient")
|
||||
plt.grid()
|
||||
plt.ylim(-1, 1)
|
||||
plt.show()
|
@ -6,7 +6,7 @@ import csv
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
from enum import Enum
|
||||
import random
|
||||
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
|
||||
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, matthews_corrcoef
|
||||
|
||||
class Tree(Enum):
|
||||
ACCASIA = 0
|
||||
@ -109,4 +109,5 @@ print("Accuracy score", accuracy_score(tag_true, tag_predict))
|
||||
print("Precision score (macro)", precision_score(tag_true, tag_predict, average='macro'))
|
||||
print("Precision score (micro)", precision_score(tag_true, tag_predict, average='micro'))
|
||||
print("Recall score (macro)", recall_score(tag_true, tag_predict, average='macro'))
|
||||
print("Recall score (micro)", recall_score(tag_true, tag_predict, average='micro'))
|
||||
print("Recall score (micro)", recall_score(tag_true, tag_predict, average='micro'))
|
||||
print("MCC", matthews_corrcoef(tag_true, tag_predict))
|
Loading…
Reference in New Issue
Block a user