Added MCC (Matthews correlation coefficient) metric

2023-10-13 21:54:50 +02:00 · 2023-10-13 21:54:50 +02:00 · c6c9b50e9d
commit c6c9b50e9d
parent 311070d1a6
2 changed files with 79 additions and 42 deletions
--- a/src/experiments/decision_tree/decision_tree.py
+++ b/src/experiments/decision_tree/decision_tree.py
@@ -2,15 +2,17 @@ from enum import Enum
 from sklearn import tree
 from sklearn import metrics
 from sklearn import preprocessing
-import sklearn
+from sklearn import neighbors
 from sklearn import ensemble
 from sklearn import svm
 from matplotlib import pyplot as plt
 import pandas as pd
 import numpy as np
 import random
 import csv
-SIFT_PATH = "..\\algorithms\\data\\sift.csv"
+# SIFT_PATH = "..\\algorithms\\data\\sift.csv"
-# SIFT_PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\datacsv\\result-2023-10-13T14.46.23.csv"
+SIFT_PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\datacsv\\result-2023-10-13T14.46.23.csv"
 class Tree(Enum):
    ACCASIA = 0
@@ -22,11 +24,7 @@ class Tree(Enum):
    LINDE = 6
    PLATAAN = 7
 # [[tree1_data],[tree2_data]]
 # [tree1_label, tree2_label]
 def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
    #creating a set of all the unique classes using the actual class list
    unique_class = set(actual_class)
    roc_auc_dict = {}
@@ -45,10 +43,8 @@ def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
    return roc_auc_dict
 labels = []
 i = 0
 done = False
 with open(SIFT_PATH, 'r') as file:
    reader = csv.reader(file, delimiter= ',')
@@ -66,33 +62,51 @@ with open(SIFT_PATH, 'r') as file:
    normalized = preprocessing.normalize(data, axis=0, norm='max')
    norm = list(normalized.tolist())
-steps = np.linspace(2, 20, 10, dtype=np.int64)
+steps = np.linspace(1, 50, 2, dtype=np.int64)
 # steps = np.linspace(1, 100, 10, dtype=np.int64)
 # steps = np.linspace(0, 0.2, 11, dtype=np.float64)
 accuracy = []
 precision = []
 recall = []
 roc = []
 phi = []
 for step in steps:
    actual = []
    predicted = []
-    for i in range(100):
+    for i in range(len(norm)):
-        test_index = random.randint(1, 101)
+        temp_data = norm.pop(i)
-        temp_data = data.pop(test_index)
+        temp_label = labels.pop(i)
        temp_label = labels.pop(test_index)
        del dec_tree
-        dec_tree = tree.DecisionTreeClassifier(
+        # model = tree.DecisionTreeClassifier(
-            min_samples_leaf=2,
+        #     min_samples_leaf=2,
-            max_depth=None,
+        #     max_depth=None, # < 5 is worse, None good too
-            random_state=False,
+        #     random_state=False, # No change
-            criterion='gini', 
+        #     criterion='gini', # MCC + 0.1
-            splitter='best')
+        #     splitter='best',
-        dec_tree = dec_tree.fit(data, labels)
+        #     ccp_alpha=0 # Pruning: Keep this 0
-        result = dec_tree.predict([matrix[test_index][1:]])
+        # )
        # model = ensemble.RandomForestClassifier(
        #     criterion='gini', # gini best
        # )
        model = ensemble.ExtraTreesClassifier(
        )
        # model = neighbors.KNeighborsClassifier(
        #     algorithm='auto',
        #     leaf_size=step,
        #     n_neighbors=1,
        #     n_jobs=-1
        # )
        # model = ensemble.BaggingClassifier(
        # )
        # model = svm.SVC(decision_function_shape='ovr'
        # )
        model = model.fit(norm, labels)
        result = model.predict([temp_data])
        del model
-        # normalized_list.append(temp_data)
+        norm.append(temp_data)
        data.append(temp_data)
        labels.append(temp_label)
        actual.append(temp_label)
@@ -102,29 +116,51 @@ for step in steps:
    precision.append(metrics.precision_score(actual, predicted, average='macro'))
    recall.append(metrics.recall_score(actual, predicted, average='macro'))
    roc.append(roc_auc_score_multiclass(actual, predicted))
    phi.append(metrics.matthews_corrcoef(actual, predicted))
    print(step)
 # Scores
 # https://www.evidentlyai.com/classification-metrics/multi-class-metrics
-plt.plot(accuracy)
+# For all: higher is better
-plt.title("Accuracy")
+fig, axs = plt.subplots(2, 2)
-plt.show()
+fig.set_size_inches(12.5, 10)
-plt.plot(precision)
+
-plt.title("Precision")
+axs[0, 0].plot(steps, accuracy)
-plt.show()
+axs[0, 0].set_title("Accuracy")
-plt.plot(recall)
+axs[0, 0].grid()
-plt.title("Recall")
+axs[0, 0].set_ylim(0, 1)
-plt.show()
+
 axs[0, 1].plot(steps, precision)
 axs[0, 1].set_title("Precision")
 axs[0, 1].grid()
 axs[0, 1].set_ylim(0, 1)
 axs[1, 0].plot(steps, recall)
 axs[1, 0].set_title("Recall")
 axs[1, 0].grid()
 axs[1, 0].set_ylim(0, 1)
 df = pd.DataFrame(roc)
-plt.figure()
+for i in range(8):
-for i in range(7):
+    axs[1, 1].plot(steps, df[i], label=Tree(i).name)
-    plt.plot(df[i], label=Tree(i).name)
+axs[1, 1].set_title("ROC AUC")
-plt.legend()
+axs[1, 1].legend()
-plt.show()
+axs[1, 1].grid()
 axs[1, 1].set_ylim(0, 1)
 # Confusion matrix
 c_matrix = metrics.confusion_matrix(actual, predicted)
 cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=c_matrix)
 cm_display.plot()
-plt.show(block=False)
+plt.show(block=False)
 # MCC
 # 1 perfect prediction
 # 0 random prediction
 # -1 opposite prediction
 plt.plot(steps, phi)
 plt.title("Matthews Correlation Coefficient")
 plt.grid()
 plt.ylim(-1, 1)
 plt.show()
--- a/src/experiments/knn/knn.py
+++ b/src/experiments/knn/knn.py
@@ -6,7 +6,7 @@ import csv
 from sklearn.preprocessing import MinMaxScaler
 from enum import Enum
 import random
-from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
+from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, matthews_corrcoef
 class Tree(Enum):
    ACCASIA = 0
@@ -109,4 +109,5 @@ print("Accuracy score", accuracy_score(tag_true, tag_predict))
 print("Precision score (macro)", precision_score(tag_true, tag_predict, average='macro'))
 print("Precision score (micro)", precision_score(tag_true, tag_predict, average='micro'))
 print("Recall score (macro)", recall_score(tag_true, tag_predict, average='macro'))
-print("Recall score (micro)", recall_score(tag_true, tag_predict, average='micro'))
+print("Recall score (micro)", recall_score(tag_true, tag_predict, average='micro'))
 print("MCC", matthews_corrcoef(tag_true, tag_predict))