model tech

Tom Selier 2023-10-14 10:20:12 +02:00
parent c6c9b50e9d
commit a13baf3549
1 changed file with 30 additions and 14 deletions


@@ -49,7 +49,6 @@ i = 0
with open(SIFT_PATH, 'r') as file:
    reader = csv.reader(file, delimiter=',')
    matrix = list(reader)
    data = [[] for x in range(len(matrix)-1)]
    for row in matrix[1:]:
        ## append data to lists
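For reference, a minimal sketch of what this loading step appears to do, assuming SIFT_PATH names a comma-separated file whose header row holds feature names and whose data rows hold one label followed by numeric features (the exact column layout is an assumption, not confirmed by the diff):

# Sketch only: the CSV layout (header row, label column first) is assumed.
import csv

SIFT_PATH = "sift.csv"  # hypothetical path
with open(SIFT_PATH, 'r') as file:
    matrix = list(csv.reader(file, delimiter=','))

labels = [row[0] for row in matrix[1:]]                     # assumed label column
data = [[float(v) for v in row[1:]] for row in matrix[1:]]  # assumed numeric features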
@@ -62,9 +61,9 @@ with open(SIFT_PATH, 'r') as file:
normalized = preprocessing.normalize(data, axis=0, norm='max')
norm = list(normalized.tolist())
-steps = np.linspace(1, 50, 2, dtype=np.int64)
+steps = np.linspace(0, 9, 10, dtype=np.int64)
# steps = np.linspace(1, 100, 10, dtype=np.int64)
# steps = np.linspace(0, 0.2, 11, dtype=np.float64)
# steps = np.linspace(0, 1, 11, dtype=np.float64)
accuracy = []
precision = []
recall = []
@@ -74,12 +73,19 @@ phi = []
for step in steps:
    actual = []
    predicted = []
+    # weights = {}
+    # for idx, element in enumerate(Tree):
+    #     # print(idx, element)
+    #     weights[idx] = 0.1
+    # weights[5] = 1
    for i in range(len(norm)):
        temp_data = norm.pop(i)
        temp_label = labels.pop(i)
        # model = tree.DecisionTreeClassifier(
+        #     # class_weight=weights,
        #     class_weight=None,
        #     min_samples_leaf=2,
        #     max_depth=None,  # < 5 is worse, None good too
        #     random_state=False,  # No change
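The pop/fit/predict pattern being set up here is a hand-rolled leave-one-out cross-validation. A self-contained sketch of the idea, with hypothetical stand-in data; note that restoring the held-out sample with insert(i, ...) rather than append keeps each index pointing at the sample it had before the pop, so every sample is held out exactly once:

# Minimal leave-one-out sketch; norm/labels are hypothetical stand-ins.
from sklearn import neighbors

norm = [[0.1, 0.2], [0.3, 0.1], [0.9, 0.8], [0.7, 0.9]]
labels = ['a', 'a', 'b', 'b']

actual, predicted = [], []
for i in range(len(norm)):
    temp_data = norm.pop(i)            # hold one sample out
    temp_label = labels.pop(i)
    model = neighbors.KNeighborsClassifier(n_neighbors=1)
    model.fit(norm, labels)            # train on the remaining samples
    result = model.predict([temp_data])
    norm.insert(i, temp_data)          # restore at the same index (append would reorder)
    labels.insert(i, temp_label)
    actual.append(temp_label)
    predicted.append(result[0])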
@@ -88,22 +94,24 @@ for step in steps:
        #     ccp_alpha=0  # Pruning: Keep this 0
        # )
        # model = ensemble.RandomForestClassifier(
        #     n_estimators=20,  # higher is better, but slower (def: 100)
        #     criterion='gini',  # gini best
        # )
-        model = ensemble.ExtraTreesClassifier(
-        )
-        # model = neighbors.KNeighborsClassifier(
-        #     algorithm='auto',
-        #     leaf_size=step,
-        #     n_neighbors=1,
-        #     n_jobs=-1
+        # model = ensemble.ExtraTreesClassifier(
+        # )
+        model = neighbors.KNeighborsClassifier(
+            algorithm='auto',
+            leaf_size=2,
+            n_neighbors=1,
+            n_jobs=-1
        )
        # model = ensemble.BaggingClassifier(
        # )
        # model = svm.SVC(decision_function_shape='ovr'
        # )
        model = model.fit(norm, labels)
        result = model.predict([temp_data])
        # features = model.feature_importances_
        del model
        norm.append(temp_data)
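A side note on the commented-out features line above: feature_importances_ exists on the tree-based models tried here (DecisionTreeClassifier, RandomForestClassifier, ExtraTreesClassifier) but not on KNeighborsClassifier or SVC, so reading it unconditionally would raise AttributeError once the model is swapped. A small guard keeps the loop model-agnostic:

# Guard: only tree-based sklearn models expose feature_importances_.
if hasattr(model, "feature_importances_"):
    features = model.feature_importances_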
@@ -120,6 +128,13 @@ for step in steps:
    print(step)
# Feature importance
# plt.bar(matrix[0][1:], features)
+# fig, ax = plt.subplots()
+# ax.set_title("Feature Importance")
+# ax.barh(matrix[0][1:], features)
+# plt.show()
# Scores
# https://www.evidentlyai.com/classification-metrics/multi-class-metrics
# For all: higher is better
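The per-step scores plotted below are presumably appended inside the step loop; those lines are elided by the diff, so the following is only a sketch of one standard way to compute them for multi-class labels (macro averaging, per the linked page):

# Sketch: macro-averaged multi-class scores for one step (exact originals elided).
from sklearn import metrics

accuracy.append(metrics.accuracy_score(actual, predicted))
precision.append(metrics.precision_score(actual, predicted, average='macro'))
recall.append(metrics.recall_score(actual, predicted, average='macro'))
phi.append(metrics.matthews_corrcoef(actual, predicted))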
@@ -127,17 +142,17 @@ fig, axs = plt.subplots(2, 2)
fig.set_size_inches(12.5, 10)
axs[0, 0].plot(steps, accuracy)
-axs[0, 0].set_title("Accuracy")
+axs[0, 0].set_title(r"Accuracy: $\mu$: %f" % np.mean(accuracy))
axs[0, 0].grid()
axs[0, 0].set_ylim(0, 1)
axs[0, 1].plot(steps, precision)
-axs[0, 1].set_title("Precision")
+axs[0, 1].set_title(r"Precision $\mu$: %f" % np.mean(precision))
axs[0, 1].grid()
axs[0, 1].set_ylim(0, 1)
axs[1, 0].plot(steps, recall)
-axs[1, 0].set_title("Recall")
+axs[1, 0].set_title(r"Recall $\mu$: %f" % np.mean(recall))
axs[1, 0].grid()
axs[1, 0].set_ylim(0, 1)
@@ -148,6 +163,7 @@ axs[1, 1].set_title("ROC AUC")
axs[1, 1].legend()
axs[1, 1].grid()
axs[1, 1].set_ylim(0, 1)
+plt.show()
# Confusion matrix
c_matrix = metrics.confusion_matrix(actual, predicted)
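The rendering step between computing c_matrix and the plt.show(block=False) below is elided by the diff; one conventional way to draw it, as a sketch only:

# Sketch of a typical rendering (the actual elided lines may differ).
disp = metrics.ConfusionMatrixDisplay(confusion_matrix=c_matrix)
disp.plot()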
@@ -160,7 +176,7 @@ plt.show(block=False)
# 0 random prediction
# -1 opposite prediction
plt.plot(steps, phi)
plt.title("Matthews Correlation Coefficient")
plt.title("Matthews Correlation Coefficient $\mu$: %f"%np.mean(phi))
plt.grid()
plt.ylim(-1, 1)
plt.show()
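As a quick check of the MCC endpoints listed in the comments above (+1 perfect, 0 random, -1 opposite):

# Sanity check of the MCC endpoints.
from sklearn import metrics

print(metrics.matthews_corrcoef(['a', 'b'], ['a', 'b']))  # 1.0: perfect prediction
print(metrics.matthews_corrcoef(['a', 'b'], ['b', 'a']))  # -1.0: opposite prediction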