From a13baf354934e8e2ebc7232cf21a33b1ca2a45b7 Mon Sep 17 00:00:00 2001
From: Tom Selier
Date: Sat, 14 Oct 2023 10:20:12 +0200
Subject: [PATCH] model tech

---
 .../decision_tree/decision_tree.py           | 44 +++++++++++++------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/src/experiments/decision_tree/decision_tree.py b/src/experiments/decision_tree/decision_tree.py
index 80bd4c6..8df1901 100644
--- a/src/experiments/decision_tree/decision_tree.py
+++ b/src/experiments/decision_tree/decision_tree.py
@@ -49,7 +49,6 @@ i = 0
 with open(SIFT_PATH, 'r') as file:
     reader = csv.reader(file, delimiter= ',')
     matrix = list(reader)
-    data = [[] for x in range(len(matrix)-1)]
 
     for row in matrix[1:]:
         ## append data to lists
@@ -62,9 +61,9 @@ with open(SIFT_PATH, 'r') as file:
 normalized = preprocessing.normalize(data, axis=0, norm='max')
 norm = list(normalized.tolist())
 
-steps = np.linspace(1, 50, 2, dtype=np.int64)
+steps = np.linspace(0, 9, 10, dtype=np.int64)
 # steps = np.linspace(1, 100, 10, dtype=np.int64)
-# steps = np.linspace(0, 0.2, 11, dtype=np.float64)
+# steps = np.linspace(0, 1, 11, dtype=np.float64)
 accuracy = []
 precision = []
 recall = []
@@ -74,12 +73,19 @@ phi = []
 for step in steps:
     actual = []
     predicted = []
+    # weights = {}
+    # for idx, element in enumerate(Tree):
+    #     # print(idx, element)
+    #     weights[idx] = 0.1
+    # weights[5] = 1
 
     for i in range(len(norm)):
         temp_data = norm.pop(i)
         temp_label = labels.pop(i)
 
         # model = tree.DecisionTreeClassifier(
+        #     # class_weight=weights,
+        #     class_weight=None,
         #     min_samples_leaf=2,
         #     max_depth=None,     # < 5 is worse, None good too
         #     random_state=False, # No change
@@ -88,22 +94,24 @@ for step in steps:
         #     ccp_alpha=0         # Pruning: Keep this 0
         #     )
         # model = ensemble.RandomForestClassifier(
+        #     n_estimators=20,    # higher is better, but slower (def: 100)
         #     criterion='gini',   # gini best
         #     )
-        model = ensemble.ExtraTreesClassifier(
-            )
-        # model = neighbors.KNeighborsClassifier(
-        #     algorithm='auto',
-        #     leaf_size=step,
-        #     n_neighbors=1,
-        #     n_jobs=-1
+        # model = ensemble.ExtraTreesClassifier(
         #     )
+        model = neighbors.KNeighborsClassifier(
+            algorithm='auto',
+            leaf_size=2,
+            n_neighbors=1,
+            n_jobs=-1
+            )
         # model = ensemble.BaggingClassifier(
         #     )
         # model = svm.SVC(decision_function_shape='ovr'
         #     )
         model = model.fit(norm, labels)
         result = model.predict([temp_data])
+        # features = model.feature_importances_
         del model
 
         norm.append(temp_data)
@@ -120,6 +128,13 @@ for step in steps:
     print(step)
 
 
+# Feature importance
+# plt.bar(matrix[0][1:], features)
+# fig, ax = plt.subplots()
+# ax.set_title("Feature Importance")
+# ax.barh(matrix[0][1:], features)
+# plt.show()
+
 # Scores
 # https://www.evidentlyai.com/classification-metrics/multi-class-metrics
 # For all: higher is better
@@ -127,17 +142,17 @@ fig, axs = plt.subplots(2, 2)
 fig.set_size_inches(12.5, 10)
 
 axs[0, 0].plot(steps, accuracy)
-axs[0, 0].set_title("Accuracy")
+axs[0, 0].set_title(r"Accuracy $\mu$: %f" % np.mean(accuracy))
 axs[0, 0].grid()
 axs[0, 0].set_ylim(0, 1)
 
 axs[0, 1].plot(steps, precision)
-axs[0, 1].set_title("Precision")
+axs[0, 1].set_title(r"Precision $\mu$: %f" % np.mean(precision))
 axs[0, 1].grid()
 axs[0, 1].set_ylim(0, 1)
 
 axs[1, 0].plot(steps, recall)
-axs[1, 0].set_title("Recall")
+axs[1, 0].set_title(r"Recall $\mu$: %f" % np.mean(recall))
 axs[1, 0].grid()
 axs[1, 0].set_ylim(0, 1)
 
@@ -148,6 +163,7 @@ axs[1, 1].set_title("ROC AUC")
 axs[1, 1].legend()
 axs[1, 1].grid()
 axs[1, 1].set_ylim(0, 1)
+plt.show()
 
 # Confusion matrix
 c_matrix = metrics.confusion_matrix(actual, predicted)
@@ -160,7 +176,7 @@ plt.show(block=False)
 # 0 random prediction
 # -1 opposite prediction
 plt.plot(steps, phi)
-plt.title("Matthews Correlation Coefficient")
+plt.title(r"Matthews Correlation Coefficient $\mu$: %f" % np.mean(phi))
 plt.grid()
 plt.ylim(-1, 1)
 plt.show()
\ No newline at end of file
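
The per-sample pop/fit/predict/append loop this patch edits is a hand-rolled
leave-one-out cross-validation. A minimal sketch of the same evaluation using
scikit-learn's built-in LeaveOneOut splitter, assuming `norm` (feature rows)
and `labels` (classes) as built earlier in the script:

    # Sketch: leave-one-out CV equivalent to the manual pop/append loop.
    # Assumes `norm` and `labels` from the script above.
    import numpy as np
    from sklearn import metrics, neighbors
    from sklearn.model_selection import LeaveOneOut, cross_val_predict

    X, y = np.asarray(norm), np.asarray(labels)
    model = neighbors.KNeighborsClassifier(
        algorithm='auto',
        leaf_size=2,
        n_neighbors=1,
        n_jobs=-1,
        )
    # One held-out prediction per sample, one fold per row.
    predicted = cross_val_predict(model, X, y, cv=LeaveOneOut())
    print("accuracy:", metrics.accuracy_score(y, predicted))
    print("MCC:", metrics.matthews_corrcoef(y, predicted))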
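
One caveat on the commented-out feature-importance block added here:
feature_importances_ exists on the tree ensembles but not on
KNeighborsClassifier, so that plot only works with a tree model re-enabled.
A sketch under that assumption, with matrix[0][1:] being the CSV header row
of feature names:

    # Sketch: the feature-importance plot from the commented block; requires
    # a tree-based model (KNeighborsClassifier has no feature_importances_).
    import matplotlib.pyplot as plt
    from sklearn import ensemble

    model = ensemble.ExtraTreesClassifier().fit(norm, labels)
    fig, ax = plt.subplots()
    ax.set_title("Feature Importance")
    ax.barh(matrix[0][1:], model.feature_importances_)
    plt.show()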