model tech

Tom Selier 2023-10-14 10:20:12 +02:00
parent c6c9b50e9d
commit a13baf3549


@@ -49,7 +49,6 @@ i = 0
 with open(SIFT_PATH, 'r') as file:
     reader = csv.reader(file, delimiter= ',')
     matrix = list(reader)
     data = [[] for x in range(len(matrix)-1)]
     for row in matrix[1:]:
         ## append data to lists
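The body of the `## append data to lists` loop falls outside this hunk. A minimal sketch of what that read step could look like, assuming the first CSV column holds the class label (the `labels` list used further down) and the remaining columns hold numeric SIFT features:

import csv

SIFT_PATH = 'sift_features.csv'  # placeholder; the real constant is defined earlier in the script

labels = []
with open(SIFT_PATH, 'r') as file:
    reader = csv.reader(file, delimiter=',')
    matrix = list(reader)
    data = [[] for x in range(len(matrix)-1)]
    for idx, row in enumerate(matrix[1:]):
        labels.append(row[0])                    # first column: class label (assumption)
        data[idx] = [float(v) for v in row[1:]]  # remaining columns: numeric features (assumption)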
@@ -62,9 +61,9 @@ with open(SIFT_PATH, 'r') as file:
 normalized = preprocessing.normalize(data, axis=0, norm='max')
 norm = list(normalized.tolist())
-steps = np.linspace(1, 50, 2, dtype=np.int64)
+steps = np.linspace(0, 9, 10, dtype=np.int64)
 # steps = np.linspace(1, 100, 10, dtype=np.int64)
-# steps = np.linspace(0, 0.2, 11, dtype=np.float64)
+# steps = np.linspace(0, 1, 11, dtype=np.float64)
 accuracy = []
 precision = []
 recall = []
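For reference, the new `steps` expression produces the ten integers 0 through 9, where the old one produced only the two endpoints 1 and 50; since `leaf_size` is now hardcoded further down, each step repeats the same leave-one-out run as far as this diff shows:

import numpy as np

print(np.linspace(0, 9, 10, dtype=np.int64))   # [0 1 2 3 4 5 6 7 8 9]
print(np.linspace(1, 50, 2, dtype=np.int64))   # [ 1 50]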
@@ -74,12 +73,19 @@ phi = []
 for step in steps:
     actual = []
     predicted = []
+    # weights = {}
+    # for idx, element in enumerate(Tree):
+    # # print(idx, element)
+    # weights[idx] = 0.1
+    # weights[5] = 1
     for i in range(len(norm)):
         temp_data = norm.pop(i)
         temp_label = labels.pop(i)
         # model = tree.DecisionTreeClassifier(
+        # # class_weight=weights,
+        # class_weight=None,
         # min_samples_leaf=2,
         # max_depth=None, # < 5 is worse, None good too
         # random_state=False, # No change
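If the commented `weights` block were enabled, `class_weight` on `tree.DecisionTreeClassifier` accepts exactly such a dict of {class_label: weight} (or 'balanced', or None). A minimal sketch with illustrative integer labels, since the real class labels come from the CSV:

from sklearn import tree

weights = {label: 0.1 for label in range(10)}  # hypothetical: down-weight every class...
weights[5] = 1                                 # ...except class 5

model = tree.DecisionTreeClassifier(
    class_weight=weights,   # dict keys must match the actual labels used for fitting
    min_samples_leaf=2,
)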
@@ -88,22 +94,24 @@ for step in steps:
         # ccp_alpha=0 # Pruning: Keep this 0
         # )
         # model = ensemble.RandomForestClassifier(
+        # n_estimators=20, # higher is better, but slower (def: 100)
         # criterion='gini', # gini best
         # )
-        model = ensemble.ExtraTreesClassifier(
-        )
-        # model = neighbors.KNeighborsClassifier(
-        # algorithm='auto',
-        # leaf_size=step,
-        # n_neighbors=1,
-        # n_jobs=-1
+        # model = ensemble.ExtraTreesClassifier(
         # )
+        model = neighbors.KNeighborsClassifier(
+        algorithm='auto',
+        leaf_size=2,
+        n_neighbors=1,
+        n_jobs=-1
+        )
         # model = ensemble.BaggingClassifier(
         # )
         # model = svm.SVC(decision_function_shape='ovr'
         # )
         model = model.fit(norm, labels)
         result = model.predict([temp_data])
+        # features = model.feature_importances_
         del model
         norm.append(temp_data)
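`feature_importances_` is an attribute of the tree-based estimators (DecisionTreeClassifier, RandomForestClassifier, ExtraTreesClassifier), not of KNeighborsClassifier, which is presumably why that line stays commented out now that KNN is the active model. For reference, the same leave-one-out predictions can be collected without the manual pop()/append() bookkeeping; a sketch assuming `norm` and `labels` are the full feature and label lists:

from sklearn import neighbors
from sklearn.model_selection import LeaveOneOut, cross_val_predict

model = neighbors.KNeighborsClassifier(
    algorithm='auto',
    leaf_size=2,
    n_neighbors=1,
    n_jobs=-1,
)
# one prediction per sample, each made with that sample held out
predicted = cross_val_predict(model, norm, labels, cv=LeaveOneOut())
actual = list(labels)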
@@ -120,6 +128,13 @@ for step in steps:
     print(step)
 
+# Feature importance
+# plt.bar(matrix[0][1:], features)
+# fig, ax = plt.subplots()
+# ax.set_title("Feature Importance")
+# ax.barh(matrix[0][1:], features)
+# plt.show()
+
 # Scores
 # https://www.evidentlyai.com/classification-metrics/multi-class-metrics
 # For all: higher is better
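The per-step metric computation itself is outside these hunks; for multi-class labels like these, the usual sklearn.metrics calls behind the four lists would look roughly as follows (a sketch, assuming `actual` and `predicted` are the label lists collected inside the loop and macro averaging is wanted):

from sklearn import metrics

accuracy.append(metrics.accuracy_score(actual, predicted))
precision.append(metrics.precision_score(actual, predicted, average='macro', zero_division=0))
recall.append(metrics.recall_score(actual, predicted, average='macro', zero_division=0))
phi.append(metrics.matthews_corrcoef(actual, predicted))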
@@ -127,17 +142,17 @@ fig, axs = plt.subplots(2, 2)
 fig.set_size_inches(12.5, 10)
 
 axs[0, 0].plot(steps, accuracy)
-axs[0, 0].set_title("Accuracy")
+axs[0, 0].set_title("Accuracy: $\mu$: %f"%np.mean(accuracy))
 axs[0, 0].grid()
 axs[0, 0].set_ylim(0, 1)
 
 axs[0, 1].plot(steps, precision)
-axs[0, 1].set_title("Precision")
+axs[0, 1].set_title("Precision $\mu$: %f"%np.mean(precision))
 axs[0, 1].grid()
 axs[0, 1].set_ylim(0, 1)
 
 axs[1, 0].plot(steps, recall)
-axs[1, 0].set_title("Recall")
+axs[1, 0].set_title("Recall $\mu$: %f"%np.mean(recall))
 axs[1, 0].grid()
 axs[1, 0].set_ylim(0, 1)
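Minor note on the new titles: `\m` is not a recognized escape, so "$\mu$" happens to pass through unchanged, but recent Python versions warn about invalid escape sequences; a raw string avoids that:

axs[0, 0].set_title(r"Accuracy: $\mu$: %f" % np.mean(accuracy))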
@@ -148,6 +163,7 @@ axs[1, 1].set_title("ROC AUC")
 axs[1, 1].legend()
 axs[1, 1].grid()
 axs[1, 1].set_ylim(0, 1)
+plt.show()
 
 # Confusion matrix
 c_matrix = metrics.confusion_matrix(actual, predicted)
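For reference, the confusion matrix computed here can also be rendered directly with sklearn's display helper instead of a hand-rolled plot; a minimal sketch:

from sklearn import metrics
import matplotlib.pyplot as plt

disp = metrics.ConfusionMatrixDisplay(confusion_matrix=c_matrix)
disp.plot()
plt.show(block=False)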
@@ -160,7 +176,7 @@ plt.show(block=False)
 # 0 random prediction
 # -1 opposite prediction
 plt.plot(steps, phi)
-plt.title("Matthews Correlation Coefficient")
+plt.title("Matthews Correlation Coefficient $\mu$: %f"%np.mean(phi))
 plt.grid()
 plt.ylim(-1, 1)
 plt.show()
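A quick check of the value range noted in the comments above (1 for perfect prediction, -1 for opposite prediction):

from sklearn import metrics

print(metrics.matthews_corrcoef([0, 1, 2, 1], [0, 1, 2, 1]))   # 1.0  (perfect prediction)
print(metrics.matthews_corrcoef([0, 1, 0, 1], [1, 0, 1, 0]))   # -1.0 (opposite prediction)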