This commit is contained in:
Tom Selier 2023-10-20 16:11:10 +02:00
parent bfe4a09f8d
commit 76c5f61dda

View File

@ -1,14 +1,14 @@
from enum import Enum # models
from sklearn import tree from sklearn import tree
from sklearn import metrics
from sklearn import preprocessing from sklearn import preprocessing
from sklearn import neighbors from sklearn import neighbors
from sklearn import ensemble from sklearn import ensemble
from sklearn import svm from sklearn import svm
from matplotlib import pyplot as plt
import pandas as pd # other
from enum import Enum
import numpy as np import numpy as np
import random import time
import csv import csv
import plots import plots
@ -49,58 +49,68 @@ with open(PATH, 'r') as file:
normalized = preprocessing.normalize(data, axis=0, norm='max') normalized = preprocessing.normalize(data, axis=0, norm='max')
norm = list(normalized.tolist()) norm = list(normalized.tolist())
steps = np.linspace(0.1, 1.0, 10, dtype=np.float64) steps = np.linspace(1e-4, 1, 20, dtype=np.float64)
print("Step \t seconds/step")
for step in steps: for step in steps:
actual = [] actual = []
predicted = [] predicted = []
time_start = time.time()
for i in range(len(norm)): for j in range(3):
temp_data = norm.pop(i) for i in range(len(norm)):
temp_label = labels.pop(i) temp_data = norm.pop(i)
temp_label = labels.pop(i)
# model = tree.DecisionTreeClassifier( # model = tree.DecisionTreeClassifier(
# class_weight=None, # class_weight=None,
# min_samples_leaf=2, # min_samples_leaf=2,
# max_depth=None, # < 5 is worse, None good too # max_depth=None, # < 5 is worse, None good too
# random_state=False, # No change # random_state=False, # No change
# criterion='gini', # MCC + 0.1 # criterion='gini', # MCC + 0.1
# splitter='best', # splitter='best',
# ccp_alpha=0 # Pruning: Keep this 0 # ccp_alpha=0 # Pruning: Keep this 0
# ) # )
# model = ensemble.RandomForestClassifier( # model = ensemble.RandomForestClassifier(
# n_estimators=20, # higher is better, but slower (def: 100) # n_estimators=20, # higher is better, but slower (def: 100)
# criterion='gini', # gini best # criterion='gini', # gini best
# ) # )
# model = ensemble.ExtraTreesClassifier( # model = ensemble.ExtraTreesClassifier(
# n_estimators=150 # higher is better, but slower (def: 100) # n_estimators=step # higher is better, but slower (def: 100)
# ) # )
# model = neighbors.KNeighborsClassifier( # model = neighbors.KNeighborsClassifier(
# algorithm='auto', # algorithm='auto',
# leaf_size=2, # leaf_size=2,
# n_neighbors=step, # n_neighbors=step,
# ) # )
model = ensemble.BaggingClassifier( # model = ensemble.BaggingClassifier(
n_estimators=5, # n_estimators=5,
max_samples=.5, # max_samples=.5,
max_features=.5, # max_features=.5,
bootstrap=False # bootstrap=False
) # )
# model = svm.SVC(decision_function_shape='ovr' # model = svm.SVC(
# ) # C = 0.8,
model = model.fit(norm, labels) # kernel = "poly",
result = model.predict([temp_data]) # degree = 5,
# coef0 = 6,
# probability = False,
# break_ties=True,
# decision_function_shape = 'ovr'
# )
model = model.fit(norm, labels)
result = model.predict([temp_data])
norm.append(temp_data) norm.append(temp_data)
labels.append(temp_label) labels.append(temp_label)
actual.append(temp_label) actual.append(temp_label)
predicted.append(result[0]) predicted.append(result[0])
actual_list.append(actual) actual_list.append(actual)
predicted_list.append(predicted) predicted_list.append(predicted)
print(step) print("%.4f"%step, "\t", "%.2f"%(time.time()-time_start))
plots.plotMetrics(actual_list, predicted_list) plots.plotMetrics(actual_list, predicted_list)
plots.plotConfusion(actual_list[0], predicted_list[0]) plots.plotConfusion(actual_list[0], predicted_list[0])