This commit is contained in:
Tom Selier 2023-10-20 16:11:10 +02:00
parent bfe4a09f8d
commit 76c5f61dda
1 changed file with 55 additions and 45 deletions

View File

@@ -1,14 +1,14 @@
from enum import Enum
# models
from sklearn import tree
from sklearn import metrics
from sklearn import preprocessing
from sklearn import neighbors
from sklearn import ensemble
from sklearn import svm
from matplotlib import pyplot as plt
import pandas as pd
# other
from enum import Enum
import numpy as np
import random
import time
import csv
import plots
@@ -49,58 +49,68 @@ with open(PATH, 'r') as file:
# NOTE(review): this span is a git diff rendering with old/new line pairs
# interleaved and indentation stripped — it is not directly runnable as-is.
# Comments below describe the apparent intent; duplicated statements are
# deletion/addition pairs from the diff (the later one is the updated code).
# Scale each feature column to [0, 1] by its per-column max, then convert
# back to plain Python lists so elements can be pop()/append()ed below.
normalized = preprocessing.normalize(data, axis=0, norm='max')
norm = list(normalized.tolist())
# Diff pair: the sweep was changed from 10 coarse steps (0.1..1.0) to
# 20 steps spanning 1e-4..1 — the second assignment is the current value.
steps = np.linspace(0.1, 1.0, 10, dtype=np.float64)
steps = np.linspace(1e-4, 1, 20, dtype=np.float64)
print("Step \t seconds/step")
# Sweep a hyperparameter value ("step") and time each full evaluation pass.
for step in steps:
actual = []
predicted = []
time_start = time.time()
# Diff pair: the single leave-one-out pass was wrapped in `for j in range(3)`
# (3 repetitions) in the new version. Each iteration removes sample i,
# trains on the remainder, and predicts the held-out sample.
for i in range(len(norm)):
temp_data = norm.pop(i)
temp_label = labels.pop(i)
for j in range(3):
# NOTE(review): pop(i) shrinks the list mid-iteration and append()
# later restores length but rotates order — presumably intentional
# for leave-one-out, but worth confirming indices stay aligned.
for i in range(len(norm)):
temp_data = norm.pop(i)
temp_label = labels.pop(i)
# --- old-version model candidates (commented-out alternatives kept
# --- for experimentation; BaggingClassifier was the active one) ---
# model = tree.DecisionTreeClassifier(
#     class_weight=None,
#     min_samples_leaf=2,
#     max_depth=None,  # < 5 is worse, None good too
#     random_state=False,  # No change
#     criterion='gini',  # MCC + 0.1
#     splitter='best',
#     ccp_alpha=0  # Pruning: Keep this 0
#     )
# model = ensemble.RandomForestClassifier(
#     n_estimators=20,  # higher is better, but slower (def: 100)
#     criterion='gini',  # gini best
#     )
# model = ensemble.ExtraTreesClassifier(
#     n_estimators=150  # higher is better, but slower (def: 100)
#     )
# model = neighbors.KNeighborsClassifier(
#     algorithm='auto',
#     leaf_size=2,
#     n_neighbors=step,
#     )
model = ensemble.BaggingClassifier(
n_estimators=5,
max_samples=.5,
max_features=.5,
bootstrap=False
)
# model = svm.SVC(decision_function_shape='ovr'
# )
model = model.fit(norm, labels)
result = model.predict([temp_data])
# --- new-version model candidates: all commented out except the
# --- fit/predict below; note ExtraTrees now takes n_estimators=step
# --- and the SVC block gained explicit hyperparameters ---
# model = tree.DecisionTreeClassifier(
#     class_weight=None,
#     min_samples_leaf=2,
#     max_depth=None,  # < 5 is worse, None good too
#     random_state=False,  # No change
#     criterion='gini',  # MCC + 0.1
#     splitter='best',
#     ccp_alpha=0  # Pruning: Keep this 0
#     )
# model = ensemble.RandomForestClassifier(
#     n_estimators=20,  # higher is better, but slower (def: 100)
#     criterion='gini',  # gini best
#     )
# model = ensemble.ExtraTreesClassifier(
#     n_estimators=step  # higher is better, but slower (def: 100)
#     )
# model = neighbors.KNeighborsClassifier(
#     algorithm='auto',
#     leaf_size=2,
#     n_neighbors=step,
#     )
# model = ensemble.BaggingClassifier(
#     n_estimators=5,
#     max_samples=.5,
#     max_features=.5,
#     bootstrap=False
#     )
# model = svm.SVC(
#     C = 0.8,
#     kernel = "poly",
#     degree = 5,
#     coef0 = 6,
#     probability = False,
#     break_ties=True,
#     decision_function_shape = 'ovr'
#     )
# Train on all remaining samples and classify the single held-out sample.
model = model.fit(norm, labels)
result = model.predict([temp_data])
# Diff pairs below: restore the held-out sample/label to the working
# lists, then record ground truth and the (single) prediction.
norm.append(temp_data)
labels.append(temp_label)
norm.append(temp_data)
labels.append(temp_label)
actual.append(temp_label)
predicted.append(result[0])
actual.append(temp_label)
predicted.append(result[0])
# Accumulate this sweep step's results for plotting after the loop.
actual_list.append(actual)
predicted_list.append(predicted)
# Diff pair: bare `print(step)` was replaced by a formatted line showing
# the step value and elapsed wall-clock seconds for the pass.
print(step)
print("%.4f"%step, "\t", "%.2f"%(time.time()-time_start))
# Plot aggregate metrics across all sweep steps, plus a confusion matrix
# for the first step's results (plots is a project-local module).
plots.plotMetrics(actual_list, predicted_list)
plots.plotConfusion(actual_list[0], predicted_list[0])