Add one-way ANOVA experiment script (per-feature p-values across tree classes)

This commit is contained in:
Tom Selier 2023-10-22 22:09:13 +02:00
parent 09f60efdb7
commit 81383594fb

68
src/experiments/anova.py Normal file
View File

@@ -0,0 +1,68 @@
from scipy.stats import f_oneway
import csv
import numpy as np
from enum import Enum
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from IPython.display import display
class Tree(Enum):
    """Integer class label for each tree category in the dataset.

    Member names are looked up by name from the upper-cased values of the
    CSV's 'Tree' column, so the spellings here (including ``ACCASIA``) must
    match the dataset exactly. The values are used as group indices 0-7.
    """

    # Names appear to be Dutch tree names -- English glosses are a
    # reviewer's reading, TODO confirm against the dataset documentation.
    ACCASIA = 0  # presumably acacia
    BERK = 1     # presumably birch
    EIK = 2      # presumably oak
    ELS = 3      # presumably alder
    ESDOORN = 4  # presumably maple
    ES = 5       # presumably ash
    LINDE = 6    # presumably linden
    PLATAAN = 7  # presumably plane tree
PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\\dataset\\csv\\complete.csv"


def load_dataset(path: str = PATH) -> pd.DataFrame:
    """Load the feature CSV at *path* into a numeric DataFrame.

    The first row is the header; header names are whitespace-stripped.
    The 'Tree' column holds class names, which are mapped (case-insensitively)
    to the integer values of the ``Tree`` enum. Every other column is
    converted to float.

    Raises:
        ValueError: the file contains no header row.
        KeyError: a 'Tree' value is not the name of a ``Tree`` member.
    """
    with open(path, newline="") as file:
        reader = csv.reader(file, delimiter=",")
        header = next(reader, None)
        if header is None:
            raise ValueError(f"{path!r} is empty; expected a header row")
        # csv.reader yields [] for blank lines; those carry no sample.
        rows = [row for row in reader if row]

    header = [name.strip() for name in header]
    df = pd.DataFrame(rows, columns=header)
    df["Tree"] = [Tree[label.strip().upper()].value for label in df["Tree"]]

    # astype returns a new frame, so its result must be kept.
    features = [name for name in header if name != "Tree"]
    return df.astype({name: float for name in features})


def anova_p_values(df: pd.DataFrame, label: str = "Tree") -> list:
    """Run a one-way ANOVA per feature column, grouped by *label*.

    Every column except *label* is treated as a feature. For each feature
    the samples are split by the value of *label* and passed to
    ``scipy.stats.f_oneway``. Only classes that actually occur in *df* form
    a group, so a class missing from the data does not add an empty sample.

    Returns:
        A list of ``(feature_name, p_value)`` tuples sorted by ascending
        p-value; NaN p-values are placed last.

    Raises:
        TypeError: raised by ``f_oneway`` when fewer than two classes occur.
    """
    grouped = df.groupby(label)
    p_values = {}
    for name in df.columns:
        if name == label:
            # Testing the label against itself is degenerate (constant groups).
            continue
        samples = [series.to_numpy() for _, series in grouped[name]]
        _, p = f_oneway(*samples)
        p_values[name] = p

    # NaN compares false with everything, which would make a plain sort
    # order undefined; key on "is NaN" first so NaNs sink to the end.
    return sorted(p_values.items(), key=lambda item: (pd.isna(item[1]), item[1]))


if __name__ == "__main__":
    df = load_dataset(PATH)
    result = anova_p_values(df)

    print("p score")
    for name, p in result:
        print(name, p)