"""Rank dataset features by one-way ANOVA p-value across the tree classes.

Reads the feature CSV at ``PATH``, splits every feature column into one
sample per ``Tree`` member and prints the features sorted by ascending
p-value.
"""
import csv
from enum import Enum

import numpy as np
import pandas as pd
import statsmodels.api as sm
from IPython.display import display
from scipy.stats import f_oneway
from statsmodels.formula.api import ols


class Tree(Enum):
    """Tree classes; each value is the integer group id used for the ANOVA.

    Member names are looked up from the upper-cased ``Tree`` column of the
    CSV, so they have to keep matching the labels stored in that file.
    """

    ACCASIA = 0
    BERK = 1
    EIK = 2
    ELS = 3
    ESDOORN = 4
    ES = 5
    LINDE = 6
    PLATAAN = 7


# Name of the CSV column that holds the class label.
LABEL = 'Tree'

# NOTE: machine-specific absolute path; adjust it to the local checkout.
PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\\dataset\\csv\\complete.csv"


def _read_rows(path):
    """Return ``(header, rows)`` read from the comma-separated file *path*.

    Header names are stripped of surrounding whitespace; the data rows are
    returned as lists of unparsed strings. Completely empty lines are
    skipped.

    Raises:
        ValueError: if the file contains no header row.
    """
    # newline="" lets the csv module do its own line-ending handling.
    with open(path, newline="") as file:
        rows = [row for row in csv.reader(file, delimiter=',') if row]
    if not rows:
        raise ValueError(f"{path}: no header row found")
    return [name.strip() for name in rows[0]], rows[1:]


def _anova_p_values(frame, features):
    """Return ``{feature: p}`` from a one-way ANOVA over the tree groups.

    Every feature column of *frame* is split into one sample per ``Tree``
    member (selected through the integer ids in the label column) and the
    samples are passed to ``scipy.stats.f_oneway``.
    """
    # The row masks depend only on the label column, so build them once
    # instead of re-walking the rows for every feature.
    masks = [frame[LABEL] == member.value for member in Tree]
    p_values = {}
    for name in features:
        column = frame[name]
        _, p = f_oneway(*(column[mask].to_numpy() for mask in masks))
        p_values[name] = p
    return p_values


header, data = _read_rows(PATH)

# Only the non-label columns are tested. Running the ANOVA on the label
# column itself compares constant groups and yields a meaningless p of 0.
features = [name for name in header if name != LABEL]

# Create a dataframe
df = pd.DataFrame(data, columns=header)
df[LABEL] = [Tree[x.upper()].value for x in df[LABEL]]
# DataFrame.astype returns a new frame, so the result must be assigned back.
df = df.astype({name: float for name in features})

# Ascending p-value. NaN results (e.g. a constant feature or an empty class)
# are placed last so they cannot disturb the ordering of the valid values.
result = sorted(
    _anova_p_values(df, features).items(),
    key=lambda item: (bool(np.isnan(item[1])), item[1]),
)

print("p score")
for name, p in result:
    print(name, p)

# Tip: df.boxplot(<feature name>, by="Tree", figsize=(17, 24)) shows the
# per-class distribution of a single feature.