idfk anymore statistics or something
This commit is contained in:
parent
09f60efdb7
commit
81383594fb
68
src/experiments/anova.py
Normal file
68
src/experiments/anova.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
from scipy.stats import f_oneway
|
||||||
|
import csv
|
||||||
|
import numpy as np
|
||||||
|
from enum import Enum
|
||||||
|
import pandas as pd
|
||||||
|
import statsmodels.api as sm
|
||||||
|
from statsmodels.formula.api import ols
|
||||||
|
from IPython.display import display
|
||||||
|
|
||||||
|
class Tree(Enum):
    """Integer class id for every tree species in the dataset.

    Members are numbered 0..7 in declaration order. The names look like
    Dutch species names (roughly: acacia, birch, oak, alder, maple, ash,
    linden, plane tree). Keep the spelling exactly as written if members
    are looked up by name (``Tree[label]``) from external data.
    """

    # Tuple-unpacking assigns consecutive values starting at 0.
    ACCASIA, BERK, EIK, ELS, ESDOORN, ES, LINDE, PLATAAN = range(8)
|
||||||
|
|
||||||
|
# Location of the combined feature CSV.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file exists.
PATH = "C:\\Users\\Tom\\Desktop\\Files\\Repositories\\EV5_Beeldherk_Bomen\\dataset\\csv\\complete.csv"

# Read the CSV in one pass: the first row holds the column names, every
# following row is one sample (all cells are still strings at this point).
# newline="" lets the csv module do its own line-ending handling, as the
# csv documentation recommends.
with open(PATH, newline="") as file:
    reader = csv.reader(file, delimiter=',')
    try:
        # Header cells are stripped so that padded names still match.
        header = [x.strip() for x in next(reader)]
    except StopIteration:
        # An empty file has no header row; fail with a clear message.
        raise ValueError(f"{PATH} is empty; expected a header row") from None
    data = list(reader)
|
||||||
|
|
||||||
|
# Create a dataframe: one row per sample, columns named after the CSV header.
# Assumes 'Tree' is the first column and every other column is numeric.
df = pd.DataFrame(data, columns=header)

# Replace the textual tree label by the integer value of the matching Tree
# member. Labels are stripped and upper-cased so that padding or casing in
# the CSV does not break the name lookup.
df['Tree'] = [Tree[x.strip().upper()].value for x in df['Tree']]

# csv.reader yields strings, so convert every feature column to float.
# DataFrame.astype returns a new frame; the result must be assigned back.
df = df.astype({col: float for col in header[1:]})

# NOTE: a per-column z-score standardisation, (x - mean) / std, was tried
# here earlier and is currently disabled.
|
||||||
|
|
||||||
|
# df.info()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# One-way ANOVA per feature: does the feature's mean differ between tree
# classes? A small p-value means the feature separates at least one class.

# One boolean row mask per Tree member, built once and reused for every
# feature. Deriving the groups from the enum avoids a hard-coded class count.
class_masks = [df['Tree'] == tree.value for tree in Tree]

result = {}
# Skip header[0] (the 'Tree' label itself); testing the label against its
# own classes is meaningless.
for name in header[1:]:
    samples = [df.loc[mask, name].to_numpy() for mask in class_masks]
    _, p = f_oneway(*samples)
    result[name] = p

# Sort ascending by p-value. NaN p-values (e.g. from degenerate groups) do
# not compare consistently, so they are explicitly ordered last.
result = sorted(result.items(), key=lambda x: (np.isnan(x[1]), x[1]))

print("p score")
for name, p in result:
    print(name, p)
|
||||||
|
|
||||||
|
# df.boxplot(header[29], by="Tree", figsize=(17, 24))
|
||||||
|
|
||||||
|
# for idx, name in enumerate(header[1:]):
|
||||||
|
# F, p = f_oneway(df['Tree'], df[name])
|
||||||
|
# if p < 0.05 and p != 0:
|
||||||
|
# print(idx+1, name, '\t\t', p)
|
Loading…
Reference in New Issue
Block a user