normality Tests

parent c22942f3
Showing with 36 additions and 17 deletions
......@@ -158,3 +158,4 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
/Examples/df_trans_merged_textflow.csv
\ No newline at end of file
......@@ -7,7 +7,8 @@ from statsmodels.stats.diagnostic import lilliefors
class Test():
#https://towardsdatascience.com/normality-tests-in-python-31e04aa4f411
def __init__(self,parametricTest=["Shapiro","D'Agostino","Anderson-Darling","Chi-Square","Lilliefors","Jarque–Bera","Kolmogorov-Smirnov"],nonParametricTest=["mannwhitneyu","wilcoxon","kruskal"]):
def __init__(self,normalityTest=["Shapiro","D'Agostino","Anderson-Darling","Chi-Square","Lilliefors","Jarque–Bera","Kolmogorov-Smirnov"], parametricTest=["mannwhitneyu","wilcoxon","kruskal"], nonParametricTest=["mannwhitneyu","wilcoxon","kruskal"]):
self.normalityTest = normalityTest
self.parametricTest = parametricTest
self.nonParametricTest = nonParametricTest
......@@ -19,52 +20,67 @@ class Test():
# Histograma
pass
def applyParametric(self,df):
def applyNormalTest(self,df):
testFinal = pd.DataFrame()
#Numeric Cols, hay que filtrar el df
for i in self.parametricTest:
testFinal.index = list(df.columns)
dicResult={}
for i in self.normalityTest:
if i == "Shapiro":
test = df.apply(lambda x: shapiro(x), axis=0)
test.index = ['Shapiro stat', 'Shapiro p-value']
test = test.transpose()
testFinal['Shapiro stat'] = list(test['Shapiro stat'])
testFinal['Shapiro p-value'] = list(test['Shapiro p-value'])
elif i == "D'Agostino":
test = df.apply(lambda x: normaltest(x), axis=0)
test.index = ["D'Agostino stat", "D'Agostino p-value"]
test = test.transpose()
testFinal["D'Agostino stat"] = list(test["D'Agostino stat"])
testFinal["D'Agostino p-value"] = list(test["D'Agostino p-value"])
elif i == "Anderson-Darling":
test = df.apply(lambda x: anderson(x), axis=0)
test.index = ['Anderson-Darling stat', 'Anderson-Darling crit_val', 'Anderson-Darling sig_level']
test = test.transpose()
pass
testFinal['Anderson-Darling stat'] = list(test['Anderson-Darling stat'])
testFinal['Anderson-Darling crit_val'] = list(test['Anderson-Darling crit_val'])
testFinal['Anderson-Darling sig_level'] = list(test['Anderson-Darling sig_level'])
elif i == "Chi-Square":
test = df.apply(lambda x: chisquare(x), axis=0)
test.index = ['Chi-Square stat', 'Chi-Square p-value']
test = test.transpose()
pass
testFinal['Chi-Square stat'] = list(test['Chi-Square stat'])
testFinal['Chi-Square p-value'] = list(test['Chi-Square p-value'])
elif i == "Lilliefors":
test = df.apply(lambda x: lilliefors(x), axis=0)
test.index = ['Lilliefors stat', 'Lilliefors p-value']
test = test.transpose()
pass
testFinal['Lilliefors stat'] = list(test['Lilliefors stat'])
testFinal['Lilliefors p-value'] = list(test['Lilliefors p-value'])
elif i == "Jarque–Bera":
test = df.apply(lambda x: jarque_bera(x), axis=0)
test.index = ['Shapiro stat', 'Shapiro p-value']
test.index = ['Jarque–Bera stat', 'Jarque–Bera p-value']
test = test.transpose()
pass
testFinal['Jarque–Bera stat'] = list(test['Jarque–Bera stat'])
testFinal['Jarque–Bera p-value'] = list(test['Jarque–Bera p-value'])
elif i == "Kolmogorov-Smirnov":
test = df.apply(lambda x: kstest(x, 'norm'), axis=0)
test.index = ["Kolmogorov-Smirnov stat", "Kolmogorov-Smirnov p-value"]
test = test.transpose()
testFinal['Kolmogorov-Smirnov stat'] = list(test['Kolmogorov-Smirnov stat'])
testFinal['Kolmogorov-Smirnov p-value'] = list(test['Kolmogorov-Smirnov p-value'])
for t in self.parametricTest:
for t in self.normalityTest:
if t != "Anderson-Darling":
print("Pass the test of"+t)
print(list(test[test[t+' p-value'] > 0.05].index))
print("Pass the test of "+t)
print(list(testFinal[testFinal[t+' p-value'] > 0.05].index))
dicResult[t] = list(testFinal[testFinal[t+' p-value'] > 0.05].index)
else:
for i in range(len(list(test[t+' crit_val'].index))):
sig_level, crit_val = test[t+' sig_level'][i], test[t+' crit_val'][i]
print("Pass the test of"+t)
print(list(test[test[t+' stat'] < crit_val].index),"at {sig_level} level of significance")
sig_level, crit_val = list(testFinal[t+' sig_level'])[0], list(testFinal[t+' crit_val'])[0]
for i in range(len(crit_val)):
print("Pass the test of "+t)
print(list(testFinal[testFinal[t+' stat'] < crit_val[i]].index),"at "+str(sig_level[i])+" level of significance")
dicResult[t+' '+sig_level[i]+' sig_lev'] = list(testFinal[testFinal[t+' p-value'] > 0.05].index)
return testFinal, dicResult
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment