Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Jaime Collado
/
textflow
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
1
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
80bdf7dc
authored
Oct 09, 2023
by
Estrella Vallecillo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
normality Tests
parent
c22942f3
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
36 additions
and
17 deletions
.gitignore
textflow/Test.py
.gitignore
View file @
80bdf7dc
...
...
@@ -158,3 +158,4 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Generated example output. A leading "./" is not understood by gitignore
# patterns, so the path is anchored to the repository root with "/" instead.
/Examples/df_trans_merged_textflow.csv
\ No newline at end of file
textflow/Test.py
View file @
80bdf7dc
...
...
@@ -7,7 +7,8 @@ from statsmodels.stats.diagnostic import lilliefors
class
Test
():
#https://towardsdatascience.com/normality-tests-in-python-31e04aa4f411
def __init__(self, normalityTest=None, parametricTest=None, nonParametricTest=None):
    """Store the names of the statistical tests this instance will run.

    Args:
        normalityTest: Names of the normality tests to apply. Defaults to
            Shapiro, D'Agostino, Anderson-Darling, Chi-Square, Lilliefors,
            Jarque–Bera and Kolmogorov-Smirnov.
        parametricTest: Names of the parametric tests. Defaults to
            mannwhitneyu, wilcoxon and kruskal.
        nonParametricTest: Names of the non-parametric tests. Defaults to
            mannwhitneyu, wilcoxon and kruskal.

    ``None`` selects the default list. A fresh list is built on every call so
    that instances never share (and accidentally mutate) the same default.
    """
    if normalityTest is None:
        normalityTest = [
            "Shapiro",
            "D'Agostino",
            "Anderson-Darling",
            "Chi-Square",
            "Lilliefors",
            "Jarque–Bera",
            "Kolmogorov-Smirnov",
        ]
    if parametricTest is None:
        # NOTE(review): these defaults are identical to the non-parametric
        # ones; this looks like a placeholder -- confirm the intended list.
        parametricTest = ["mannwhitneyu", "wilcoxon", "kruskal"]
    if nonParametricTest is None:
        nonParametricTest = ["mannwhitneyu", "wilcoxon", "kruskal"]

    self.normalityTest = normalityTest
    self.parametricTest = parametricTest
    self.nonParametricTest = nonParametricTest
...
...
@@ -19,52 +20,67 @@ class Test():
# Histograma
pass
def
apply
Parametric
(
self
,
df
):
def
apply
NormalTest
(
self
,
df
):
testFinal
=
pd
.
DataFrame
()
#Numeric Cols, hay que filtrar el df
for
i
in
self
.
parametricTest
:
testFinal
.
index
=
list
(
df
.
columns
)
dicResult
=
{}
for
i
in
self
.
normalityTest
:
if
i
==
"Shapiro"
:
test
=
df
.
apply
(
lambda
x
:
shapiro
(
x
),
axis
=
0
)
test
.
index
=
[
'Shapiro stat'
,
'Shapiro p-value'
]
test
=
test
.
transpose
()
testFinal
[
'Shapiro stat'
]
=
list
(
test
[
'Shapiro stat'
])
testFinal
[
'Shapiro p-value'
]
=
list
(
test
[
'Shapiro p-value'
])
elif
i
==
"D'Agostino"
:
test
=
df
.
apply
(
lambda
x
:
normaltest
(
x
),
axis
=
0
)
test
.
index
=
[
"D'Agostino stat"
,
"D'Agostino p-value"
]
test
=
test
.
transpose
()
testFinal
[
"D'Agostino stat"
]
=
list
(
test
[
"D'Agostino stat"
])
testFinal
[
"D'Agostino p-value"
]
=
list
(
test
[
"D'Agostino p-value"
])
elif
i
==
"Anderson-Darling"
:
test
=
df
.
apply
(
lambda
x
:
anderson
(
x
),
axis
=
0
)
test
.
index
=
[
'Anderson-Darling stat'
,
'Anderson-Darling crit_val'
,
'Anderson-Darling sig_level'
]
test
=
test
.
transpose
()
pass
testFinal
[
'Anderson-Darling stat'
]
=
list
(
test
[
'Anderson-Darling stat'
])
testFinal
[
'Anderson-Darling crit_val'
]
=
list
(
test
[
'Anderson-Darling crit_val'
])
testFinal
[
'Anderson-Darling sig_level'
]
=
list
(
test
[
'Anderson-Darling sig_level'
])
elif
i
==
"Chi-Square"
:
test
=
df
.
apply
(
lambda
x
:
chisquare
(
x
),
axis
=
0
)
test
.
index
=
[
'Chi-Square stat'
,
'Chi-Square p-value'
]
test
=
test
.
transpose
()
pass
testFinal
[
'Chi-Square stat'
]
=
list
(
test
[
'Chi-Square stat'
])
testFinal
[
'Chi-Square p-value'
]
=
list
(
test
[
'Chi-Square p-value'
])
elif
i
==
"Lilliefors"
:
test
=
df
.
apply
(
lambda
x
:
lilliefors
(
x
),
axis
=
0
)
test
.
index
=
[
'Lilliefors stat'
,
'Lilliefors p-value'
]
test
=
test
.
transpose
()
pass
testFinal
[
'Lilliefors stat'
]
=
list
(
test
[
'Lilliefors stat'
])
testFinal
[
'Lilliefors p-value'
]
=
list
(
test
[
'Lilliefors p-value'
])
elif
i
==
"Jarque–Bera"
:
test
=
df
.
apply
(
lambda
x
:
jarque_bera
(
x
),
axis
=
0
)
test
.
index
=
[
'
Shapiro stat'
,
'Shapiro
p-value'
]
test
.
index
=
[
'
Jarque–Bera stat'
,
'Jarque–Bera
p-value'
]
test
=
test
.
transpose
()
pass
testFinal
[
'Jarque–Bera stat'
]
=
list
(
test
[
'Jarque–Bera stat'
])
testFinal
[
'Jarque–Bera p-value'
]
=
list
(
test
[
'Jarque–Bera p-value'
])
elif
i
==
"Kolmogorov-Smirnov"
:
test
=
df
.
apply
(
lambda
x
:
kstest
(
x
,
'norm'
),
axis
=
0
)
test
.
index
=
[
"Kolmogorov-Smirnov stat"
,
"Kolmogorov-Smirnov p-value"
]
test
=
test
.
transpose
()
testFinal
[
'Kolmogorov-Smirnov stat'
]
=
list
(
test
[
'Kolmogorov-Smirnov stat'
])
testFinal
[
'Kolmogorov-Smirnov p-value'
]
=
list
(
test
[
'Kolmogorov-Smirnov p-value'
])
for
t
in
self
.
parametric
Test
:
for
t
in
self
.
normality
Test
:
if
t
!=
"Anderson-Darling"
:
print
(
"Pass the test of"
+
t
)
print
(
list
(
test
[
test
[
t
+
' p-value'
]
>
0.05
]
.
index
))
print
(
"Pass the test of "
+
t
)
print
(
list
(
testFinal
[
testFinal
[
t
+
' p-value'
]
>
0.05
]
.
index
))
dicResult
[
t
]
=
list
(
testFinal
[
testFinal
[
t
+
' p-value'
]
>
0.05
]
.
index
)
else
:
for
i
in
range
(
len
(
list
(
test
[
t
+
' crit_val'
]
.
index
))):
sig_level
,
crit_val
=
test
[
t
+
' sig_level'
][
i
],
test
[
t
+
' crit_val'
][
i
]
print
(
"Pass the test of"
+
t
)
print
(
list
(
test
[
test
[
t
+
' stat'
]
<
crit_val
]
.
index
),
"at {sig_level} level of significance"
)
sig_level
,
crit_val
=
list
(
testFinal
[
t
+
' sig_level'
])[
0
],
list
(
testFinal
[
t
+
' crit_val'
])[
0
]
for
i
in
range
(
len
(
crit_val
)):
print
(
"Pass the test of "
+
t
)
print
(
list
(
testFinal
[
testFinal
[
t
+
' stat'
]
<
crit_val
[
i
]]
.
index
),
"at "
+
str
(
sig_level
[
i
])
+
" level of significance"
)
dicResult
[
t
+
' '
+
sig_level
[
i
]
+
' sig_lev'
]
=
list
(
testFinal
[
testFinal
[
t
+
' p-value'
]
>
0.05
]
.
index
)
return
testFinal
,
dicResult
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment