Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
SINAI
/
clef-pan2018
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Pipelines
Settings
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
9622b455
authored
May 18, 2018
by
Arturo Montejo Ráez
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
extended meassures added
parent
c3a2ce28
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
375 additions
and
108 deletions
ComplexityFrench.py
ComplexityItalian.py
ComplexityLanguage.py
ComplexityPolish.py
ComplexitySpanish.py
ComplexityFrench.py
View file @
9622b455
...
...
@@ -72,28 +72,43 @@ class ComplexityFrench(ComplexityLanguage):
"""
self
.
config
+=
[
True
,
True
,
True
]
self
.
metricsStr
.
extend
([
'KANDEL-MODELS'
,
'DALE CHALL'
,
'SOL'
])
self
.
configExtend
+=
[
True
,
True
]
self
.
metricsStrExtend
.
extend
([
'MEAN RARE WORDS'
,
'STD RARE WORDS'
])
def
readability
(
self
):
#Number of low frequency words
count
=
0
lrarewords
=
[]
for
sentence
in
self
.
pos_content_sentences
:
count
=
0
for
w
in
sentence
:
if
w
.
get_form
()
not
in
self
.
listDaleChall
:
count
+=
1
N_difficultwords
=
count
lrarewords
.
append
(
count
)
#print('lrarewords', lrarewords)
#N_difficultwords = count
self
.
N_difficultwords
=
sum
(
lrarewords
)
#print("Number of rare words (N_rw): ", self.N_difficultwords, "\n")
self
.
mean_rw
=
np
.
mean
(
lrarewords
)
self
.
std_rw
=
np
.
std
(
lrarewords
)
#print("mean rare words: ", self.mean_rw)
#print("std rare words: ", self.std_rw)
#Number of syllables and Number of words with 3 or more syllables:tagger
N_syllables
=
0
N_syllables3
=
0
for
words
in
self
.
listwords
:
count
=
0
for
character
in
words
:
lwords
=
[]
for
sentence
in
self
.
pos_content_sentences
:
for
w
in
sentence
:
lwords
.
append
(
w
.
get_form
())
count
=
0
for
character
in
lwords
:
if
re
.
match
(
'a|e|i|o|u|y'
,
character
):
N_syllables
+=
1
count
+=
1
if
count
>=
3
:
N_syllables3
+=
1
if
count
>=
3
:
N_syllables3
+=
1
self
.
N_syllables
=
N_syllables
self
.
N_syllables3
=
N_syllables3
...
...
@@ -103,11 +118,12 @@ class ComplexityFrench(ComplexityLanguage):
#print("KANDEL-MODELS: ", kandelmodelsreadability, "\n")
self
.
kandelmodelsreadability
=
kandelmodelsreadability
dalechallreadability
=
15.79
*
(
N_difficultwords
/
self
.
N_words
)
+
0.04906
*
(
self
.
N_words
/
self
.
N_sentences
)
dalechallreadability
=
15.79
*
(
self
.
N_difficultwords
/
self
.
N_words
)
+
0.04906
*
(
self
.
N_words
/
self
.
N_sentences
)
#print("DALE CHALL: ", dalechallreadability, "\n")
self
.
dalechallreadability
=
dalechallreadability
return
self
.
kandelmodelsreadability
,
self
.
dalechallreadability
return
self
.
kandelmodelsreadability
,
self
.
dalechallreadability
,
self
.
mean_rw
,
self
.
std_rw
def
ageReadability
(
self
):
...
...
@@ -142,3 +158,24 @@ class ComplexityFrench(ComplexityLanguage):
return
metrics
def calcMetricsExtend(self, text):
    """
    Compute the extended complexity metrics enabled in the configuration.

    Starts from the dict returned by the parent ``calcMetricsExtend`` and
    adds the two rare-word metrics, read from the tuple returned by
    ``self.readability()`` (index 2 is the mean, index 3 the standard
    deviation).  A metric is enabled when the flag in ``self.configExtend``
    at the same position as its name in ``self.metricsStrExtend`` is
    truthy; when ``self.configExtend`` is None both metrics are computed.

    ``self.readability()`` is evaluated at most once per call, and only if
    at least one of the two metrics is enabled.

    :param text: text to analyse.
    :return: dict mapping metric name to value.
    """
    # NOTE(review): the parent calcMetricsExtend also calls
    # self.textProcessing(text), so this call looks redundant -- kept to
    # preserve the original sequence of side effects; confirm and drop.
    self.textProcessing(text)
    metricsExtend = super().calcMetricsExtend(text)

    # metric name -> index inside the tuple returned by readability()
    positions = (
        ('MEAN RARE WORDS', 2),
        ('STD RARE WORDS', 3),
    )

    if self.configExtend is None:
        enabled = {name for name, _ in positions}
    else:
        # Pair every flag with its metric name instead of deriving a start
        # index from the size of the parent's result; zip() stops at the
        # shorter list, so a name without a flag counts as disabled.
        enabled = {
            name
            for flag, name in zip(self.configExtend, self.metricsStrExtend)
            if flag
        }

    selected = [(name, index) for name, index in positions if name in enabled]
    if selected:
        readability = self.readability()
        for name, index in selected:
            metricsExtend[name] = readability[index]

    return metricsExtend
ComplexityItalian.py
View file @
9622b455
...
...
@@ -62,9 +62,11 @@ class ComplexityItalian():
Si config == None se calculan todas las métricas de complejidad soportadas
"""
self
.
config
=
[
True
,
True
,
True
,
True
,
True
,
True
]
self
.
metricsIt
=
[
'PUNCTUATION MARKS'
,
'SCI'
,
'ARI'
,
'MU'
,
'FLESCH-VACA'
,
'GULPEASE'
]
pass
self
.
metricsIt
=
[
'AVERAGE PUNCTUATION MARKS'
,
'SCI'
,
'ARI'
,
'MU'
,
'FLESCH-VACA'
,
'GULPEASE'
]
self
.
configExtend
=
[
True
,
True
,
True
,
True
,
True
]
self
.
metricsItExtend
=
[
'MEAN WORDS'
,
'STD WORDS'
,
'COMPLEX SENTENCES'
,
'MEAN SYLLABLES'
,
'STD SYLLABLES'
]
def
textProcessing
(
self
,
text
):
...
...
@@ -96,24 +98,42 @@ class ComplexityItalian():
#Solo nos interesa contar los tokens que sean signo de puntuación.
#Number of words.
punctuation
=
[]
N_words
=
[]
lsentences
=
[]
for
words
in
self
.
sentences
:
lwords
=
[]
for
w
in
words
:
if
re
.
match
(
'F.*'
,
w
.
get_tag
()):
punctuation
.
append
(
w
.
get_form
())
else
:
N_words
.
append
(
w
.
get_form
())
lwords
.
append
(
w
.
get_form
())
lsentences
.
append
(
len
(
lwords
))
#print('list sentences: ',lsentences)
self
.
N_words
=
sum
(
lsentences
)
#print('Number of words (N_w): ', self.N_words, '\n' )
self
.
mean_words
=
np
.
mean
(
lsentences
)
self
.
std_words
=
np
.
std
(
lsentences
)
#print('media', np.mean(lsentences))
#print('std', np.std(lsentences))
#print('Las palabras del texto son : ', N_words)
self
.
N_words
=
len
(
N_words
)
#
self.N_words = len(N_words)
#print('Number of words (N_w): ', self.N_words, '\n' )
self
.
N_punctuation
=
len
(
punctuation
)
self
.
punctuation
=
punctuation
#print("PUNCTUATION MARKS = ", self.N_punctuation,'\n')
return
self
.
N_punctuation
,
self
.
punctuation
,
self
.
N_words
if
self
.
N_words
==
0
:
punctuation_over_words
=
0
else
:
punctuation_over_words
=
self
.
N_punctuation
/
self
.
N_words
self
.
punctuation_over_words
=
punctuation_over_words
#print("PUNCTUATION MARKS = ", self.N_punctuation,'\n')
return
self
.
punctuation_over_words
,
self
.
mean_words
,
self
.
std_words
,
self
.
N_punctuation
,
self
.
punctuation
,
self
.
N_words
def
sentenceComplexity
(
self
):
...
...
@@ -182,25 +202,52 @@ class ComplexityItalian():
def
mureadability
(
self
):
#Number of syllables and Number of words with 3 or more syllables:tagger
N_syllables
=
0
N_syllables3
=
0
for
words
in
self
.
listwords
:
count
=
0
for
character
in
words
:
if
re
.
match
(
'a|e|i|o|u|y'
,
character
):
N_syllables
+=
1
count
+=
1
if
count
>=
3
:
N_syllables3
+=
1
self
.
N_syllables
=
N_syllables
punctuation
=
[]
lsyllablesentence
=
[]
for
words
in
self
.
sentences
:
lwords
=
[]
N_syllables
=
0
for
w
in
words
:
if
re
.
match
(
'F.*'
,
w
.
get_tag
()):
punctuation
.
append
(
w
.
get_form
())
else
:
lwords
.
append
(
w
.
get_form
())
#print('lwords', lwords)
for
words
in
lwords
:
count
=
0
for
character
in
words
:
if
re
.
match
(
'a|e|i|o|u|y'
,
character
):
N_syllables
+=
1
count
+=
1
if
count
>=
3
:
N_syllables3
+=
1
lsyllablesentence
.
append
(
N_syllables
)
#print('lsyllablesentence', lsyllablesentence)
self
.
N_syllables
=
sum
(
lsyllablesentence
)
self
.
N_syllables3
=
N_syllables3
self
.
mean_syllables
=
np
.
mean
(
lsyllablesentence
)
self
.
std_syllables
=
np
.
std
(
lsyllablesentence
)
#print('media', self.mean_syllables)
#print('std', self.std_syllables)
#Number of letters
listwords
=
[]
for
words
in
self
.
sentences
:
for
w
in
words
:
if
re
.
match
(
'F.*'
,
w
.
get_tag
()):
punctuation
.
append
(
w
.
get_form
())
else
:
listwords
.
append
(
w
.
get_form
())
N_letters
=
0
letters
=
[]
vecletters
=
[]
for
word
in
self
.
listwords
:
for
word
in
listwords
:
if
re
.
match
(
'[a-zA-Z]|á|ó|í|ú|é'
,
word
):
letters
.
append
(
word
)
N_letters
+=
len
(
word
)
...
...
@@ -217,7 +264,8 @@ class ComplexityItalian():
#print("READABILITY MU: ", mu, "\n")
self
.
mu
=
mu
return
self
.
mu
,
self
.
N_syllables
,
self
.
N_syllables3
,
self
.
letters
,
self
.
N_letters
,
self
.
vecletters
return
self
.
mu
,
self
.
mean_syllables
,
self
.
std_syllables
,
self
.
N_syllables
,
self
.
N_syllables3
,
self
.
letters
,
self
.
N_letters
,
self
.
vecletters
def
readability
(
self
):
...
...
@@ -244,9 +292,9 @@ class ComplexityItalian():
for
i
in
range
(
0
,
len
(
self
.
metricsIt
)):
if
self
.
config
==
None
or
self
.
config
[
i
]
and
self
.
metricsIt
[
i
]
==
'PUNCTUATION MARKS'
:
if
self
.
config
==
None
or
self
.
config
[
i
]
and
self
.
metricsIt
[
i
]
==
'
AVERAGE
PUNCTUATION MARKS'
:
punctuationmarks
=
self
.
punctuationMarks
()
metrics
[
'PUNCTUATION MARKS'
]
=
punctuationmarks
[
0
]
metrics
[
'
AVERAGE
PUNCTUATION MARKS'
]
=
punctuationmarks
[
0
]
if
self
.
config
==
None
or
self
.
config
[
i
]
and
self
.
metricsIt
[
i
]
==
'SCI'
:
sentencecomplexity
=
self
.
sentenceComplexity
()
metrics
[
'SCI'
]
=
self
.
SCI
...
...
@@ -276,7 +324,41 @@ class ComplexityItalian():
return
self
.
pos_sentences
def calcMetricsExtend(self, text):
    """
    Compute the extended complexity metrics enabled in the configuration.

    Runs ``self.textProcessing(text)`` and then fills a dict with every
    metric whose flag in ``self.configExtend`` is truthy (the flag at
    position ``i`` applies to ``self.metricsItExtend[i]``).  When
    ``self.configExtend`` is None all five metrics are computed.

    Each underlying analysis (``punctuationMarks``, ``sentenceComplexity``,
    ``mureadability``) is invoked at most once per call, in that order, and
    its result tuple is shared by the metrics read from it.

    :param text: text to analyse; passed unchanged to ``textProcessing``.
    :return: dict mapping metric name to value.
    """
    self.textProcessing(text)

    # metric name -> (analysis it is read from, index in that result tuple)
    layout = (
        ('MEAN WORDS', 'punctuation', 1),
        ('STD WORDS', 'punctuation', 2),
        ('COMPLEX SENTENCES', 'sentences', 1),
        ('MEAN SYLLABLES', 'mu', 1),
        ('STD SYLLABLES', 'mu', 2),
    )
    analyses = {
        'punctuation': self.punctuationMarks,
        'sentences': self.sentenceComplexity,
        'mu': self.mureadability,
    }

    if self.configExtend is None:
        enabled = {name for name, _, _ in layout}
    else:
        # zip() stops at the shorter list, so a metric name that has no
        # matching flag is treated as disabled.
        enabled = {
            name
            for flag, name in zip(self.configExtend, self.metricsItExtend)
            if flag
        }

    results = {}  # analysis key -> result tuple, computed on first use
    metricsExtend = {}
    for name, source, index in layout:
        if name not in enabled:
            continue
        if source not in results:
            results[source] = analyses[source]()
        metricsExtend[name] = results[source][index]

    return metricsExtend
...
...
ComplexityLanguage.py
View file @
9622b455
...
...
@@ -67,6 +67,9 @@ class ComplexityLanguage():
"""
self
.
config
=
[
True
,
True
,
True
,
True
]
self
.
metricsStr
=
[
'AVERAGE PUNCTUATION MARKS'
,
'SCI'
,
'ARI'
,
'MU'
]
self
.
configExtend
=
[
True
,
True
,
True
,
True
,
True
]
self
.
metricsStrExtend
=
[
'MEAN WORDS'
,
'STD WORDS'
,
'COMPLEX SENTENCES'
,
'MEAN SYLLABLES'
,
'STD SYLLABLES'
]
pass
...
...
@@ -110,18 +113,22 @@ class ComplexityLanguage():
#Solo nos interesa contar los tokens que sean signo de puntuación.
#Number of words.
punctuation
=
[]
lwords
=
[]
lsentences
=
[]
for
words
in
self
.
sentences
:
lwords
=
[]
for
w
in
words
:
if
re
.
match
(
'F.*'
,
w
.
get_tag
()):
punctuation
.
append
(
w
.
get_form
())
else
:
lwords
.
append
(
w
.
get_form
())
lsentences
.
append
(
len
(
lwords
))
self
.
N_words
=
len
(
lword
s
)
self
.
N_words
=
sum
(
lsentence
s
)
#print('Number of words (N_w): ', self.N_words, '\n' )
self
.
mean_words
=
np
.
mean
(
lsentences
)
self
.
std_words
=
np
.
std
(
lsentences
)
self
.
N_punctuation
=
len
(
punctuation
)
self
.
punctuation
=
punctuation
...
...
@@ -133,7 +140,7 @@ class ComplexityLanguage():
self
.
punctuation_over_words
=
punctuation_over_words
#print("PUNCTUATION MARKS = ", self.N_punctuation,'\n')
return
self
.
punctuation_over_words
,
self
.
N_punctuation
,
self
.
punctuation
,
self
.
N_words
return
self
.
punctuation_over_words
,
self
.
mean_words
,
self
.
std_words
,
self
.
N_punctuation
,
self
.
punctuation
,
self
.
N_words
def
sentenceComplexity
(
self
):
...
...
@@ -199,58 +206,85 @@ class ComplexityLanguage():
def
mureadability
(
self
):
#Number of syllables and Number of words with 3 or more syllables:tagger
N_syllables
=
0
N_syllables3
=
0
for
words
in
self
.
listwords
:
count
=
0
for
character
in
words
:
if
re
.
match
(
'a|e|i|o|u|y'
,
character
):
N_syllables
+=
1
count
+=
1
if
count
>=
3
:
N_syllables3
+=
1
self
.
N_syllables
=
N_syllables
punctuation
=
[]
lsyllablesentence
=
[]
for
words
in
self
.
sentences
:
lwords
=
[]
N_syllables
=
0
for
w
in
words
:
if
re
.
match
(
'F.*'
,
w
.
get_tag
()):
punctuation
.
append
(
w
.
get_form
())
else
:
lwords
.
append
(
w
.
get_form
())
#print('lwords', lwords)
for
words
in
lwords
:
count
=
0
for
character
in
words
:
if
re
.
match
(
'a|e|i|o|u|y'
,
character
):
N_syllables
+=
1
count
+=
1
if
count
>=
3
:
N_syllables3
+=
1
lsyllablesentence
.
append
(
N_syllables
)
#print('lsyllablesentence', lsyllablesentence)
self
.
N_syllables
=
sum
(
lsyllablesentence
)
self
.
N_syllables3
=
N_syllables3
self
.
mean_syllables
=
np
.
mean
(
lsyllablesentence
)
self
.
std_syllables
=
np
.
std
(
lsyllablesentence
)
#print('media', self.mean_syllables)
#print('std', self.std_syllables)
#Number of letters
listwords
=
[]
for
words
in
self
.
sentences
:
for
w
in
words
:
if
re
.
match
(
'F.*'
,
w
.
get_tag
()):
punctuation
.
append
(
w
.
get_form
())
else
:
listwords
.
append
(
w
.
get_form
())
N_letters
=
0
letters
=
[]
vecletters
=
[]
for
word
in
self
.
listwords
:
for
word
in
listwords
:
if
re
.
match
(
'[a-zA-Z]|á|ó|í|ú|é'
,
word
):
letters
.
append
(
word
)
N_letters
+=
len
(
word
)
vecletters
.
append
(
len
(
word
))
self
.
letters
=
letters
self
.
N_letters
=
N_letters
self
.
vecletters
=
vecletters
x
=
self
.
N_letters
/
self
.
N_words
varianza
=
np
.
var
(
self
.
vecletters
)
mu
=
(
self
.
N_words
/
(
self
.
N_words
-
1
))
*
(
x
/
varianza
)
*
100
#print("READABILITY MU: ", mu, "\n")
self
.
mu
=
mu
return
self
.
mu
,
self
.
N_syllables
,
self
.
N_syllables3
,
self
.
letters
,
self
.
N_letters
,
self
.
vecletters
return
self
.
mu
,
self
.
mean_syllables
,
self
.
std_syllables
,
self
.
N_syllables
,
self
.
N_syllables3
,
self
.
letters
,
self
.
N_letters
,
self
.
vecletters
def
calcMetrics
(
self
,
text
):
"""
Calcula la métricas de complejidad activadas en la configuración
"""
"""
Calcula la métricas de complejidad activadas en la configuración
"""
self
.
textProcessing
(
text
)
metrics
=
{}
punctuationMarks
=
None
autoreadability
=
None
sentencecomplexity
=
None
mureadability
=
None
for
i
in
range
(
0
,
len
(
self
.
metricsStr
)):
if
self
.
config
==
None
or
self
.
config
[
i
]
and
self
.
metricsStr
[
i
]
==
'AVERAGE PUNCTUATION MARKS'
:
punctuationmarks
=
self
.
punctuationMarks
()
metrics
[
'AVERAGE PUNCTUATION MARKS'
]
=
punctuationmarks
[
0
]
...
...
@@ -263,8 +297,43 @@ class ComplexityLanguage():
if
self
.
config
==
None
or
self
.
config
[
i
]
and
self
.
metricsStr
[
i
]
==
'MU'
:
mureadability
=
self
.
mureadability
()
metrics
[
'MU'
]
=
mureadability
[
0
]
return
metrics
return
metrics
def calcMetricsExtend(self, text):
    """
    Compute the extended complexity metrics enabled in the configuration.

    Runs ``self.textProcessing(text)`` and then fills a dict with every
    metric handled here whose flag in ``self.configExtend`` is truthy (the
    flag at position ``i`` applies to ``self.metricsStrExtend[i]``).  When
    ``self.configExtend`` is None all five metrics are computed.  Names in
    ``self.metricsStrExtend`` that are not handled here are ignored.

    Each underlying analysis (``punctuationMarks``, ``sentenceComplexity``,
    ``mureadability``) is invoked at most once per call, in that order, and
    its result tuple is shared by the metrics read from it.

    :param text: text to analyse; passed unchanged to ``textProcessing``.
    :return: dict mapping metric name to value.
    """
    self.textProcessing(text)

    # metric name -> (analysis it is read from, index in that result tuple)
    layout = (
        ('MEAN WORDS', 'punctuation', 1),
        ('STD WORDS', 'punctuation', 2),
        ('COMPLEX SENTENCES', 'sentences', 1),
        ('MEAN SYLLABLES', 'mu', 1),
        ('STD SYLLABLES', 'mu', 2),
    )
    analyses = {
        'punctuation': self.punctuationMarks,
        'sentences': self.sentenceComplexity,
        'mu': self.mureadability,
    }

    if self.configExtend is None:
        enabled = {name for name, _, _ in layout}
    else:
        # zip() stops at the shorter list, so a metric name that has no
        # matching flag is treated as disabled.
        enabled = {
            name
            for flag, name in zip(self.configExtend, self.metricsStrExtend)
            if flag
        }

    results = {}  # analysis key -> result tuple, computed on first use
    metricsExtend = {}
    for name, source, index in layout:
        if name not in enabled:
            continue
        if source not in results:
            results[source] = analyses[source]()
        metricsExtend[name] = results[source][index]

    return metricsExtend
def
getPOS
(
self
,
text
):
self
.
textProcessing
(
text
)
...
...
ComplexityPolish.py
View file @
9622b455
...
...
@@ -23,7 +23,9 @@ class ComplexityPolish():
self
.
config
=
[
True
,
True
,
True
,
True
,
True
,
True
]
self
.
metricsStr
=
[
'AVERAGE PUNCTUATION MARKS'
,
'ARI'
,
'FOG'
,
'FLESCH'
,
'FLESCH-KINCAID'
,
'PISAREK'
]
pass
self
.
configExtend
=
[
True
,
True
,
True
,
True
]
self
.
metricsStrExtend
=
[
'MEAN WORDS'
,
'STD WORDS'
,
'MEAN SYLLABLES'
,
'STD SYLLABLES'
]
def
textProcessing
(
self
,
text
):
text
=
text
.
replace
(
u'
\xa0
'
,
u' '
)
...
...
@@ -45,7 +47,7 @@ class ComplexityPolish():
N_text_tokens
=
len
(
self
.
text_tokens
)
self
.
N_text_tokens
=
N_text_tokens
#print('Tokens: ', self.N_text_tokens)
# y ahora reorganizamos las oraciones a partir de los puntos aislados
sentences
=
[]
ini
=
0
...
...
@@ -64,63 +66,80 @@ class ComplexityPolish():
N_sentences
=
len
(
sentences
)
self
.
N_sentences
=
N_sentences
#print('Sentences: ',self.sentences)
N_charac
=
0
for
word
in
self
.
text_tokens
:
N_charac
+=
len
(
word
)
self
.
N_charac
=
N_charac
#print('The number the character is: ', self.N_charac)
N_syllables
=
0
N_syllables3
=
0
for
words
in
self
.
text_tokens
:
count
=
0
for
character
in
words
:
if
re
.
match
(
'a|e|i|o|u|y'
,
character
):
N_syllables
+=
1
count
+=
1
if
count
>=
3
:
N_syllables3
+=
1
self
.
N_syllables
=
N_syllables
self
.
N_syllables3
=
N_syllables3
#print('The number of syllables is: ',self.N_syllables)
#print('The number of syllables3 is: ', self.N_syllables3)
return
self
.
text_tokens
,
self
.
N_text_tokens
,
self
.
sentences
,
self
.
N_sentences
,
self
.
N_charac
,
self
.
N_syllables
,
self
.
N_syllables3
return
self
.
text_tokens
,
self
.
N_text_tokens
,
self
.
sentences
,
self
.
N_sentences
,
self
.
N_charac
def
punctuationMarks
(
self
):
N_punctuation
=
0
letters
=
[]
lsentences
=
[]
N_letters
=
0
for
word
in
self
.
text_tokens
:
if
re
.
match
(
'[a-zA-Z]|á|ó|í|ú|é'
,
word
):
letters
.
append
(
word
)
N_letters
+=
len
(
word
)
else
:
N_punctuation
+=
1
self
.
words
=
letters
self
.
N_words
=
len
(
letters
)
#print('N_words: ', self.N_words)
N_syllables3
=
0
lsyllablesentence
=
[]
for
words
in
self
.
sentences
:
lwords
=
[]
N_syllables
=
0
for
w
in
words
:
if
re
.
match
(
'[a-zA-Z]|á|ó|í|ú|é'
,
w
):
lwords
.
append
(
w
)
letters
.
append
(
w
)
N_letters
+=
len
(
w
)
else
:
N_punctuation
+=
1
lsentences
.
append
(
len
(
lwords
))
for
words
in
lwords
:
count
=
0
for
character
in
words
:
if
re
.
match
(
'a|e|i|o|u|y'
,
character
):
N_syllables
+=
1
count
+=
1
if
count
>=
3
:
N_syllables3
+=
1
lsyllablesentence
.
append
(
N_syllables
)
#print('lsyllablesentence', lsyllablesentence)
self
.
N_syllables
=
sum
(
lsyllablesentence
)
self
.
N_syllables3
=
N_syllables3
self
.
mean_syllables
=
np
.
mean
(
lsyllablesentence
)
self
.
std_syllables
=
np
.
std
(
lsyllablesentence
)
#print('media', self.mean_syllables)
#print('std', self.std_syllables)
#print('list sentences: ',lsentences)
self
.
N_words
=
sum
(
lsentences
)
#print('Number of words (N_w): ', self.N_words, '\n' )
self
.
mean_words
=
np
.
mean
(
lsentences
)
self
.
std_words
=
np
.
std
(
lsentences
)
#print('media', np.mean(lsentences))
#print('std', np.std(lsentences))
self
.
words
=
letters
self
.
N_letters
=
N_letters
self
.
N_punctuation
=
N_punctuation
if
self
.
N_words
==
0
:
punctuation_over_words
=
0
else
:
punctuation_over_words
=
self
.
N_punctuation
/
self
.
N_words
self
.
punctuation_over_words
=
punctuation_over_words
#print('The number of letter is: ', N_letters)
#print('The list of letter is: ', letters)
#print('The PUNCTUATION MARKS is: ', self.N_punctuation, '\n')
return
self
.
punctuation_over_words
,
self
.
mean_words
,
self
.
std_words
,
self
.
mean_syllables
,
self
.
std_syllables
,
self
.
N_punctuation
,
self
.
words
,
self
.
N_words
,
self
.
N_letters
,
self
.
N_syllables
,
self
.
N_syllables3
return
self
.
punctuation_over_words
,
self
.
N_punctuation
,
self
.
words
,
self
.
N_words
,
self
.
N_letters
def
readability
(
self
):
...
...
@@ -198,3 +217,33 @@ class ComplexityPolish():
self
.
pos_sentences
=
pos_sentences
return
self
.
pos_sentences
def calcMetricsExtend(self, text):
    """
    Compute the extended complexity metrics enabled in the configuration.

    Runs ``self.textProcessing(text)`` and then reads the enabled metrics
    from the tuple returned by ``self.punctuationMarks()`` (index 1 mean
    words, 2 std words, 3 mean syllables, 4 std syllables).  A metric is
    enabled when the flag in ``self.configExtend`` at the same position as
    its name in ``self.metricsStrExtend`` is truthy; when
    ``self.configExtend`` is None all four metrics are computed.

    ``self.punctuationMarks()`` is evaluated at most once per call, and not
    at all when no metric is enabled.

    :param text: text to analyse; passed unchanged to ``textProcessing``.
    :return: dict mapping metric name to value.
    """
    self.textProcessing(text)

    # metric name -> index inside the tuple returned by punctuationMarks()
    positions = (
        ('MEAN WORDS', 1),
        ('STD WORDS', 2),
        ('MEAN SYLLABLES', 3),
        ('STD SYLLABLES', 4),
    )

    if self.configExtend is None:
        enabled = {name for name, _ in positions}
    else:
        # zip() stops at the shorter list, so a metric name that has no
        # matching flag is treated as disabled.
        enabled = {
            name
            for flag, name in zip(self.configExtend, self.metricsStrExtend)
            if flag
        }

    selected = [(name, index) for name, index in positions if name in enabled]
    if not selected:
        return {}

    punctuationmarks = self.punctuationMarks()
    return {name: punctuationmarks[index] for name, index in selected}
ComplexitySpanish.py
View file @
9622b455
...
...
@@ -47,6 +47,9 @@ class ComplexitySpanish(ComplexityLanguage):
self
.
config
+=
[
True
,
True
,
True
,
True
,
True
,
True
,
True
,
True
,
True
,
True
,
True
,
True
]
self
.
metricsStr
.
extend
([
'MaxDEPTH'
,
'MinDEPTH'
,
'MeanDEPTH'
,
'StdDEPTH'
,
'LC'
,
'SSR'
,
'HUERTA'
,
'IFSZ'
,
'POLINI'
,
'MINIMUN AGE'
,
'SOL'
,
'CRAWFORD'
])
self
.
configExtend
+=
[
True
,
True
]
self
.
metricsStrExtend
.
extend
([
'MEAN RARE WORDS'
,
'STD RARE WORDS'
])
def
textProcessing
(
self
,
text
):
text
=
text
.
replace
(
u'
\xa0
'
,
u' '
)
.
replace
(
'"'
,
''
)
# meter todas las funciones en una patron de los tokens válidos
...
...
@@ -166,21 +169,24 @@ class ComplexitySpanish(ComplexityLanguage):
#Number of rare words
byfreq
=
sorted
(
self
.
crea
,
key
=
self
.
crea
.
__getitem__
,
reverse
=
True
)
byfreq
=
byfreq
[:
1500
]
count
=
0
lrarewords
=
[]
for
sentence
in
self
.
pos_content_sentences
:
count
=
0
for
w
in
sentence
:
if
w
.
get_form
()
.
lower
()
not
in
byfreq
:
count
+=
1
lrarewords
.
append
(
count
)
N_rw
=
count
self
.
N_rw
=
N_rw
self
.
N_rw
=
sum
(
lrarewords
)
#print("Number of rare words (N_rw): ", self.N_rw, "\n")
self
.
mean_rw
=
np
.
mean
(
lrarewords
)
self
.
std_rw
=
np
.
std
(
lrarewords
)
SSR
=
1.609
*
(
self
.
N_words
/
self
.
N_sentences
)
+
331.8
*
(
self
.
N_rw
/
self
.
N_words
)
+
22.0
self
.
SSR
=
SSR
#print ("SPAULDING SPANISH READABILITY (SSR) ", self.SSR, "\n")
return
self
.
SSR
,
self
.
N_rw
return
self
.
SSR
,
self
.
mean_rw
,
self
.
std_rw
,
self
.
N_rw
def
readability
(
self
):
...
...
@@ -300,4 +306,28 @@ class ComplexitySpanish(ComplexityLanguage):
metrics
[
'CRAWFORD'
]
=
self
.
yearsCrawford
()
return
metrics
def calcMetricsExtend(self, text):
    """
    Compute the extended complexity metrics enabled in the configuration.

    Starts from the dict returned by the parent ``calcMetricsExtend`` and
    adds the two rare-word metrics, read from the tuple returned by
    ``self.ssReadability()`` (index 1 is the mean, index 2 the standard
    deviation).  A metric is enabled when the flag in ``self.configExtend``
    at the same position as its name in ``self.metricsStrExtend`` is
    truthy; when ``self.configExtend`` is None both metrics are computed.

    ``self.ssReadability()`` is evaluated at most once per call, and only
    if at least one of the two metrics is enabled.

    :param text: text to analyse.
    :return: dict mapping metric name to value.
    """
    # NOTE(review): the parent calcMetricsExtend also calls
    # self.textProcessing(text), so this call looks redundant -- kept to
    # preserve the original sequence of side effects; confirm and drop.
    self.textProcessing(text)
    metricsExtend = super().calcMetricsExtend(text)

    # metric name -> index inside the tuple returned by ssReadability()
    positions = (
        ('MEAN RARE WORDS', 1),
        ('STD RARE WORDS', 2),
    )

    if self.configExtend is None:
        enabled = {name for name, _ in positions}
    else:
        # Pair every flag with its metric name instead of deriving a start
        # index from the size of the parent's result; zip() stops at the
        # shorter list, so a name without a flag counts as disabled.
        enabled = {
            name
            for flag, name in zip(self.configExtend, self.metricsStrExtend)
            if flag
        }

    selected = [(name, index) for name, index in positions if name in enabled]
    if selected:
        ssreadability = self.ssReadability()
        for name, index in selected:
            metricsExtend[name] = ssreadability[index]

    return metricsExtend
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment