Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
SINAI
/
texty
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
182e38fe
authored
Mar 24, 2022
by
Jaime Collado
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
Bug fixes and refactorization
parent
a88860e6
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
23 additions
and
20 deletions
.gitignore
__init__.py
examples/example.ipynb
src/texty/complexity.py
.gitignore
View file @
182e38fe
__pycache__
__pycache__
*.pyc
*.pyc
.ipynb_checkpoints
\ No newline at end of file
__init__.py
0 → 100644
View file @
182e38fe
File mode changed
examples/example.ipynb
View file @
182e38fe
This diff could not be displayed because it is too large.
src/texty/complexity.py
View file @
182e38fe
from
functools
import
reduce
from
functools
import
reduce
import
spacy
import
math
import
math
import
syllables
import
os
import
os
import
re
import
re
import
numpy
as
np
import
syllables
crea_total_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'CREA_total.txt'
)
crea_total_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'CREA_total.txt'
)
class
ComplexityAnalyzer
():
class
ComplexityAnalyzer
():
...
@@ -44,15 +45,15 @@ class ComplexityAnalyzer():
...
@@ -44,15 +45,15 @@ class ComplexityAnalyzer():
self
.
pos_content_sentences
=
pos_content_sentences
self
.
pos_content_sentences
=
pos_content_sentences
def
get_all_metrics
(
self
):
def
get_all_metrics
(
self
):
self
.
punctuation
M
arks
()
self
.
punctuation
_m
arks
()
self
.
lexical
C
omplexity
()
self
.
lexical
_c
omplexity
()
self
.
ss
R
eadability
()
self
.
ss
_r
eadability
()
self
.
sentence
C
omplexity
()
self
.
sentence
_c
omplexity
()
self
.
auto
R
eadability
()
self
.
auto
_r
eadability
()
self
.
embedding
D
epth
()
self
.
embedding
_d
epth
()
self
.
readability
()
self
.
readability
()
self
.
age
R
eadability
()
self
.
age
_r
eadability
()
self
.
years
C
rawford
()
self
.
years
_c
rawford
()
metrics
=
{
metrics
=
{
'npunct'
:
self
.
npunctuation
,
# number of punctuation marks
'npunct'
:
self
.
npunctuation
,
# number of punctuation marks
...
@@ -82,7 +83,7 @@ class ComplexityAnalyzer():
...
@@ -82,7 +83,7 @@ class ComplexityAnalyzer():
}
}
return
metrics
return
metrics
def
punctuation
M
arks
(
self
):
def
punctuation
_m
arks
(
self
):
# Solo nos interesa contar los tokens que sean signo de puntuación.
# Solo nos interesa contar los tokens que sean signo de puntuación.
# Number of words.
# Number of words.
punctuation
=
[]
punctuation
=
[]
...
@@ -103,12 +104,12 @@ class ComplexityAnalyzer():
...
@@ -103,12 +104,12 @@ class ComplexityAnalyzer():
return
self
.
npunctuation
,
self
.
punctuation
,
self
.
N_words
return
self
.
npunctuation
,
self
.
punctuation
,
self
.
N_words
def
lexical
C
omplexity
(
self
):
def
lexical
_c
omplexity
(
self
):
# Number of low frequency words
# Number of low frequency words
count
=
0
count
=
0
for
sentence
in
self
.
pos_content_sentences
:
for
sentence
in
self
.
pos_content_sentences
:
for
w
in
sentence
:
for
w
in
sentence
:
if
w
.
text
not
in
self
.
crea
:
if
w
.
text
not
in
self
.
lang_word_freqs
:
count
+=
1
count
+=
1
N_lfw
=
count
N_lfw
=
count
self
.
N_lfw
=
N_lfw
self
.
N_lfw
=
N_lfw
...
@@ -142,12 +143,12 @@ class ComplexityAnalyzer():
...
@@ -142,12 +143,12 @@ class ComplexityAnalyzer():
return
self
.
N_lfw
,
self
.
N_cw
,
self
.
N_dcw
,
self
.
N_s
,
self
.
LDI
,
self
.
ILFW
,
self
.
LC
return
self
.
N_lfw
,
self
.
N_cw
,
self
.
N_dcw
,
self
.
N_s
,
self
.
LDI
,
self
.
ILFW
,
self
.
LC
def
ss
R
eadability
(
self
):
def
ss
_r
eadability
(
self
):
'''
'''
Spaulding Score of Readability
Spaulding Score of Readability
'''
'''
#Number of rare words
#Number of rare words
byfreq
=
sorted
(
self
.
crea
,
key
=
self
.
crea
.
__getitem__
,
reverse
=
True
)
byfreq
=
sorted
(
self
.
lang_word_freqs
,
key
=
self
.
lang_word_freqs
.
__getitem__
,
reverse
=
True
)
byfreq
=
byfreq
[:
1500
]
byfreq
=
byfreq
[:
1500
]
count
=
0
count
=
0
for
sentence
in
self
.
pos_content_sentences
:
for
sentence
in
self
.
pos_content_sentences
:
...
@@ -162,7 +163,7 @@ class ComplexityAnalyzer():
...
@@ -162,7 +163,7 @@ class ComplexityAnalyzer():
return
self
.
N_rw
,
self
.
SSR
return
self
.
N_rw
,
self
.
SSR
def
sentence
C
omplexity
(
self
):
def
sentence
_c
omplexity
(
self
):
#Number of complex sentences
#Number of complex sentences
N_cs
=
0
N_cs
=
0
for
sentence
in
self
.
sentences
:
for
sentence
in
self
.
sentences
:
...
@@ -192,7 +193,7 @@ class ComplexityAnalyzer():
...
@@ -192,7 +193,7 @@ class ComplexityAnalyzer():
return
self
.
N_cs
,
self
.
ASL
,
self
.
CS
,
self
.
SCI
return
self
.
N_cs
,
self
.
ASL
,
self
.
CS
,
self
.
SCI
def
auto
R
eadability
(
self
):
def
auto
_r
eadability
(
self
):
# Number of characters
# Number of characters
count
=
0
count
=
0
listwords
=
[]
listwords
=
[]
...
@@ -225,7 +226,7 @@ class ComplexityAnalyzer():
...
@@ -225,7 +226,7 @@ class ComplexityAnalyzer():
return
320
return
320
return
1
+
max
(
self
.
tree_height
(
x
,
cont
)
for
x
in
root
.
children
)
return
1
+
max
(
self
.
tree_height
(
x
,
cont
)
for
x
in
root
.
children
)
def
embedding
D
epth
(
self
):
def
embedding
_d
epth
(
self
):
## Output results
## Output results
roots
=
[
sent
.
root
for
sent
in
self
.
sentences
]
roots
=
[
sent
.
root
for
sent
in
self
.
sentences
]
max_list
=
[]
max_list
=
[]
...
@@ -304,7 +305,7 @@ class ComplexityAnalyzer():
...
@@ -304,7 +305,7 @@ class ComplexityAnalyzer():
return
self
.
n_syllables
,
self
.
n_syllables3
,
self
.
nletters
,
self
.
huertareadability
,
self
.
ifszreadability
,
self
.
polinicompressibility
,
self
.
mureadability
,
self
.
syll_words
,
self
.
words_sen
return
self
.
n_syllables
,
self
.
n_syllables3
,
self
.
nletters
,
self
.
huertareadability
,
self
.
ifszreadability
,
self
.
polinicompressibility
,
self
.
mureadability
,
self
.
syll_words
,
self
.
words_sen
def
age
R
eadability
(
self
):
def
age
_r
eadability
(
self
):
minimumage
=
0.2495
*
(
self
.
N_words
/
self
.
nsentences
)
+
6.4763
*
(
self
.
n_syllables
/
self
.
N_words
)
-
7.1395
minimumage
=
0.2495
*
(
self
.
N_words
/
self
.
nsentences
)
+
6.4763
*
(
self
.
n_syllables
/
self
.
N_words
)
-
7.1395
self
.
minimumage
=
minimumage
self
.
minimumage
=
minimumage
...
@@ -314,7 +315,7 @@ class ComplexityAnalyzer():
...
@@ -314,7 +315,7 @@ class ComplexityAnalyzer():
return
self
.
minimumage
,
self
.
solreadability
return
self
.
minimumage
,
self
.
solreadability
def
years
C
rawford
(
self
):
def
years
_c
rawford
(
self
):
years
=
-
20.5
*
(
self
.
nsentences
/
self
.
N_words
)
+
4.9
*
(
self
.
n_syllables
/
self
.
N_words
)
-
3.407
years
=
-
20.5
*
(
self
.
nsentences
/
self
.
N_words
)
+
4.9
*
(
self
.
n_syllables
/
self
.
N_words
)
-
3.407
self
.
years
=
years
self
.
years
=
years
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment