Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
SINAI
/
texty
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
182e38fe
authored
Mar 24, 2022
by
Jaime Collado
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Bug fixes and refactoring
parent
a88860e6
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
23 additions
and
20 deletions
.gitignore
__init__.py
examples/example.ipynb
src/texty/complexity.py
.gitignore
View file @
182e38fe
__pycache__
*.pyc
.ipynb_checkpoints
\ No newline at end of file
__init__.py
0 → 100644
View file @
182e38fe
File mode changed
examples/example.ipynb
View file @
182e38fe
This diff could not be displayed because it is too large.
src/texty/complexity.py
View file @
182e38fe
from
functools
import
reduce
import
spacy
import
math
import
syllables
import
os
import
re
import
numpy
as
np
import
syllables
crea_total_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'CREA_total.txt'
)
class
ComplexityAnalyzer
():
...
...
@@ -44,15 +45,15 @@ class ComplexityAnalyzer():
self
.
pos_content_sentences
=
pos_content_sentences
def
get_all_metrics
(
self
):
self
.
punctuation
M
arks
()
self
.
lexical
C
omplexity
()
self
.
ss
R
eadability
()
self
.
sentence
C
omplexity
()
self
.
auto
R
eadability
()
self
.
embedding
D
epth
()
self
.
punctuation
_m
arks
()
self
.
lexical
_c
omplexity
()
self
.
ss
_r
eadability
()
self
.
sentence
_c
omplexity
()
self
.
auto
_r
eadability
()
self
.
embedding
_d
epth
()
self
.
readability
()
self
.
age
R
eadability
()
self
.
years
C
rawford
()
self
.
age
_r
eadability
()
self
.
years
_c
rawford
()
metrics
=
{
'npunct'
:
self
.
npunctuation
,
# number of punctuation marks
...
...
@@ -82,7 +83,7 @@ class ComplexityAnalyzer():
}
return
metrics
def
punctuation
M
arks
(
self
):
def
punctuation
_m
arks
(
self
):
# Solo nos interesa contar los tokens que sean signo de puntuación.
# Number of words.
punctuation
=
[]
...
...
@@ -103,12 +104,12 @@ class ComplexityAnalyzer():
return
self
.
npunctuation
,
self
.
punctuation
,
self
.
N_words
def
lexical
C
omplexity
(
self
):
def
lexical
_c
omplexity
(
self
):
# Number of low frequency words
count
=
0
for
sentence
in
self
.
pos_content_sentences
:
for
w
in
sentence
:
if
w
.
text
not
in
self
.
crea
:
if
w
.
text
not
in
self
.
lang_word_freqs
:
count
+=
1
N_lfw
=
count
self
.
N_lfw
=
N_lfw
...
...
@@ -142,12 +143,12 @@ class ComplexityAnalyzer():
return
self
.
N_lfw
,
self
.
N_cw
,
self
.
N_dcw
,
self
.
N_s
,
self
.
LDI
,
self
.
ILFW
,
self
.
LC
def
ss
R
eadability
(
self
):
def
ss
_r
eadability
(
self
):
'''
Spaulding Score of Readability
'''
#Number of rare words
byfreq
=
sorted
(
self
.
crea
,
key
=
self
.
crea
.
__getitem__
,
reverse
=
True
)
byfreq
=
sorted
(
self
.
lang_word_freqs
,
key
=
self
.
lang_word_freqs
.
__getitem__
,
reverse
=
True
)
byfreq
=
byfreq
[:
1500
]
count
=
0
for
sentence
in
self
.
pos_content_sentences
:
...
...
@@ -162,7 +163,7 @@ class ComplexityAnalyzer():
return
self
.
N_rw
,
self
.
SSR
def
sentence
C
omplexity
(
self
):
def
sentence
_c
omplexity
(
self
):
#Number of complex sentences
N_cs
=
0
for
sentence
in
self
.
sentences
:
...
...
@@ -192,7 +193,7 @@ class ComplexityAnalyzer():
return
self
.
N_cs
,
self
.
ASL
,
self
.
CS
,
self
.
SCI
def
auto
R
eadability
(
self
):
def
auto
_r
eadability
(
self
):
# Number of characters
count
=
0
listwords
=
[]
...
...
@@ -225,7 +226,7 @@ class ComplexityAnalyzer():
return
320
return
1
+
max
(
self
.
tree_height
(
x
,
cont
)
for
x
in
root
.
children
)
def
embedding
D
epth
(
self
):
def
embedding
_d
epth
(
self
):
## Output results
roots
=
[
sent
.
root
for
sent
in
self
.
sentences
]
max_list
=
[]
...
...
@@ -304,7 +305,7 @@ class ComplexityAnalyzer():
return
self
.
n_syllables
,
self
.
n_syllables3
,
self
.
nletters
,
self
.
huertareadability
,
self
.
ifszreadability
,
self
.
polinicompressibility
,
self
.
mureadability
,
self
.
syll_words
,
self
.
words_sen
def
age
R
eadability
(
self
):
def
age
_r
eadability
(
self
):
minimumage
=
0.2495
*
(
self
.
N_words
/
self
.
nsentences
)
+
6.4763
*
(
self
.
n_syllables
/
self
.
N_words
)
-
7.1395
self
.
minimumage
=
minimumage
...
...
@@ -314,7 +315,7 @@ class ComplexityAnalyzer():
return
self
.
minimumage
,
self
.
solreadability
def
years
C
rawford
(
self
):
def
years
_c
rawford
(
self
):
years
=
-
20.5
*
(
self
.
nsentences
/
self
.
N_words
)
+
4.9
*
(
self
.
n_syllables
/
self
.
N_words
)
-
3.407
self
.
years
=
years
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment