Commit dbfb285b authored Feb 09, 2022 by Alba Maria Mármol

Merge remote-tracking branch 'origin/master'

Parents: d4ec88ef, 0e30ea56
Showing 2 changed files with 410 additions and 1 deletion
Changed files: TextAnalisisSpacy.py · TextComplexitySpacy.py

TextAnalisisSpacy.py (new file, 0 → 100644)
# Only required for analysis in Spanish
import spacy.cli
spacy.cli.download("es_core_news_sm")
import es_core_news_sm

# Imports
import spacy
import numpy as np
from tqdm import tqdm
import re
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline  # Jupyter-only magic; invalid in a plain .py module, kept here as a comment
import seaborn as sb
import nltk
from nltk.probability import FreqDist
from nltk.text import Text
from lexical_diversity import lex_div as ld
from IPython.display import display  # display() is called below but was never imported
from TextComplexitySpacy import TextComplexitySpacy  # defined in the second file of this commit
class TextAnalisisSpacy():
    def __init__(self, lang='es'):
        # Create the language analyzer (the original assigned self.nlp twice for 'es')
        if lang == 'es':
            self.nlp = es_core_news_sm.load()
            self.textComplexitySpacy = TextComplexitySpacy()
        elif lang == 'en':
            self.nlp = spacy.load("en_core_web_sm")
            self.textComplexitySpacy = TextComplexitySpacy('en')
        self.Text = Text
        self.FreqDist = FreqDist
        self.POS_LIST = ["ADJ", "ADP", "ADV", "AUX", "X", "CCONJ", "CONJ",
                         "DET", "INTJ", "NOUN", "NUM", "PART", "PRON", "PROPN",
                         "PUNCT", "SCONJ", "SYM", "VERB", "SPACE"]
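
    # Added note on call order: textProcessing() must run first, since every
    # other method reads the self.df and self.dic_categorias it creates;
    # lemmas() must run before lemmas_freq() (which reads self.dic_lemmas),
    # and pos() before pos_freq() (which reads self.dic_pos_cat).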
    #
    # X = samples input, y = tags
    #
    def textProcessing(self, X, y):
        d = {'category': y, 'text': X}
        self.df = pd.DataFrame(d)
        # Replace gaps
        self.df['text'].replace(np.nan, '', inplace=True)
        print('Shape: ', self.df.shape)
        # Create category dictionary (the original used a bare `df` here, a NameError)
        self.dic_categorias = {}
        for i in range(len(self.df)):
            if self.df.iloc[i, 0] in self.dic_categorias:
                self.dic_categorias[self.df.iloc[i, 0]] += 1
            else:
                self.dic_categorias[self.df.iloc[i, 0]] = 1
        self.df_category = pd.DataFrame({'category': self.dic_categorias.keys()})
        print('Dictionary of categories:', self.dic_categorias)
        # Initialising variables for graphs
        sb.set(rc={'figure.figsize': (14, 6)})
        all_values = self.dic_categorias.values()
        self.max_value = max(all_values)
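
    # Illustrative call (added; the sample data is invented): X is a list of
    # raw texts and y the parallel list of category labels, e.g.
    #   X = ["El gato duerme en el sofá.", "La bolsa subió un dos por ciento."]
    #   y = ["ocio", "economia"]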
    def showGraph(self, columnas, type_g='strip', export=False):
        # Graph generator
        for columna in columnas:
            if type_g == 'strip':
                splot = sb.stripplot(x=columna, y='category', data=self.df)
            elif type_g == 'box':
                splot = sb.boxplot(x=columna, y='category', data=self.df)
            elif type_g == 'heatmap':
                dic = {}
                groups = self.df.groupby(self.df.category)
                for cat in self.dic_categorias:
                    df_grupo = groups.get_group(cat)
                    dic[cat] = df_grupo[columna].tolist()
                    # Pad smaller categories by repeating their last value
                    while len(dic[cat]) < self.max_value:
                        dic[cat].append(dic[cat][len(dic[cat]) - 1])
                df_n = pd.DataFrame(dic)
                splot = sb.heatmap(df_n.transpose()).set_title(columna)
            if export == False:
                plt.show()
            else:
                splot.get_figure().savefig(columna + "-" + type_g + ".jpg", bbox_inches='tight')
                plt.clf()
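
    # Added note: with export=True each figure is saved as
    # "<column>-<type_g>.jpg"; otherwise it is shown interactively.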
    def export(self):
        print('Exporting...')
        self.df.to_csv("data.csv")
        self.df_category.to_csv("data_cat.csv")
        self.showGraph(self.df.columns[2:], 'strip', True)
        self.showGraph(self.df.columns[2:], 'box', True)
        self.showGraph(self.df.columns[2:], 'heatmap', True)
    def volumetry(self):
        # Volumetrics for each text
        self.df['words'] = [len(text.split()) for text in self.df['text'].tolist()]  # Number of words
        self.df['uniques'] = [len(set(text.split())) for text in self.df['text'].tolist()]  # Number of unique words
        self.df['chars'] = self.df['text'].str.len()  # Number of characters
        self.df['avg_words_len'] = round(self.df['chars'] / self.df['words'], 3)  # Average word length
        self.df = self.df.replace([np.inf, -np.inf, np.nan], 0)
        # Volumetrics for each category
        volumetry = ['words', 'uniques', 'chars', 'avg_words_len']
        category_columns = ['category', 'docs']
        for col in volumetry:
            category_columns.append('avg_' + col)
            category_columns.append('std_' + col)
        i = 0
        groups = self.df.groupby(self.df.category)
        for cat in self.dic_categorias:
            df_grupo = groups.get_group(cat)
            for col in volumetry:
                self.df_category.loc[i, 'docs'] = len(df_grupo)
                self.df_category.loc[i, 'avg_' + col] = round(df_grupo[col].mean(), 3)
                self.df_category.loc[i, 'std_' + col] = round(df_grupo[col].std(), 5)
            i += 1
        print('Volumetrics for each text:')
        display(self.df.head())
        print('Volumetrics for each category:')
        display(self.df_category[category_columns])
        self.showGraph(volumetry, 'strip')
        self.showGraph(volumetry, 'box')
        self.showGraph(volumetry, 'heatmap')
        return self.df, self.df_category
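
    # Added note: 'chars' counts every character including spaces, so
    # avg_words_len slightly overestimates the true average word length.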
    def lemmas(self):
        # Number and length of different lemmas per text
        dic_lemmas = {}
        for cat in self.dic_categorias:
            dic_lemmas[cat] = []
        groups = self.df.groupby(self.df.category)
        for cat in tqdm(self.dic_categorias):
            df_grupo = groups.get_group(cat)
            # Use the group's original row index so values land on the right
            # text (the original used a sequential counter, which misaligns
            # rows when categories are interleaved in the input)
            for i, text in zip(df_grupo.index, df_grupo['text'].tolist()):
                set_ = set()
                suma = 0
                doc = self.nlp(text)
                for token in doc:
                    set_.add(token.lemma_)
                    suma += len(token.lemma_)
                    if re.match('PUNCT.*|SYM.*|SPACE.*', token.pos_) is None:
                        dic_lemmas[cat].append(token.lemma_)
                self.df.loc[i, 'lemmas_uniques'] = len(set_)
                if len(set_) != 0:
                    self.df.loc[i, 'avg_lemmas_len'] = round(suma / len(set_), 3)
                else:
                    self.df.loc[i, 'avg_lemmas_len'] = suma
        self.dic_lemmas = dic_lemmas
        # Average and variance of different lemmas and length by category
        i = 0
        col_lemmas = ['lemmas_uniques', 'avg_lemmas_len']
        category_lemmas = ['category']
        for col in col_lemmas:
            category_lemmas.append('avg_' + col)
            category_lemmas.append('std_' + col)
        groups = self.df.groupby(self.df.category)
        for cat in self.dic_categorias:
            df_grupo = groups.get_group(cat)
            for col in col_lemmas:
                self.df_category.loc[i, 'docs'] = len(df_grupo)
                self.df_category.loc[i, 'avg_' + col] = round(df_grupo[col].mean(), 3)
                self.df_category.loc[i, 'std_' + col] = round(df_grupo[col].std(), 3)
            i += 1
        print('Lemmas for each text:')
        display(self.df.head())
        print('Lemmas for each category:')
        display(self.df_category[category_lemmas])
        self.showGraph(col_lemmas, 'strip')
        self.showGraph(col_lemmas, 'box')
        self.showGraph(col_lemmas, 'heatmap')
        return self.df, self.df_category
    def lemmas_freq(self, n=50):
        # Most frequent lemmas by category
        dic_f_lemmas = self.dic_categorias.copy()
        for cat in self.dic_categorias:
            text = self.Text(self.dic_lemmas[cat])
            dic_f_lemmas[cat] = self.FreqDist(text).most_common(n)
            lista = []
            for tupla in dic_f_lemmas[cat]:
                lista.append((tupla[0], round(tupla[1] / len(self.dic_lemmas[cat]), 4)))
            while len(lista) < n:
                # Fill gaps
                lista.append(np.nan)
            dic_f_lemmas[cat] = lista
        df_freq_lemas = pd.DataFrame(dic_f_lemmas)
        df_freq_lemas_tr = df_freq_lemas.transpose()
        print('Most frequent lemmas by category')
        display(df_freq_lemas_tr)
        df_freq_lemas_tr.to_csv("lemas_freq.csv")
        return df_freq_lemas.transpose()
    def pos(self):
        # POS analysis for each text
        dic_pos_cat = {}
        for pos in self.POS_LIST:
            dic_pos_cat[pos] = {}
            for cat in self.dic_categorias:
                dic_pos_cat[pos][cat] = []
        groups = self.df.groupby(self.df.category)
        for cat in self.dic_categorias:
            df_grupo = groups.get_group(cat)
            # As in lemmas(), iterate with the group's original row index
            for i, text in zip(df_grupo.index, tqdm(df_grupo['text'].tolist())):
                dic_pos = {}
                doc = self.nlp(text)
                for token in doc:
                    if token.pos_ in dic_pos:
                        dic_pos[token.pos_] += 1
                    else:
                        dic_pos[token.pos_] = 1
                    dic_pos_cat[token.pos_][cat].append(token.text)
                total = len(doc)
                if total == 0:
                    total = 1
                for pos in self.POS_LIST:
                    if pos in dic_pos:
                        self.df.loc[i, pos] = round(dic_pos[pos] / total, 4)
                    else:
                        self.df.loc[i, pos] = np.nan
        self.dic_pos_cat = dic_pos_cat
        # POS analysis for each category
        i = 0
        groups = self.df.groupby(self.df.category)
        for cat in self.dic_categorias:
            df_grupo = groups.get_group(cat)
            for pos in self.POS_LIST:
                if pos in df_grupo.columns.values:
                    self.df_category.loc[i, 'avg_' + pos] = round(df_grupo[pos].mean(), 3)
                    self.df_category.loc[i, 'std_' + pos] = round(df_grupo[pos].std(), 3)
            i += 1
        print('POS analysis for each text')
        display(self.df.head())
        print('POS analysis for each category')
        display(self.df_category)
        self.showGraph(self.POS_LIST, 'strip')
        self.showGraph(self.POS_LIST, 'box')
        self.showGraph(self.POS_LIST, 'heatmap')
        return self.df, self.df_category
    def pos_freq(self, n=15):
        # Most frequent words
        dic_f_palabras = self.dic_categorias.copy()
        for pos in self.POS_LIST:
            for cat in self.dic_categorias:
                if cat in self.dic_pos_cat[pos]:
                    text = self.Text(self.dic_pos_cat[pos][cat])
                    fdist = self.FreqDist(text)
                    dic_f_palabras[cat] = fdist.most_common(n)
                    lista = []
                    for tupla in dic_f_palabras[cat]:
                        lista.append((tupla[0], round(tupla[1] / len(self.dic_pos_cat[pos][cat]), 5)))
                    dic_f_palabras[cat] = lista
                    while len(dic_f_palabras[cat]) < n:
                        # Fill gaps
                        dic_f_palabras[cat].append(np.nan)
            df_freq_palabras = pd.DataFrame(dic_f_palabras)
            print("---- For " + spacy.explain(pos) + " the " + str(n) + " most frequent words are: -------")
            display(df_freq_palabras.transpose())
            df_freq_palabras_tr = df_freq_palabras.transpose()
            df_freq_palabras_tr.to_csv("POS_" + str(pos) + "_freq.csv")
        return df_freq_palabras.transpose()
    def lexical_diversity(self):
        # Lexical diversity for each text
        i = 0
        for text in tqdm(self.df['text'].tolist()):
            flt = ld.flemmatize(text)
            self.df.loc[i, 'simple_TTR'] = round(ld.ttr(flt), 4)
            self.df.loc[i, 'root_TTR'] = round(ld.root_ttr(flt), 4)
            self.df.loc[i, 'log_TTR'] = round(ld.log_ttr(flt), 4)
            self.df.loc[i, 'maas_TTR'] = round(ld.maas_ttr(flt), 4)
            self.df.loc[i, 'MSTTR'] = round(ld.msttr(flt), 4)
            self.df.loc[i, 'MATTR'] = round(ld.mattr(flt), 4)
            self.df.loc[i, 'HDD'] = round(ld.hdd(flt), 4)
            self.df.loc[i, 'MTLD'] = round(ld.mtld(flt), 4)
            i += 1
        # Lexical diversity for each category
        i = 0
        col_diversity = ['simple_TTR', 'root_TTR', 'log_TTR', 'maas_TTR',
                         'MSTTR', 'MATTR', 'HDD', 'MTLD']
        groups = self.df.groupby(self.df.category)
        for cat in self.dic_categorias:
            df_grupo = groups.get_group(cat)
            for col in col_diversity:
                self.df_category.loc[i, 'avg_' + col] = round(df_grupo[col].mean(), 4)
                self.df_category.loc[i, 'std_' + col] = round(df_grupo[col].std(), 4)
            i += 1
        print('Lexical diversity for each text')
        display(self.df.head())
        print('Lexical diversity for each category')
        display(self.df_category)
        self.showGraph(col_diversity, 'strip')
        self.showGraph(col_diversity, 'box')
        self.showGraph(col_diversity, 'heatmap')
        return self.df, self.df_category
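
    # Added note: the TTR variants above differ mainly in length sensitivity;
    # HDD and MTLD are designed to be more stable across text lengths than
    # the simple type-token ratio.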
    def complexity(self):
        # Complexity measures for each text (the original comment said
        # "for each category" here, which belongs to the second block)
        i = 0
        for text in tqdm(self.df['text'].tolist()):
            if len(text) > 0:
                text_processed = self.textComplexitySpacy.textProcessing(text)
                pmarks = self.textComplexitySpacy.punctuationMarks()[0]
                self.df.loc[i, 'lexcomplexity'] = self.textComplexitySpacy.lexicalComplexity()[6]
                self.df.loc[i, 'ssreadability'] = self.textComplexitySpacy.ssReadability()[1]
                self.df.loc[i, 'sencomplexity'] = self.textComplexitySpacy.sentenceComplexity()[3]
                self.df.loc[i, 'autoreadability'] = self.textComplexitySpacy.autoReadability()[1]
                embeddingdepth = self.textComplexitySpacy.embeddingDepth()
                self.df.loc[i, 'max_embeddingdepth'] = embeddingdepth[0]
                self.df.loc[i, 'min_embeddingdepth'] = embeddingdepth[1]
                self.df.loc[i, 'avg_embeddingdepth'] = embeddingdepth[2]
                readability = self.textComplexitySpacy.readability()
                self.df.loc[i, 'huertareadability'] = round(readability[3], 4)
                self.df.loc[i, 'ifszreadability'] = round(readability[4], 4)
                self.df.loc[i, 'polinicompressibility'] = round(readability[5], 4)
                self.df.loc[i, 'mureadability'] = round(readability[6], 4)
                self.df.loc[i, 'agereadability'] = self.textComplexitySpacy.ageReadability()[0]
                self.df.loc[i, 'yearscrawford'] = self.textComplexitySpacy.yearsCrawford()
            i += 1
        # Complexity measures for each category
        i = 0
        col_complexity = ['lexcomplexity', 'ssreadability', 'sencomplexity',
                          'autoreadability', 'max_embeddingdepth', 'min_embeddingdepth',
                          'avg_embeddingdepth', 'huertareadability', 'ifszreadability',
                          'polinicompressibility', 'mureadability', 'agereadability',
                          'yearscrawford']
        groups = self.df.groupby(self.df.category)
        for cat in self.dic_categorias:
            df_grupo = groups.get_group(cat)
            for col in col_complexity:
                self.df_category.loc[i, 'avg_' + col] = round(df_grupo[col].mean(), 4)
                self.df_category.loc[i, 'std_' + col] = round(df_grupo[col].std(), 4)
            i += 1
        print('Complexity diversity for each text')
        display(self.df.head())
        print('Complexity diversity for each category')
        display(self.df_category)
        self.showGraph(col_complexity, 'strip')
        self.showGraph(col_complexity, 'box')
        self.showGraph(col_complexity, 'heatmap')
        return self.df, self.df_category
    def featureSelection(self):
        df = self.df.fillna(0)
        X = df.iloc[:, 2:]
        y = df.iloc[:, 0]
        from sklearn.feature_selection import VarianceThreshold, SelectFromModel
        # Removing features with low variance
        sel = VarianceThreshold(threshold=(.8 * (1 - .8)))
        # Drops features that do not vary in more than 80% of the samples
        arr = sel.fit_transform(X)
        print('Removing features with low variance...')
        print('Selected columns:', sel.get_feature_names_out(self.df.columns.values[2:]))
        display(pd.DataFrame(arr))
        pd.DataFrame(arr).to_csv("VarianceThreshold.csv")
        # SelectFromModel
        # Selection of features based on L1
        from sklearn.svm import LinearSVC
        lsvc = LinearSVC(C=0.01, penalty="l1", dual=False).fit(X, y)
        model = SelectFromModel(lsvc, prefit=True)
        X_new = model.transform(X)
        print('Removing features with SelectFromModel...')
        print('Selected columns:', model.get_feature_names_out(df.columns.values[2:]))
        display(pd.DataFrame(X_new))
        pd.DataFrame(X_new).to_csv("SelectFromModel.csv")
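
    # Added note: the threshold .8 * (1 - .8) = 0.16 is the variance of a
    # boolean feature that takes the same value in 80% of samples, and the
    # L1-penalised LinearSVC produces sparse coefficients, so SelectFromModel
    # keeps only the features with a non-zero coefficient.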
    def kBest(self, k=10):
        df = self.df.fillna(0)
        X = df.iloc[:, 2:]
        y = df.iloc[:, 0]
        # Univariate feature selection
        from sklearn.feature_selection import SelectKBest
        from sklearn.feature_selection import f_classif, mutual_info_classif
        print('Highest scoring ' + str(k) + ' features with f_classif...')
        kbest_classif = SelectKBest(f_classif, k=k)
        # Removes all but the k highest-scoring features
        X_classif = kbest_classif.fit_transform(X, y)
        print('Selected columns:', kbest_classif.get_feature_names_out(self.df.columns.values[2:]))
        display(pd.DataFrame(X_classif))
        pd.DataFrame(X_classif).to_csv("f_classif.csv")
        print('Highest scoring ' + str(k) + ' features with mutual_info_classif...')
        kbest_mut = SelectKBest(mutual_info_classif, k=k)
        X_mut = kbest_mut.fit_transform(X, y)
        print('Selected columns:', kbest_mut.get_feature_names_out(self.df.columns.values[2:]))
        display(pd.DataFrame(X_mut))
        pd.DataFrame(X_mut).to_csv("mutual_info_classif.csv")
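
# Minimal usage sketch (added for illustration; the texts and labels below are
# invented and not part of the original project). Assumes the spaCy model
# downloaded above and TextComplexitySpacy.py alongside this file.
if __name__ == '__main__':
    X = ["El gato duerme en el sofá todo el día.",
         "La bolsa cerró ayer con una subida del dos por ciento.",
         "El equipo local ganó el partido en el último minuto."]
    y = ["ocio", "economia", "deportes"]
    analizador = TextAnalisisSpacy(lang='es')
    analizador.textProcessing(X, y)            # builds self.df and the category dictionary
    df, df_category = analizador.volumetry()   # per-text and per-category volumetrics
    analizador.export()                        # writes data.csv, data_cat.csv and the figures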
TextComplexitySpacy.py

@@ -2,7 +2,7 @@ from functools import reduce
 import math
 import syllables
-crea_total_path = '../CREA_total.txt'
+crea_total_path = './CREA_total.txt'
 class TextComplexitySpacy():