Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
SINAI
/
texty
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
0e30ea56
authored
Feb 09, 2022
by
Alba Maria Mármol
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
Update TextAnalisisSpacy.py
parent
20d4231a
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
6 deletions
TextAnalisisSpacy.py
TextAnalisisSpacy.py
View file @
0e30ea56
...
@@ -79,8 +79,9 @@ class TextAnalisisSpacy():
...
@@ -79,8 +79,9 @@ class TextAnalisisSpacy():
dic
[
cat
]
.
append
(
dic
[
cat
][
len
(
dic
[
cat
])
-
1
])
dic
[
cat
]
.
append
(
dic
[
cat
][
len
(
dic
[
cat
])
-
1
])
df_n
=
pd
.
DataFrame
(
dic
)
df_n
=
pd
.
DataFrame
(
dic
)
splot
=
sb
.
heatmap
(
df_n
.
transpose
())
.
set_title
(
columna
)
splot
=
sb
.
heatmap
(
df_n
.
transpose
())
.
set_title
(
columna
)
plt
.
show
()
if
export
==
False
:
if
export
:
plt
.
show
()
else
:
splot
.
get_figure
()
.
savefig
(
columna
+
"-"
+
type_g
+
".jpg"
,
bbox_inches
=
'tight'
)
splot
.
get_figure
()
.
savefig
(
columna
+
"-"
+
type_g
+
".jpg"
,
bbox_inches
=
'tight'
)
plt
.
clf
()
plt
.
clf
()
...
@@ -276,7 +277,7 @@ class TextAnalisisSpacy():
...
@@ -276,7 +277,7 @@ class TextAnalisisSpacy():
display
(
df_freq_palabras
.
transpose
())
display
(
df_freq_palabras
.
transpose
())
df_freq_palabras_tr
=
df_freq_palabras
.
transpose
()
df_freq_palabras_tr
=
df_freq_palabras
.
transpose
()
df_freq_palabras_tr
.
to_csv
(
"POS_"
+
str
(
pos
)
+
"_freq.csv"
)
df_freq_palabras_tr
.
to_csv
(
"POS_"
+
str
(
pos
)
+
"_freq.csv"
)
return
df_freq_palabras
.
transpose
()
return
df_freq_palabras
.
transpose
()
def
lexical_diversity
(
self
):
def
lexical_diversity
(
self
):
# Lexical diversity for each text
# Lexical diversity for each text
...
@@ -392,15 +393,15 @@ class TextAnalisisSpacy():
...
@@ -392,15 +393,15 @@ class TextAnalisisSpacy():
# Univariate feature selection
# Univariate feature selection
from
sklearn.feature_selection
import
SelectKBest
from
sklearn.feature_selection
import
SelectKBest
from
sklearn.feature_selection
import
f_classif
,
mutual_info_classif
from
sklearn.feature_selection
import
f_classif
,
mutual_info_classif
print
(
'Highest scoring '
+
k
+
' features with f_classif...'
)
print
(
'Highest scoring '
+
str
(
k
)
+
' features with f_classif...'
)
kbest_classif
=
SelectKBest
(
f_classif
,
k
)
# Elimina todo menos las k características de puntuación más alta
kbest_classif
=
SelectKBest
(
f_classif
,
k
=
k
)
# Elimina todo menos las k características de puntuación más alta
X_classif
=
kbest_classif
.
fit_transform
(
X
,
y
)
X_classif
=
kbest_classif
.
fit_transform
(
X
,
y
)
print
(
'Selected columns:'
,
kbest_classif
.
get_feature_names_out
(
self
.
df
.
columns
.
values
[
2
:]))
print
(
'Selected columns:'
,
kbest_classif
.
get_feature_names_out
(
self
.
df
.
columns
.
values
[
2
:]))
display
(
pd
.
DataFrame
(
X_classif
))
display
(
pd
.
DataFrame
(
X_classif
))
pd
.
DataFrame
(
X_classif
)
.
to_csv
(
"f_classif.csv"
)
pd
.
DataFrame
(
X_classif
)
.
to_csv
(
"f_classif.csv"
)
print
(
'Highest scoring '
+
str
(
k
)
+
' features with mutual_info_classif...'
)
print
(
'Highest scoring '
+
str
(
k
)
+
' features with mutual_info_classif...'
)
kbest_mut
=
SelectKBest
(
mutual_info_classif
,
k
)
kbest_mut
=
SelectKBest
(
mutual_info_classif
,
k
=
k
)
X_mut
=
kbest_mut
.
fit_transform
(
X
,
y
)
X_mut
=
kbest_mut
.
fit_transform
(
X
,
y
)
print
(
'Selected columns:'
,
kbest_mut
.
get_feature_names_out
(
self
.
df
.
columns
.
values
[
2
:]))
print
(
'Selected columns:'
,
kbest_mut
.
get_feature_names_out
(
self
.
df
.
columns
.
values
[
2
:]))
display
(
pd
.
DataFrame
(
X_mut
))
display
(
pd
.
DataFrame
(
X_mut
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment