Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
SINAI
/
texty
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
e289393a
authored
Feb 15, 2022
by
Alba Maria Mármol
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
Update TextAnalysisSpacy.py
parent
d682f4fa
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
34 additions
and
1 deletions
TextAnalysisSpacy.py
TextAnalysisSpacy.py
View file @
e289393a
...
...
@@ -17,6 +17,7 @@ import nltk
from
nltk.probability
import
FreqDist
from
nltk.text
import
Text
from
lexical_diversity
import
lex_div
as
ld
from
transformers
import
pipeline
class
TextAnalysisSpacy
():
...
...
@@ -306,6 +307,39 @@ class TextAnalysisSpacy():
self
.
showGraph
(
col_complexity
,
'heatmap'
)
return
self
.
df
def emotions(self):
    """Score every text with two emotion classifiers and display the results.

    For each entry of ``self.df['text']`` both text-classification
    pipelines are run and one column per predicted label is written back
    into ``self.df``: ``emo6_<label>`` for the first model and
    ``emo28_<label>`` for the second. Afterwards the head of the frame and
    the per-``category`` mean/median/std are displayed, and the emotion
    columns are plotted as strip, box and heatmap graphs via
    ``self.showGraph``.

    The method mutates ``self.df`` in place and returns ``None``.
    """
    classifier6 = pipeline(
        "text-classification",
        model='model1_path',
        return_all_scores=True,
    )
    classifier28 = pipeline(
        "text-classification",
        model='model2_path',
        return_all_scores=True,
    )

    # Iterate over (index label, text) pairs so the scores are written to
    # the row they belong to even when the DataFrame index is not a plain
    # 0..n-1 range (``.loc`` is label-based, not positional).
    texts = self.df['text']
    for row_label, text in tqdm(texts.items(), total=len(texts)):
        # Convert first, then truncate: slicing a non-string cell (e.g. a
        # NaN for a missing text) would raise a TypeError.
        # Truncation is by characters, not by model tokens.
        text = str(text)[:512]

        # With return_all_scores=True the pipeline output is indexed with
        # [0] to get the list of {'label', 'score'} dicts for this text.
        for pred in classifier6(text)[0]:
            self.df.loc[row_label, 'emo6_' + pred['label']] = pred['score']
        for pred in classifier28(text)[0]:
            self.df.loc[row_label, 'emo28_' + pred['label']] = pred['score']

    # Emotions for each category
    col_emotions = [
        'emo6_sadness',
        'emo6_joy',
        'emo6_love',
        'emo6_anger',
        'emo6_fear',
        'emo6_surprise',
        'emo28_admiration',
        'emo28_amusement',
        'emo28_anger',
        'emo28_annoyance',
        'emo28_approval',
        'emo28_caring',
        'emo28_confusion',
        'emo28_curiosity',
        'emo28_desire',
        'emo28_disappointment',
        'emo28_disapproval',
        'emo28_disgust',
        'emo28_embarrassment',
        'emo28_excitement',
        'emo28_fear',
        'emo28_gratitude',
        'emo28_grief',
        'emo28_joy',
        'emo28_love',
        'emo28_nervousness',
        'emo28_optimism',
        'emo28_pride',
        'emo28_realization',
        'emo28_relief',
        'emo28_remorse',
        'emo28_sadness',
        'emo28_surprise',
        'emo28_neutral',
    ]

    print('Emotions for each text')
    display(self.df.head())

    print('Emotions for each category')
    display(self.df.groupby('category').agg(['mean', 'median', 'std']))

    for graph_kind in ('strip', 'box', 'heatmap'):
        self.showGraph(col_emotions, graph_kind)
def
featureSelection
(
self
):
df
=
self
.
df
.
fillna
(
0
)
X
=
df
.
iloc
[:,
2
:]
...
...
@@ -337,7 +371,6 @@ class TextAnalysisSpacy():
df
=
self
.
df
.
fillna
(
0
)
X
=
df
.
iloc
[:,
2
:]
y
=
df
.
iloc
[:,
0
]
# Univariate feature selection
from
sklearn.feature_selection
import
SelectKBest
from
sklearn.feature_selection
import
f_classif
,
mutual_info_classif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment