Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
Jaime Collado
/
textflow
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
1
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
5fdda07f
authored
May 30, 2022
by
Estrella Vallecillo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixing some bugs
parent
e65ff7ab
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
20 additions
and
5 deletions
Examples/AnalyzeADataframe.ipynb
textflow/EmotionAnalyzer.py
textflow/PolarityAnalyzer.py
textflow/StylometryAnalyzer.py
Examples/AnalyzeADataframe.ipynb
0 → 100644
View file @
5fdda07f
This diff could not be displayed because it is too large.
textflow/EmotionAnalyzer.py
View file @
5fdda07f
...
@@ -14,7 +14,7 @@ class EmotionAnalyzer(Analyzer):
...
@@ -14,7 +14,7 @@ class EmotionAnalyzer(Analyzer):
emotionsClassifier: a pipeline that uses a model to infer the emotions of the text of a sequence.
emotionsClassifier: a pipeline that uses a model to infer the emotions of the text of a sequence.
"""
"""
def
__init__
(
self
,
task
=
"text-classification"
,
modelEmotions
=
'pysentimiento/robertuito-emotion-analysis'
,
allScores
=
True
):
def
__init__
(
self
,
task
=
"text-classification"
,
modelEmotions
=
'pysentimiento/robertuito-emotion-analysis'
,
allScores
=
True
,
maxEmbedding
=
130
):
"""
"""
Create an emotions analyzer.
Create an emotions analyzer.
...
@@ -22,8 +22,10 @@ class EmotionAnalyzer(Analyzer):
...
@@ -22,8 +22,10 @@ class EmotionAnalyzer(Analyzer):
task: the task defining which pipeline will be returned.
task: the task defining which pipeline will be returned.
model: the model that will be used by the pipeline to make predictions.
model: the model that will be used by the pipeline to make predictions.
allScores: True if the classifier should return all scores; False otherwise.
allScores: True if the classifier should return all scores; False otherwise.
maxEmbedding: the value of max_position_embeddings in the config.json of the selected model.
"""
"""
self
.
emotionsClassifier
=
pipeline
(
task
,
model
=
modelEmotions
,
return_all_scores
=
allScores
)
self
.
emotionsClassifier
=
pipeline
(
task
,
model
=
modelEmotions
,
return_all_scores
=
allScores
)
self
.
maxEmbedding
=
maxEmbedding
def
analyze
(
self
,
sequence
,
tag
,
levelOfAnalyzer
,
levelOfResult
:
Optional
[
str
]
=
""
):
def
analyze
(
self
,
sequence
,
tag
,
levelOfAnalyzer
,
levelOfResult
:
Optional
[
str
]
=
""
):
...
@@ -51,7 +53,7 @@ class EmotionAnalyzer(Analyzer):
...
@@ -51,7 +53,7 @@ class EmotionAnalyzer(Analyzer):
"""
"""
arrayResults
=
[]
arrayResults
=
[]
for
text
in
arrayText
:
for
text
in
arrayText
:
prediction
=
self
.
emotionsClassifier
(
text
)
prediction
=
self
.
emotionsClassifier
(
text
[:
self
.
maxEmbedding
]
)
#arrayResults.append(prediction[0][0])
#arrayResults.append(prediction[0][0])
arrayResults
.
append
(
prediction
)
arrayResults
.
append
(
prediction
)
return
arrayResults
return
arrayResults
...
...
textflow/PolarityAnalyzer.py
View file @
5fdda07f
...
@@ -11,7 +11,7 @@ class PolarityAnalyzer(Analyzer):
...
@@ -11,7 +11,7 @@ class PolarityAnalyzer(Analyzer):
polarityClassifier: a pipeline that uses a model to infer the polarity of the text of a sequence.
polarityClassifier: a pipeline that uses a model to infer the polarity of the text of a sequence.
"""
"""
def
__init__
(
self
,
task
=
"text-classification"
,
modelPolarity
=
'finiteautomata/beto-sentiment-analysis'
,
allScores
=
True
):
def
__init__
(
self
,
task
=
"text-classification"
,
modelPolarity
=
'finiteautomata/beto-sentiment-analysis'
,
allScores
=
True
,
maxEmbedding
=
512
):
"""
"""
Create a polarity analyzer.
Create a polarity analyzer.
...
@@ -19,8 +19,10 @@ class PolarityAnalyzer(Analyzer):
...
@@ -19,8 +19,10 @@ class PolarityAnalyzer(Analyzer):
task: the task defining which pipeline will be returned
task: the task defining which pipeline will be returned
model: the model that will be used by the pipeline to make predictions
model: the model that will be used by the pipeline to make predictions
allScores: True if the classifier should return all scores; False otherwise
allScores: True if the classifier should return all scores; False otherwise
maxEmbedding: the value of max_position_embeddings in the config.json of the selected model.
"""
"""
self
.
polarityClassifier
=
pipeline
(
task
,
model
=
modelPolarity
,
return_all_scores
=
allScores
)
self
.
polarityClassifier
=
pipeline
(
task
,
model
=
modelPolarity
,
return_all_scores
=
allScores
)
self
.
maxEmbeding
=
maxEmbedding
...
@@ -48,7 +50,7 @@ class PolarityAnalyzer(Analyzer):
...
@@ -48,7 +50,7 @@ class PolarityAnalyzer(Analyzer):
"""
"""
arrayResults
=
[]
arrayResults
=
[]
for
text
in
arrayText
:
for
text
in
arrayText
:
prediction
=
self
.
polarityClassifier
(
text
)
prediction
=
self
.
polarityClassifier
(
text
[:
self
.
maxEmbeding
]
)
#arrayResults.append(prediction[0][0])
#arrayResults.append(prediction[0][0])
arrayResults
.
append
(
prediction
)
arrayResults
.
append
(
prediction
)
return
arrayResults
return
arrayResults
...
...
textflow/StylometryAnalyzer.py
View file @
5fdda07f
...
@@ -69,7 +69,7 @@ class StylometryAnalyzer(Analyzer):
...
@@ -69,7 +69,7 @@ class StylometryAnalyzer(Analyzer):
resultsList
=
[]
resultsList
=
[]
for
t
in
arrayText
:
for
t
in
arrayText
:
t
.
lower
()
t
.
lower
()
tokens
=
self
.
tokenizer
.
tokenize
(
t
)
tokens
=
self
.
tokenizer
.
tokenize
(
t
)
text
=
[
token
.
lower
()
for
token
in
tokens
]
text
=
[
token
.
lower
()
for
token
in
tokens
]
self
.
freqWords
(
text
,
self
.
stopwords
,
self
.
puntuation
)
self
.
freqWords
(
text
,
self
.
stopwords
,
self
.
puntuation
)
self
.
funcionesTTR
(
text
)
self
.
funcionesTTR
(
text
)
...
@@ -100,8 +100,19 @@ class StylometryAnalyzer(Analyzer):
...
@@ -100,8 +100,19 @@ class StylometryAnalyzer(Analyzer):
self
.
numWordFreqOne
=
len
(
[
token
[
0
]
for
token
in
self
.
freqWord
if
token
[
1
]
==
1
])
self
.
numWordFreqOne
=
len
(
[
token
[
0
]
for
token
in
self
.
freqWord
if
token
[
1
]
==
1
])
self
.
TTR
=
len
(
self
.
uniqueWords
)
/
len
(
text
)
self
.
TTR
=
len
(
self
.
uniqueWords
)
/
len
(
text
)
self
.
RTTR
=
len
(
self
.
uniqueWords
)
/
math
.
sqrt
(
len
(
text
))
self
.
RTTR
=
len
(
self
.
uniqueWords
)
/
math
.
sqrt
(
len
(
text
))
if
len
(
text
)
==
1
:
self
.
herdan
=
math
.
log
(
len
(
self
.
uniqueWords
),
10
)
else
:
self
.
herdan
=
math
.
log
(
len
(
self
.
uniqueWords
),
10
)
/
math
.
log
(
len
(
text
),
10
)
self
.
herdan
=
math
.
log
(
len
(
self
.
uniqueWords
),
10
)
/
math
.
log
(
len
(
text
),
10
)
if
pow
(
math
.
log
(
len
(
self
.
uniqueWords
),
10
),
2
)
==
0
:
self
.
mass
=
(
math
.
log
(
len
(
text
),
10
)
-
math
.
log
(
len
(
self
.
uniqueWords
),
10
))
else
:
self
.
mass
=
(
math
.
log
(
len
(
text
),
10
)
-
math
.
log
(
len
(
self
.
uniqueWords
),
10
))
/
pow
(
math
.
log
(
len
(
self
.
uniqueWords
),
10
),
2
)
self
.
mass
=
(
math
.
log
(
len
(
text
),
10
)
-
math
.
log
(
len
(
self
.
uniqueWords
),
10
))
/
pow
(
math
.
log
(
len
(
self
.
uniqueWords
),
10
),
2
)
if
len
(
text
)
==
10
:
self
.
somers
=
math
.
log
(
math
.
log
(
len
(
self
.
uniqueWords
),
10
),
10
)
elif
len
(
self
.
uniqueWords
)
==
10
or
len
(
self
.
uniqueWords
)
==
1
:
self
.
somers
=
0
else
:
self
.
somers
=
math
.
log
(
math
.
log
(
len
(
self
.
uniqueWords
),
10
),
10
)
/
math
.
log
(
math
.
log
(
len
(
text
),
10
),
10
)
self
.
somers
=
math
.
log
(
math
.
log
(
len
(
self
.
uniqueWords
),
10
),
10
)
/
math
.
log
(
math
.
log
(
len
(
text
),
10
),
10
)
if
math
.
log
(
len
(
text
),
10
)
-
math
.
log
(
len
(
self
.
uniqueWords
),
10
)
==
0
:
if
math
.
log
(
len
(
text
),
10
)
-
math
.
log
(
len
(
self
.
uniqueWords
),
10
)
==
0
:
self
.
dugast
=
pow
(
math
.
log
(
len
(
text
),
10
),
2
)
self
.
dugast
=
pow
(
math
.
log
(
len
(
text
),
10
),
2
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment