Jaime Collado / textflow
Commit 364dde8c, authored Jun 23, 2022 by Estrella Vallecillo
fixing some bugs
parent 61cb8472
Showing 5 changed files with 7 additions and 8 deletions:
Examples/AnalyzeADataframe.ipynb
Examples/Example.ipynb
textflow/IronityAnalyzer.py
textflow/NERAnalyzer.py
textflow/Sequence.py
Examples/AnalyzeADataframe.ipynb
This diff could not be displayed because it is too large.
Examples/Example.ipynb
This diff is collapsed.
textflow/IronityAnalyzer.py
...
@@ -14,7 +14,7 @@ class IronityAnalyzer(Analyzer):
         maxEmbedding: The number of max_position_embedings in the config.json of the model selected.
     """
-    def __init__(self, task="text-classification", modelIronity='dtomas/roberta-base-bne-irony', allScores=True, maxEmbedding=512):
+    def __init__(self, task="text-classification", modelIronity='dtomas/roberta-base-bne-irony', allScores=True, maxEmbedding=514):
         """
         Create an ironic analyzer.
...
@@ -28,8 +28,8 @@ class IronityAnalyzer(Analyzer):
             model = AutoModelForSequenceClassification.from_pretrained(modelIronity)
             model.config.id2label = {0: 'NI', 1: 'I'}
             model.config.label2id = {'NI': 0, 'I': 1}
-            tokenizer = AutoTokenizer.from_pretrained(modelIronity, model_max_length=512)
-            self.ironityClassifier = pipeline(task, model=model, tokenizer=tokenizer, return_all_scores=allScores)
+            tokenizer = AutoTokenizer.from_pretrained(modelIronity)
+            self.ironityClassifier = pipeline(task, model=model, tokenizer=tokenizer, return_all_scores=allScores, truncation=True)
         else:
             self.ironityClassifier = pipeline(task, model=modelIronity, return_all_scores=allScores)
         self.maxEmbeding = maxEmbedding
...
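The two hunks above raise the default maxEmbedding from 512 to 514 and move truncation from the tokenizer (model_max_length=512) onto the pipeline itself (truncation=True), so over-length inputs are clipped at classification time. Below is a minimal sketch, not taken from the repository, of how that patched construction behaves with the Hugging Face transformers API; the label meanings and the sample text are assumptions.

# Minimal sketch (not repository code): the construction path after this patch.
# The model name comes from the diff; the sample text below is invented.
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

model = AutoModelForSequenceClassification.from_pretrained('dtomas/roberta-base-bne-irony')
model.config.id2label = {0: 'NI', 1: 'I'}   # presumably 'NI' = not ironic, 'I' = ironic
model.config.label2id = {'NI': 0, 'I': 1}
tokenizer = AutoTokenizer.from_pretrained('dtomas/roberta-base-bne-irony')

# truncation=True is stored as a default preprocessing parameter, so every call
# clips the input to the model's maximum length instead of overflowing it.
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer,
                      return_all_scores=True, truncation=True)

print(classifier("Qué maravilla, otro lunes de lluvia. " * 300))  # long input, no size error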
textflow/NERAnalyzer.py
...
@@ -78,7 +78,6 @@ class NERAnalyzer(Analyzer):
                 textner.append(doc[i].ent_type_)
             else:
                 textner.append(doc[i].text)
-        print(textner)
         self.textNER = " ".join(textner)
         for ent in doc.ents:
             #Guardamos el diccionario obtenido para la categoria de la palabra (si este existe)
...
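For context, the loop this hunk touches replaces each token of a spaCy Doc with its entity label when the token is part of a named entity; the commit only drops a leftover debug print(textner). Below is a minimal, self-contained sketch of that substitution; the guard condition, the es_core_news_sm model, and the sentence are assumptions, not repository code.

# Minimal sketch (assumptions: spaCy with es_core_news_sm installed; invented sentence).
import spacy

nlp = spacy.load("es_core_news_sm")
doc = nlp("Estrella Vallecillo estudia en la Universidad de Jaén.")

textner = []
for i in range(len(doc)):
    if doc[i].ent_type_:                    # assumed guard: token belongs to a named entity
        textner.append(doc[i].ent_type_)    # keep its label, e.g. 'PER' or 'ORG'
    else:
        textner.append(doc[i].text)         # keep the original token
textNER = " ".join(textner)                 # the string stored in self.textNER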
textflow/Sequence.py
...
@@ -199,10 +199,10 @@ class Sequence(ABC):
         ruta = level.split("/")
         children = [self.children]
         results = []
-        for r in ruta:
+        for idx, r in enumerate(ruta):
             for child in children:
                 if r in child:
-                    if r == ruta[-1]:
+                    if r == ruta[-1] and idx == len(ruta) - 1:
                         results.extend(child[r])
                     else:
                         children = [c.children for c in child[r]]
...
@@ -230,8 +230,8 @@ class Sequence(ABC):
         children = [self.children]
         metadata = [self.metadata]
         results = []
-        for r in ruta:
-            if r == ruta[-1]:
+        for idx, r in enumerate(ruta):
+            if r == ruta[-1] and idx == len(ruta) - 1:
                 for m in metadata:
                     if r in m:
                         results.append(m[r])
...
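Both hunks switch the walk to enumerate(ruta) and add idx == len(ruta) - 1 to the terminal check. The guard matters when the last segment of the level path also appears at a shallower depth: comparing only r == ruta[-1] would collect results the first time that name is seen instead of at the final level. A minimal sketch with hypothetical data follows; the Node class and the "phrases/phrases" path are invented for illustration, not taken from the repository.

# Minimal sketch (hypothetical data; Node stands in for a textflow Sequence child).
class Node:
    def __init__(self, children):
        self.children = children   # dict: level name -> list of child Nodes

leaf_a, leaf_b = Node({}), Node({})
root_children = {"phrases": [Node({"phrases": [leaf_a, leaf_b]})]}  # same key at two depths

def collect(level, root_children):
    ruta = level.split("/")
    children = [root_children]
    results = []
    for idx, r in enumerate(ruta):                          # patched loop
        for child in children:
            if r in child:
                if r == ruta[-1] and idx == len(ruta) - 1:  # only at the final depth
                    results.extend(child[r])
                else:
                    children = [c.children for c in child[r]]
    return results

# With only `r == ruta[-1]`, the walk would stop at depth 0 and return the
# intermediate Node; with the idx guard it descends and returns the two leaves.
print(collect("phrases/phrases", root_children))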