Fix some bugs: truncate over-long inputs in IronityAnalyzer, remove a debug print in NERAnalyzer, and make the last-segment check in Sequence path lookups positional.

parent 61cb8472
This diff could not be displayed because it is too large.
@@ -14,7 +14,7 @@ class IronityAnalyzer(Analyzer):
         maxEmbedding: The number of max_position_embedings in the config.json of the model selected.
     """
-    def __init__(self, task = "text-classification",modelIronity = 'dtomas/roberta-base-bne-irony', allScores = True, maxEmbedding = 512):
+    def __init__(self, task = "text-classification",modelIronity = 'dtomas/roberta-base-bne-irony', allScores = True, maxEmbedding = 514):
         """
         Create an ironic analyzer.
@@ -28,8 +28,8 @@ class IronityAnalyzer(Analyzer):
             model = AutoModelForSequenceClassification.from_pretrained(modelIronity)
             model.config.id2label = {0: 'NI', 1: 'I'}
             model.config.label2id = {'NI': 0, 'I': 1}
-            tokenizer = AutoTokenizer.from_pretrained(modelIronity, model_max_length=512)
-            self.ironityClassifier = pipeline(task,model= model, tokenizer=tokenizer,return_all_scores=allScores)
+            tokenizer = AutoTokenizer.from_pretrained(modelIronity)
+            self.ironityClassifier = pipeline(task,model= model, tokenizer=tokenizer,return_all_scores=allScores, truncation=True)
         else:
             self.ironityClassifier = pipeline(task,model= modelIronity, return_all_scores=allScores)
         self.maxEmbeding = maxEmbedding
...
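Note on the two hunks above: RoBERTa-style checkpoints typically declare max_position_embeddings = 514 in config.json (512 usable tokens plus two positions reserved for the padding offset), so hard-coding model_max_length=512 onto the tokenizer while documenting maxEmbedding as the config value mixed up two different quantities. The fix keeps the tokenizer's own limit and asks the pipeline to truncate. A minimal sketch of the fixed construction path, assuming the Hugging Face transformers library (the oversized test input is invented for illustration):

# Sketch only: transformers assumed installed; the long input is illustrative.
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

model = AutoModelForSequenceClassification.from_pretrained('dtomas/roberta-base-bne-irony')
model.config.id2label = {0: 'NI', 1: 'I'}
model.config.label2id = {'NI': 0, 'I': 1}

# Keep the tokenizer's own length limit instead of forcing 512;
# truncation=True makes the pipeline clip over-long inputs to that limit.
tokenizer = AutoTokenizer.from_pretrained('dtomas/roberta-base-bne-irony')
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer,
                      return_all_scores=True, truncation=True)

print(classifier("qué texto tan largo " * 300))  # no longer overflows the position embeddings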
@@ -78,7 +78,6 @@ class NERAnalyzer(Analyzer):
                 textner.append(doc[i].ent_type_)
             else:
                 textner.append(doc[i].text)
-        print(textner)
         self.textNER = " ".join(textner)
         for ent in doc.ents:
             # Save the dictionary obtained for the word's category (if it exists)
...
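For readers skimming the NER hunk: besides dropping the debug print, the surrounding loop rebuilds the text with each entity token replaced by its entity label before joining. A hedged sketch of that idea, assuming spaCy (the Spanish model name and example sentence are illustrative guesses, not taken from the diff):

import spacy

# Illustrative only: pipeline name and example sentence are assumptions.
nlp = spacy.load("es_core_news_sm")
doc = nlp("María vive en Madrid")

# Mirrors the diff's loop: keep a token's entity label if it has one,
# otherwise keep the token text, then join back into one string.
textner = [tok.ent_type_ if tok.ent_type_ else tok.text for tok in doc]
print(" ".join(textner))  # -> something like "PER vive en LOC"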
@@ -199,10 +199,10 @@ class Sequence(ABC):
         ruta = level.split("/")
         children = [self.children]
         results=[]
-        for r in ruta:
+        for idx, r in enumerate(ruta):
             for child in children:
                 if r in child:
-                    if r == ruta[-1]:
+                    if r == ruta[-1] and idx == len(ruta)-1:
                         results.extend(child[r])
                     else:
                         children = [c.children for c in child[r]]
@@ -230,8 +230,8 @@ class Sequence(ABC):
         children = [self.children]
         metadata = [self.metadata]
         results=[]
-        for r in ruta:
-            if r == ruta[-1]:
+        for idx, r in enumerate(ruta):
+            if r == ruta[-1] and idx == len(ruta)-1:
                 for m in metadata:
                     if r in m:
                         results.append(m[r])
...
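Why both Sequence hunks add the idx == len(ruta)-1 guard: r == ruta[-1] compares segment values, so a path whose last segment name also appears earlier (e.g. "doc/span/doc") matched on the first occurrence and collected results at the wrong depth. A toy reproduction (path segments invented for illustration):

ruta = "doc/span/doc".split("/")

# Old check: the value comparison fires at index 0 and index 2.
old_hits = [i for i, r in enumerate(ruta) if r == ruta[-1]]
# Fixed check: the positional guard restricts the match to the true last segment.
new_hits = [i for i, r in enumerate(ruta) if r == ruta[-1] and i == len(ruta) - 1]

print(old_hits)  # [0, 2] -> results gathered one level too early
print(new_hits)  # [2]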