Fix some bugs: truncate over-long inputs in IronityAnalyzer, remove a debug print in NERAnalyzer, and make the last-segment check in Sequence path lookups positional.

parent 61cb8472
This diff could not be displayed because it is too large.
@@ -14,7 +14,7 @@ class IronityAnalyzer(Analyzer):
         maxEmbedding: The number of max_position_embedings in the config.json of the model selected.
     """
-    def __init__(self, task = "text-classification",modelIronity = 'dtomas/roberta-base-bne-irony', allScores = True, maxEmbedding = 512):
+    def __init__(self, task = "text-classification",modelIronity = 'dtomas/roberta-base-bne-irony', allScores = True, maxEmbedding = 514):
         """
         Create an ironic analyzer.
@@ -28,8 +28,8 @@ class IronityAnalyzer(Analyzer):
             model = AutoModelForSequenceClassification.from_pretrained(modelIronity)
             model.config.id2label = {0: 'NI', 1: 'I'}
             model.config.label2id = {'NI': 0, 'I': 1}
-            tokenizer = AutoTokenizer.from_pretrained(modelIronity, model_max_length=512)
-            self.ironityClassifier = pipeline(task,model= model, tokenizer=tokenizer,return_all_scores=allScores)
+            tokenizer = AutoTokenizer.from_pretrained(modelIronity)
+            self.ironityClassifier = pipeline(task,model= model, tokenizer=tokenizer,return_all_scores=allScores, truncation=True)
         else:
             self.ironityClassifier = pipeline(task,model= modelIronity, return_all_scores=allScores)
         self.maxEmbeding = maxEmbedding
...
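Note on the two hunks above: RoBERTa-style checkpoints typically declare max_position_embeddings = 514 in config.json (512 usable tokens plus two positions reserved for the padding offset), so hard-coding model_max_length=512 onto the tokenizer while documenting maxEmbedding as the config value mixed up two different quantities. The fix keeps the tokenizer's own limit and asks the pipeline to truncate. A minimal sketch of the fixed construction path, assuming the Hugging Face transformers library (the oversized test input is invented for illustration):

# Sketch only: transformers assumed installed; the long input is illustrative.
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

model = AutoModelForSequenceClassification.from_pretrained('dtomas/roberta-base-bne-irony')
model.config.id2label = {0: 'NI', 1: 'I'}
model.config.label2id = {'NI': 0, 'I': 1}

# Keep the tokenizer's own length limit instead of forcing 512;
# truncation=True makes the pipeline clip over-long inputs to that limit.
tokenizer = AutoTokenizer.from_pretrained('dtomas/roberta-base-bne-irony')
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer,
                      return_all_scores=True, truncation=True)

print(classifier("qué texto tan largo " * 300))  # no longer overflows the position embeddings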
@@ -78,7 +78,6 @@ class NERAnalyzer(Analyzer):
                 textner.append(doc[i].ent_type_)
             else:
                 textner.append(doc[i].text)
-        print(textner)
         self.textNER = " ".join(textner)
         for ent in doc.ents:
             # Save the dictionary obtained for the word's category (if it exists)
...
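For readers skimming the NER hunk: besides dropping the debug print, the surrounding loop rebuilds the text with each entity token replaced by its entity label before joining. A hedged sketch of that idea, assuming spaCy (the Spanish model name and example sentence are illustrative guesses, not taken from the diff):

import spacy

# Illustrative only: pipeline name and example sentence are assumptions.
nlp = spacy.load("es_core_news_sm")
doc = nlp("María vive en Madrid")

# Mirrors the diff's loop: keep a token's entity label if it has one,
# otherwise keep the token text, then join back into one string.
textner = [tok.ent_type_ if tok.ent_type_ else tok.text for tok in doc]
print(" ".join(textner))  # -> something like "PER vive en LOC"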
@@ -199,10 +199,10 @@ class Sequence(ABC):
         ruta = level.split("/")
         children = [self.children]
         results=[]
-        for r in ruta:
+        for idx, r in enumerate(ruta):
             for child in children:
                 if r in child:
-                    if r == ruta[-1]:
+                    if r == ruta[-1] and idx == len(ruta)-1:
                         results.extend(child[r])
                     else:
                         children = [c.children for c in child[r]]
@@ -230,8 +230,8 @@ class Sequence(ABC):
         children = [self.children]
         metadata = [self.metadata]
         results=[]
-        for r in ruta:
-            if r == ruta[-1]:
+        for idx, r in enumerate(ruta):
+            if r == ruta[-1] and idx == len(ruta)-1:
                 for m in metadata:
                     if r in m:
                         results.append(m[r])
...
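Why both Sequence hunks add the idx == len(ruta)-1 guard: r == ruta[-1] compares segment values, so a path whose last segment name also appears earlier (e.g. "doc/span/doc") matched on the first occurrence and collected results at the wrong depth. A toy reproduction (path segments invented for illustration):

ruta = "doc/span/doc".split("/")

# Old check: the value comparison fires at index 0 and index 2.
old_hits = [i for i, r in enumerate(ruta) if r == ruta[-1]]
# Fixed check: the positional guard restricts the match to the true last segment.
new_hits = [i for i, r in enumerate(ruta) if r == ruta[-1] and i == len(ruta) - 1]

print(old_hits)  # [0, 2] -> results gathered one level too early
print(new_hits)  # [2]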