deleting some comments

parent e9f93ab1
@@ -539,7 +539,7 @@ telegram = ["requests"]

 [[package]]
 name = "transformers"
-version = "4.19.1"
+version = "4.19.2"
 description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
 category = "main"
 optional = false
@@ -653,8 +653,8 @@ python-versions = "*"

 [metadata]
 lock-version = "1.1"
-python-versions = "3.8"
-content-hash = "f559d5695f1365c162f02c2146df48de52ad2d38e1b4a26476c7a662dc065365"
+python-versions = "^3.8"
+content-hash = "469bc77da37a726f078d5ae68733d1edabc0d6e9d613f137f57fb0ffde45b43d"

 [metadata.files]
 atomicwrites = [
@@ -1143,8 +1143,8 @@ tqdm = [
     {file = "tqdm-4.64.0.tar.gz", hash = "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d"},
 ]
 transformers = [
-    {file = "transformers-4.19.1-py3-none-any.whl", hash = "sha256:16d3dd257d459c2598e2548a9e6875c10b7db5e44494d93b3c0a5c60afad667f"},
-    {file = "transformers-4.19.1.tar.gz", hash = "sha256:6fb30ee534a25b6b3fc7064c280b7f44abf8c9bd1fb358860ebe4fd392bf15f5"},
+    {file = "transformers-4.19.2-py3-none-any.whl", hash = "sha256:1416315b7c5ff1f56d3915f416b67aa254a9907fbb73ef7f7bffc9210446b5fa"},
+    {file = "transformers-4.19.2.tar.gz", hash = "sha256:e19a4ff07458eda143c738e5259caf48449fcf078a63d6b1bd1aa806543440a3"},
 ]
 typer = [
     {file = "typer-0.4.1-py3-none-any.whl", hash = "sha256:e8467f0ebac0c81366c2168d6ad9f888efdfb6d4e1d3d5b4a004f46fa444b5c3"},
......
@@ -5,7 +5,7 @@ description = "A text analysis library for Python"
 authors = ["Jaime Collado <jcollado@ujaen.es>", "Estrella Vallecillo <mevr0003@red.ujaen.es>"]

 [tool.poetry.dependencies]
-python = "3.8"
+python = "^3.8"
 nltk = "^3.7"
 spacy = "^3.3.0"
 transformers = "^4.19.0"
......
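For context on the constraint change above: Poetry's caret operator widens the requirement from exactly 3.8 to any compatible 3.x release, i.e. `>=3.8,<4.0`, and the lock file's `content-hash` changes accordingly. A minimal sketch of the rule (illustrative helper, not project code):

```python
# Illustration of the caret constraint "^3.8": accepts >=3.8,<4.0.
def satisfies_caret_3_8(major: int, minor: int) -> bool:
    return major == 3 and minor >= 8

assert satisfies_caret_3_8(3, 10)      # 3.10 is compatible
assert not satisfies_caret_3_8(4, 0)   # 4.0 is not
```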
@@ -10,14 +10,16 @@ class Analyzer(ABC):

     @abstractmethod
     def analyze(self, functionAnalyzer, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = "", analyzeMetadata: Optional[bool] = False): #TODO
-        """Analyze a sequence
+        """
+        Abstract method that analyzes a sequence.

         Args:
-            sequence: the Sequence we want to analyze
-            tag: the label to store the analysis result
-            levelOfAnalyzer: the path of the sequence level to analyze inside of the result (the subsequence to analyze within the sequence where we want to store the result)
-            levelOfResult: the path of the sequence level to store the result (we may want to analyze the tokens but store the result at sentence level)
-            analyzeMetadata: boolean, if the result of the analyzer is applied in metadata (True) or in children (False)
+            functionAnalyzer: the function of the analyzer.
+            sequence: the Sequence we want to analyze.
+            tag: the label to store the analysis result.
+            levelOfAnalyzer: the path of the sequence level to analyze inside of the result.
+            levelOfResult: the path of the sequence level to store the result.
+            analyzeMetadata: boolean, if the result of the analyzer is applied in metadata (True) or in children (False).

         Raises:
             ValueError if the levelOfResult is incorrect
@@ -65,7 +67,7 @@ class Analyzer(ABC):
                 else:
                     raise ValueError(f"Sequence level '{r}' not found in {child}")
 \ No newline at end of file
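To make the abstract template concrete, here is a minimal hypothetical subclass (not part of this diff) following the same pattern the concrete analyzers below use: the subclass supplies its analysis function and delegates the `levelOfResult` traversal to the base class.

```python
from typing import Optional
from textflow.Analyzer import Analyzer

class CharCountAnalyzer(Analyzer):
    """Hypothetical example analyzer: counts characters of each text."""

    def charCount(self, arrayText):
        # One result dictionary per input text.
        return [{"chars": len(text)} for text in arrayText]

    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""):
        # Hand our function to the base class, which walks levelOfResult
        # and stores the results under `tag` in the matching metadata.
        super().analyze(self.charCount, sequence, tag, levelOfAnalyzer, levelOfResult, True)
```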
@@ -11,62 +11,47 @@ from textflow.Analyzer import Analyzer

 creaPath = os.path.join(os.path.dirname(__file__), 'Crea-5000.txt')
+spacy.cli.download("es_core_news_sm")

 class ComplexityAnalyzer(Analyzer):
-    def __init__(self, rutaArchivoCrea = creaPath, lang = "es"):
-        """Creates an analyzer from an input object.
+    def __init__(self, rutaArchivoCrea = creaPath, nlp = spacy.load("es_core_news_sm")):
+        """
+        Create a complexity analyzer from an input object.

         Args:
-            function: the function of the analyzer like count words, files...
             rutaArchivoCrea: the file that contains the most frequent words of the Spanish language
-            isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children (False)
         """
-        if lang == "es":
-            spacy.cli.download("es_core_news_sm")
-            self.nlp = spacy.load("es_core_news_sm")
-        # Load CREA:
-        self.dicFreqWords = self.read(rutaArchivoCrea)
+        self.nlp = nlp
+        # Load CREA:
+        self.dicFreqWords = self.read(rutaArchivoCrea)
 # This analyzer can only process text strings, so it only makes sense to use the text attribute of metadata
-    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""): #TODO
-        """Analyze a sequence
+    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""):
+        """
+        Analyze a sequence with a complexity function.

         Args:
-            sequence: the Sequence we want to analyze
-            tag: the label to store the analysis result
-            levelOfAnalyzer: the path of the sequence level to analyze inside of the result (the subsequence to analyze within the sequence where we want to store the result)
-            levelOfResult: the path of the sequence level to store the result (we may want to analyze the tokens but store the result at sentence level)
-            analyzeMetadata: boolean, if the result of the analyzer is applied in metadata (True) or in children (False)
+            sequence: the Sequence we want to analyze.
+            tag: the label to store the analysis result.
+            levelOfAnalyzer: the path of the sequence level to analyze inside of the result.
+            levelOfResult: the path of the sequence level to store the result.
+            analyzeMetadata: boolean, if the result of the analyzer is applied in metadata (True) or in children (False).

         Raises:
             ValueError if the levelOfResult is incorrect
         """
         super().analyze(self.complexity, sequence, tag, levelOfAnalyzer, levelOfResult, True)
-        '''if levelOfResult == "":
-            analyzeResult = sequence.filterMetadata(levelOfAnalyzer, self.function) #TODO
-            resultOfAnalisys = []
-            for i in analyzeResult:
-                resultOfAnalisys.append(i)
-            sequence.metadata[tag] = resultOfAnalisys
-        else:
-            children = [sequence.children]
-            ruta = levelOfResult.split("/")
-            for r in ruta:  # For each level of the path
-                for child in children:  # Look at every available sequence
-                    if r in child:  # If r is inside the current sequence
-                        if r == ruta[-1]:
-                            for seq in child[r]:
-                                analyzeResult = seq.filterMetadata(levelOfAnalyzer, self.function)
-                                resultOfAnalisys = []
-                                for i in analyzeResult:
-                                    resultOfAnalisys.append(i)
-                                seq.metadata[tag] = resultOfAnalisys
-                        else:
-                            children = [c.children for c in child[r]]
-                    else:
-                        raise ValueError(f"Sequence level '{r}' not found in {child}") '''
     def read(self, fichero):
+        """
+        Function that reads a txt file.
+
+        Args:
+            fichero: the path of the file to read.
+        """
         with open(fichero, 'r', encoding='latin-1') as file:
             next(file)
             lines = file.readlines()
@@ -80,6 +65,16 @@ class ComplexityAnalyzer(Analyzer):

     def complexity(self, arrayText):
+        """
+        Function that analyzes the complexity of a list of texts.
+
+        Args:
+            arrayText: list that contains the texts that we want to analyze.
+
+        Returns:
+            A list with dictionaries. Each dictionary contains the result
+            of the analysis of the corresponding text.
+        """
         arrayResults = []
         for text in arrayText:
             doc = self.nlp(text)
@@ -123,7 +118,12 @@ class ComplexityAnalyzer(Analyzer):

     def simplesMetrics(self, doc):
         # Simple metrics are things like the punctuation marks, the number of sentences, the number of content sentences...
+        """
+        Function that calculates simple metrics of a doc.
+
+        Args:
+            doc: sequence of tokens object.
+        """
         self.sentences = [s for s in doc.sents]
         self.numSentences = len(self.sentences)
         pcs = []
@@ -155,13 +155,19 @@ class ComplexityAnalyzer(Analyzer):
         self.numChars = numChars

     def analyzeLegibility(self, doc):
+        """
+        Function that analyzes the legibility of a text.
+
+        Args:
+            doc: a sequence of tokens.
+        """
         self.readabilityFH = 206.84 - 0.60*(self.numSyllabes/self.numWords) - 1.02*(self.numWords/self.numSentences)
         self.perspicuityIFSZ = 206.835 - ((62.3*self.numSyllabes)/self.numWords) - (self.numWords/self.numSentences)
         numLetters = 0
         listLenLetters = []
         for token in doc:
-            if token.text.isalpha():  # If it is a word
+            if token.text.isalpha():
                 numLetters += len(token.text)
                 listLenLetters.append(len(token.text))
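A worked example of the two scores just added above, with made-up counts (220 syllables, 100 words, 8 sentences); the code divides raw syllable and word counts, so this mirrors the implementation as written:

```python
# readabilityFH (Fernández Huerta) and perspicuityIFSZ (Szigriszt-Pazos)
# computed with assumed counts.
num_syllables, num_words, num_sentences = 220, 100, 8

readability_fh = 206.84 - 0.60 * (num_syllables / num_words) - 1.02 * (num_words / num_sentences)
perspicuity_ifsz = 206.835 - (62.3 * num_syllables) / num_words - num_words / num_sentences

print(round(readability_fh, 2))    # ≈ 192.77
print(round(perspicuity_ifsz, 2))  # ≈ 57.28
```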
@@ -172,6 +178,9 @@ class ComplexityAnalyzer(Analyzer):
         self.muLegibility = (self.numWords/(self.numWords-1))*(avgLettersWords/listLenLetters.var())*100

     def lexicalIndex(self):
+        """
+        Function that calculates different lexical indices of a text.
+        """
         self.numContentWords = reduce((lambda a, b: a + b), [len(s) for s in self.posContentSentences])
         self.numDistinctContentWords = len(set([w.text.lower() for s in self.posContentSentences for w in s]))
         if self.numContentWords == 0:
@@ -184,10 +193,16 @@ class ComplexityAnalyzer(Analyzer):

     def readability(self):
+        """
+        Function that calculates the readability of a text.
+        """
         self.autoReadabilityIndex = 4.71 * self.numChars / self.numWords + 0.5 * self.numWords/self.numContentSentences
         self.spauldingScore = 1.609*(self.numWords / self.numContentSentences) + 331.8*(self.numRareWord/self.numWords) + 22.0

     def countRareAndLowWord(self):
+        """
+        Function that counts the rare and low-frequency words of a text.
+        """
         freqWord = sorted(self.dicFreqWords, key = self.dicFreqWords.__getitem__, reverse = True)[:1500]
         countRareWord = 0
         countLowWord = 0
@@ -201,6 +216,9 @@ class ComplexityAnalyzer(Analyzer):
         self.numLowWord = countLowWord

     def sentenceComplexity(self):
+        """
+        Function that calculates the complexity at sentence level.
+        """
         numComplexSentence = 0
         for sentence in self.sentences:
             verb = False
@@ -224,6 +242,12 @@ class ComplexityAnalyzer(Analyzer):

     def countSyllabes(self, text):
+        """
+        Function that counts the syllables of a text.
+
+        Args:
+            text: a string with the text to analyze.
+        """
         t = re.sub(r'y([aáeéiíoóuú])', '\\1', text.lower())
         t = re.sub(r'[aáeéioóu][iuy]', 'A', t.lower())
         t = re.sub(r'[iu][aáeyéioóu]', 'A', t).lower()
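The hunk truncates before the counting step, so here is a hedged, self-contained paraphrase of the syllable counter: the regexes neutralize consonantal "y" and collapse diphthongs into a single marker, after which counting vowel symbols approximates the syllable count. The final counting line is an assumption, since the diff cuts off here:

```python
import re

def count_syllables_es(text: str) -> int:
    # Same first three substitutions as countSyllabes above.
    t = re.sub(r'y([aáeéiíoóuú])', '\\1', text.lower())
    t = re.sub(r'[aáeéioóu][iuy]', 'A', t)
    t = re.sub(r'[iu][aáeyéioóu]', 'A', t)
    # Assumed final step: one syllable per remaining vowel or diphthong marker.
    return len(re.findall(r'[aáeéiíoóuúA]', t))

print(count_syllables_es("murciélago"))  # 4  (mur-cié-la-go)
```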
@@ -240,6 +264,9 @@ class ComplexityAnalyzer(Analyzer):
         return 1 + max(self.treeHeight(x, cont) for x in root.children)

     def embeddingDepth(self):
+        """
+        Function that calculates the embedding depth of a text.
+        """
         roots = [sent.root for sent in self.sentences]
         max_list = []
         max_list = [self.treeHeight(root, 0) for root in roots]
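The recursion above measures dependency-tree height; a standalone sketch of the same idea on a spaCy parse (assumes es_core_news_sm is installed; tree_height is a simplified stand-in for treeHeight):

```python
import spacy

def tree_height(token):
    # Height of the dependency subtree rooted at `token`.
    children = list(token.children)
    if not children:
        return 1
    return 1 + max(tree_height(child) for child in children)

nlp = spacy.load("es_core_news_sm")
doc = nlp("El perro que vimos ayer corre por el parque.")
print([tree_height(sent.root) for sent in doc.sents])  # e.g. [4]
```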
@@ -251,9 +278,12 @@ class ComplexityAnalyzer(Analyzer):
         return self.max_max_list, self.min_max_list, self.mean_max_list

     def ageReadability(self):
+        """
+        Function that calculates the age readability of a text.
+        """
         self.solReadability = -2.51 + 0.74*(3.1291+1.0430*math.sqrt(self.numWords3Syllabes*(30/self.numSentences)))
         self.minAge = 0.2495*(self.numWords/self.numSentences) + 6.4763*(self.numSyllabes/self.numWords) - 7.1395
         self.crawford = -20.5*(self.numSentences/self.numWords)+4.9*(self.numSyllabes/self.numWords)-3.407
         pass
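And a worked example of the three age-oriented scores, again with assumed counts (15 words of three or more syllables):

```python
import math

# Assumed counts for illustration.
num_words, num_sentences, num_syllables, num_words_3_syll = 100, 8, 220, 15

sol = -2.51 + 0.74 * (3.1291 + 1.0430 * math.sqrt(num_words_3_syll * (30 / num_sentences)))
min_age = 0.2495 * (num_words / num_sentences) + 6.4763 * (num_syllables / num_words) - 7.1395
crawford = -20.5 * (num_sentences / num_words) + 4.9 * (num_syllables / num_words) - 3.407

print(round(sol, 2), round(min_age, 2), round(crawford, 2))  # ≈ 5.59 10.23 5.73
```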
@@ -8,33 +8,40 @@ import torch

 class EmotionAnalyzer(Analyzer):
     def __init__(self, task = "text-classification", modelEmotions = 'pysentimiento/robertuito-emotion-analysis', allScores = True):
-        """Creates an analyzer from an input object.
+        """
+        Create an emotion analyzer.

         Args:
-            function: the function of the analyzer like count words, files...
-            isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children (False)
+            task: the task defining which pipeline will be returned.
+            modelEmotions: the model that will be used by the pipeline to make predictions.
+            allScores: True if we want the classifier to return all scores; False otherwise.
         """
         self.emotionsClassifier = pipeline(task, model=modelEmotions, return_all_scores=allScores)
 # This analyzer can only process text strings, so it only makes sense to use the text attribute of metadata
-    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""): #TODO
-        """Analyze a sequence
+    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""):
+        """
+        Analyze a sequence with an emotion function.

         Args:
-            sequence: the Sequence we want to analyze
-            tag: the label to store the analysis result
-            levelOfAnalyzer: the path of the sequence level to analyze inside of the result (the subsequence to analyze within the sequence where we want to store the result)
-            levelOfResult: the path of the sequence level to store the result (we may want to analyze the tokens but store the result at sentence level)
-            analyzeMetadata: boolean, if the result of the analyzer is applied in metadata (True) or in children (False)
-        Raises:
-            ValueError if the levelOfResult is incorrect
+            sequence: the Sequence we want to analyze.
+            tag: the label to store the analysis result.
+            levelOfAnalyzer: the path of the sequence level to analyze inside of the result.
+            levelOfResult: the path of the sequence level to store the result.
         """
         super().analyze(self.emotions, sequence, tag, levelOfAnalyzer, levelOfResult, True)
     def emotions(self, arrayText):
+        """
+        Function that analyzes the emotions of a list of texts.
+
+        Args:
+            arrayText: list that contains the texts that we want to analyze.
+
+        Returns:
+            A list with dictionaries. Each dictionary contains the result
+            of the analysis of the corresponding text.
+        """
         arrayResults = []
         for text in arrayText:
             prediction = self.emotionsClassifier(text)
......
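A hedged usage sketch for the emotion analyzer (the "text" level name follows the docstring's path convention and is an assumption; the model downloads on first use):

```python
from textflow.Sequence import Sequence
from textflow.EmotionAnalyzer import EmotionAnalyzer

seq = Sequence("string", "Estoy muy contento con los resultados")
analyzer = EmotionAnalyzer()  # pysentimiento/robertuito-emotion-analysis
analyzer.analyze(seq, "emotions", "text")
# With return_all_scores=True each prediction is a list of
# {"label": ..., "score": ...} dicts, one per emotion.
```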
@@ -8,33 +8,39 @@ spacy.cli.download("es_core_news_sm")

 class LemmaAnalyzer(Analyzer):
     def __init__(self, nlp = spacy.load("es_core_news_sm"), posNoContent = ["PUNCT", "SPACE", "SYM"]):
-        """Creates an analyzer from an input object.
+        """Create an analyzer from an input object.

         Args:
-            function: the function of the analyzer like count words, files...
-            isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children (False)
+            nlp: a language model.
+            posNoContent: a list with the POS tags for which we don't want to get the lemma.
         """
         self.nlp = nlp
         self.posNoContent = posNoContent
 # This analyzer can only process text strings, so it only makes sense to use the text attribute of metadata
-    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""): #TODO
-        """Analyze a sequence
+    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""):
+        """
+        Analyze a sequence with a lemma function.

         Args:
-            sequence: the Sequence we want to analyze
-            tag: the label to store the analysis result
-            levelOfAnalyzer: the path of the sequence level to analyze inside of the result (the subsequence to analyze within the sequence where we want to store the result)
-            levelOfResult: the path of the sequence level to store the result (we may want to analyze the tokens but store the result at sentence level)
-            analyzeMetadata: boolean, if the result of the analyzer is applied in metadata (True) or in children (False)
-        Raises:
-            ValueError if the levelOfResult is incorrect
+            sequence: the Sequence we want to analyze.
+            tag: the label to store the analysis result.
+            levelOfAnalyzer: the path of the sequence level to analyze inside of the result.
+            levelOfResult: the path of the sequence level to store the result.
         """
         super().analyze(self.lemmas, sequence, tag, levelOfAnalyzer, levelOfResult, True)
     def lemmas(self, arrayText):
+        '''
+        Function that gets the lemmas of a list of texts.
+
+        Args:
+            arrayText: list that contains the texts that we want to analyze.
+
+        Returns:
+            A list with dictionaries. Each dictionary contains the result
+            of the analysis of the corresponding text.
+        '''
         arrayResult = []
         for text in arrayText:
             sequenceLemmas = []
......
@@ -8,31 +8,35 @@ spacy.cli.download("es_core_news_sm")

 class POSAnalyzer(Analyzer):
     def __init__(self, nlp = spacy.load("es_core_news_sm")):
-        """Creates an analyzer from an input object.
+        """
+        Create a POS analyzer from an input object.

         Args:
-            function: the function of the analyzer like count words, files...
-            isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children (False)
+            nlp: a language model.
         """
         self.nlp = nlp
 # This analyzer can only process text strings, so it only makes sense to use the text attribute of metadata
-    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""): #TODO
-        """Analyze a sequence
+    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""):
+        """
+        Analyze a sequence with a POS tagger function.

         Args:
-            sequence: the Sequence we want to analyze
-            tag: the label to store the analysis result
-            levelOfAnalyzer: the path of the sequence level to analyze inside of the result (the subsequence to analyze within the sequence where we want to store the result)
-            levelOfResult: the path of the sequence level to store the result (we may want to analyze the tokens but store the result at sentence level)
-            analyzeMetadata: boolean, if the result of the analyzer is applied in metadata (True) or in children (False)
-        Raises:
-            ValueError if the levelOfResult is incorrect
+            sequence: the Sequence we want to analyze.
+            tag: the label to store the analysis result.
+            levelOfAnalyzer: the path of the sequence level to analyze inside of the result.
+            levelOfResult: the path of the sequence level to store the result.
         """
         super().analyze(self.pos, sequence, tag, levelOfAnalyzer, levelOfResult, True)
     def pos(self, arrayText):
+        '''
+        Function that gets the POS tags of a list of texts.
+
+        Args:
+            arrayText: list that contains the texts that we want to analyze.
+
+        Returns:
+            A list with dictionaries. Each dictionary contains the result
+            of the analysis of the corresponding text.
+        '''
         arrayResults = []
         for text in arrayText:
             srcPOS = []
......
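Both the lemma and POS analyzers are thin wrappers over a spaCy pipeline; what they extract per token can be seen directly (illustrative output; exact tags and lemmas depend on the model version):

```python
import spacy

nlp = spacy.load("es_core_news_sm")  # assumes the model is installed
doc = nlp("Los gatos duermen")
print([(tok.text, tok.pos_, tok.lemma_) for tok in doc])
# e.g. [('Los', 'DET', 'el'), ('gatos', 'NOUN', 'gato'), ('duermen', 'VERB', 'dormir')]
```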
-import os
-import spacy
-import spacy.cli
 from typing import Optional
 from textflow.Analyzer import Analyzer
 from transformers import pipeline
@@ -8,32 +5,40 @@ import torch

 class PolarityAnalyzer(Analyzer):
     def __init__(self, task = "text-classification", modelPolarity = 'finiteautomata/beto-sentiment-analysis', allScores = True):
-        """Creates an analyzer from an input object.
+        """
+        Create a polarity analyzer.

         Args:
-            function: the function of the analyzer like count words, files...
-            isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children (False)
+            task: the task defining which pipeline will be returned.
+            modelPolarity: the model that will be used by the pipeline to make predictions.
+            allScores: True if we want the classifier to return all scores; False otherwise.
         """
         self.polarityClassifier = pipeline(task, model=modelPolarity, return_all_scores=allScores)
 # This analyzer can only process text strings, so it only makes sense to use the text attribute of metadata
-    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""): #TODO
-        """Analyze a sequence
+    def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""):
+        """
+        Analyze a sequence with a polarity function.

         Args:
-            sequence: the Sequence we want to analyze
-            tag: the label to store the analysis result
-            levelOfAnalyzer: the path of the sequence level to analyze inside of the result (the subsequence to analyze within the sequence where we want to store the result)
-            levelOfResult: the path of the sequence level to store the result (we may want to analyze the tokens but store the result at sentence level)
-            analyzeMetadata: boolean, if the result of the analyzer is applied in metadata (True) or in children (False)
-        Raises:
-            ValueError if the levelOfResult is incorrect
+            sequence: the Sequence we want to analyze.
+            tag: the label to store the analysis result.
+            levelOfAnalyzer: the path of the sequence level to analyze inside of the result.
+            levelOfResult: the path of the sequence level to store the result.
         """
         super().analyze(self.polarity, sequence, tag, levelOfAnalyzer, levelOfResult, True)
     def polarity(self, arrayText):
+        """
+        Function that analyzes the polarity of a list of texts.
+
+        Args:
+            arrayText: list that contains the texts that we want to analyze.
+
+        Returns:
+            A list with dictionaries. Each dictionary contains the result
+            of the analysis of the corresponding text.
+        """
         arrayResults = []
         for text in arrayText:
             prediction = self.polarityClassifier(text)
......
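The polarity analyzer likewise wraps a Hugging Face pipeline; calling the underlying classifier directly shows the result shape the analyzer stores (scores are illustrative):

```python
from transformers import pipeline

clf = pipeline("text-classification",
               model="finiteautomata/beto-sentiment-analysis",
               return_all_scores=True)
print(clf("Me encanta esta biblioteca"))
# e.g. [[{'label': 'NEG', 'score': 0.0...}, {'label': 'NEU', 'score': 0.0...},
#        {'label': 'POS', 'score': 0.9...}]]
```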
@@ -10,7 +10,7 @@ from nltk.tokenize import RegexpTokenizer

 class SequenceIterator:
     def __init__(self, children):
         """
-        Creates a SequenceIterator from a Sequence.
+        Create a SequenceIterator from a Sequence.

         Args:
             children: A list with the values of the attribute children of a Sequence.
         """
@@ -39,7 +39,7 @@ class SequenceIterator:
             raise StopIteration

-_VALID_FORMATS = ["directory", "string", "text", "token", None]
+_VALID_FORMATS = ["directory", "string", "file", "token", None]
 class Sequence:
     """Summary of class here.
@@ -59,6 +59,7 @@ class Sequence:
         format: A string containing the input data's type.
         src: An object representing the input data. It can be a string for a
             string format or a file path for a text format.
+
         Raises:
             ValueError: If the format is wrong.
         """
@@ -71,14 +72,14 @@ class Sequence:
         self.format = format
         self.children = {}
-        self.metadata = {"text": " "}
+        self.metadata = {}
         if format == "token":
             if not isinstance(src, str):
                 raise ValueError(f"{src} is not an instance of token")
             self.metadata["text"] = src
         if format == "string":
             self.initFromString(src, "tokens", "token", tokenizer)
-        if format == "text":
+        if format == "file":
             self.initFromDocument(src, "tokens", "token", tokenizer)
         if format == "directory":
             self.initFromDirectory(src, "directory", "files", tokenizer)
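With the rename, the constructor's three text-bearing formats look like this in use (a sketch; the "tokens" child level comes from initFromString, and the paths in the comments are hypothetical):

```python
from textflow.Sequence import Sequence

s = Sequence("string", "un texto de ejemplo")    # tokenize a string in memory
# f = Sequence("file", "ruta/al/documento.txt")  # read and tokenize a file
# d = Sequence("directory", "ruta/al/corpus")    # walk a directory tree
print(s.children["tokens"][0].metadata["text"])  # 'un'
```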
@@ -86,12 +87,12 @@ class Sequence:

     def initFromDirectory(self, directory, labelDirectory, labelFile, tokenizer):
         '''
         Initialize a Sequence from a directory
+
         Args:
             directory: the path of a directory as string
             labelDirectory: the name of the children dictionary entry for the subpaths
             labelFile: the name of the children dictionary entry for the files
         '''
-        #print(os.path.abspath((os.getcwd())))
         self.format = "directory"
         self.metadata["nameFiles"] = []
         self.metadata["directoriesPath"] = []
@@ -102,9 +103,9 @@ class Sequence:
             if os.path.isfile(directory+"/"+file):
                 self.metadata["nameFiles"].append(file)
                 if labelFile in self.children:
-                    self.children[labelFile].append(Sequence("text", directory+"/"+file))
+                    self.children[labelFile].append(Sequence("file", directory+"/"+file))
                 else:
-                    self.children[labelFile] = [Sequence("text", directory+"/"+file)]
+                    self.children[labelFile] = [Sequence("file", directory+"/"+file)]
             else:
                 self.metadata["directoriesPath"].append(directory+"/"+file)
                 if labelDirectory in self.children:
@@ -116,12 +117,13 @@ class Sequence:

     def initFromDocument(self, documentPath, labelSubSequence, formatSubsequence, tokenizer):
         '''
         Initialize a Sequence from a document
+
         Args:
             documentPath: the path of a document as string
             labelSubSequence: the name of the children dictionary entry for the subsequence as string
             formatSubSequence: the format of the subsequence in children dictionary entry as string
         '''
-        self.format = "text"
+        self.format = "file"
         with open(documentPath, "r") as f:
             txt = f.read()
             self.children[labelSubSequence] = [Sequence(formatSubsequence, token_src) for token_src in tokenizer.tokenize(txt)]
@@ -130,10 +132,12 @@ class Sequence:

     def initFromString(self, srcString, labelSubSequence, formatSubsequence, tokenizer):
         '''
         Initialize a Sequence from a string
+
         Args:
             srcString: source string of the sequence
             labelSubSequence: the name of the children dictionary entry for the subsequence as string
             formatSubSequence: the format of the subsequence in children dictionary entry as string
+
         Raises:
             ValueError: If srcString isn't a string.
         '''
@@ -171,8 +175,9 @@ class Sequence:

     def __len__(self):
         '''
-        Calculate the length of a Sequence
+        Calculate the length of a Sequence.
+        The length of a Sequence is the length of the children.

         Returns:
             A number with the length of the Sequence
         '''
@@ -190,6 +195,7 @@ class Sequence:

     def __getitem__(self, idx):
         '''
         Get the value of a key from the dictionary of children
+
         Args:
             idx: a string that represents the key of the children dictionary
                 or an integer that represents the position of the key in the children dictionary keys
@@ -307,7 +313,6 @@ class Sequence:
                     metadata = [c.metadata for c in child[r]]
                 else:
                     raise ValueError(f"Sequence level '{r}' not found in {child}")
-        #yield criteria(results)
         cont = 0
         gen = criteria(results)
         for r in gen:
......
@@ -9,21 +9,46 @@ import math
 from textflow.Analyzer import Analyzer

-class StylometryAnalyzer(Analyzer): #TODO
+class StylometryAnalyzer(Analyzer):
     def __init__(self, stopwords, puntuation = string.punctuation, tokenizer = WhitespaceTokenizer()):
+        """
+        Create a stylometry analyzer from an input object.
+
+        Args:
+            stopwords: a list with stopwords.
+            puntuation: a list with punctuation marks.
+            tokenizer: a function to tokenize the text.
+        """
         self.stopwords = stopwords
         self.puntuation = puntuation
         self.tokenizer = tokenizer
 # This analyzer can only process text strings, so it only makes sense to use the text attribute of metadata
     def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""):
+        """
+        Analyze a sequence with a stylometry function.
+
+        Args:
+            sequence: the Sequence we want to analyze.
+            tag: the label to store the analysis result.
+            levelOfAnalyzer: the path of the sequence level to analyze inside of the result.
+            levelOfResult: the path of the sequence level to store the result.
+        """
         super().analyze(self.stylometry, sequence, tag, levelOfAnalyzer, levelOfResult, True)
     def stylometry(self, arrayText):
+        '''
+        Function that gets the stylometry (some indices, word frequencies) of a list of texts.
+
+        Args:
+            arrayText: list that contains the texts that we want to analyze.
+
+        Returns:
+            A list with dictionaries. Each dictionary contains the result
+            of the analysis of the corresponding text.
+        '''
         resultsList = []
         for t in arrayText:
-            #doc = self.nlp(text)
             t.lower()
             tokens = self.tokenizer.tokenize(t)
             text = [token.lower() for token in tokens]
@@ -46,6 +71,12 @@ class StylometryAnalyzer(Analyzer): #TODO
         return resultsList

     def funcionesTTR(self, text):
+        """
+        Function that calculates different TTR indices.
+
+        Args:
+            text: a string with the text to analyze.
+        """
         self.uniqueWords = [token[0] for token in self.freqWord]
         self.numWordFreqOne = len([token[0] for token in self.freqWord if token[1] == 1])
         self.TTR = len(self.uniqueWords) / len(text)
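A worked example of the type-token ratio computed above (TTR = distinct types / total tokens):

```python
tokens = "el perro y el gato y el loro".split()  # 8 tokens
unique_words = set(tokens)                       # 5 types
ttr = len(unique_words) / len(tokens)
print(ttr)  # 0.625
```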
@@ -64,6 +95,14 @@ class StylometryAnalyzer(Analyzer): #TODO

     def freqWords(self, tokens, stopWords, puntuationMarks):
+        """
+        Function that counts the frequency of stopwords, punctuation marks and words in a list of tokens.
+
+        Args:
+            tokens: a list of tokens whose frequencies we want to count.
+            stopWords: a list with the stopwords.
+            puntuationMarks: a list with the punctuation marks.
+        """
         freqStopWords = {}
         freqPuntuationMarks = {}
         freqWord = {}
......
@@ -5,16 +5,25 @@ from textflow.Analyzer import Analyzer

 class VolumetryAnalyzer(Analyzer):
     def __init__(self, tokenizer = WhitespaceTokenizer()):
-        """Creates an analyzer from an input object.
-        Args:
-            function: the function of the analyzer like count words, files...
-            isMetadata: boolean, if the result of the analyzer is stored in metadata (True) or in children (False)
-        """
+        """
+        Create a volumetry analyzer from an input object.
+
+        Args:
+            tokenizer: the way to split a text into tokens.
+        """
         self.tokenizer = tokenizer
     def volumetry(self, arrayText):
+        """
+        Function that analyzes the volumetry of a list of texts.
+
+        Args:
+            arrayText: list that contains the texts that we want to analyze.
+
+        Returns:
+            A list with dictionaries. Each dictionary contains the result
+            of the analysis of the corresponding text.
+        """
         arrayResults = []
         for texts in arrayText:
             text = self.tokenizer.tokenize(texts)
@@ -27,33 +36,18 @@ class VolumetryAnalyzer(Analyzer):
             arrayResults.append(dicResults)
         return arrayResults
-    # The sequence must always have a text attribute (metadata) for this to work
-    # Count the number of words, number of unique words, number of characters and average number of characters
     def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult: Optional[str] = ""):
-        super().analyze(self.volumetry, sequence, tag, levelOfAnalyzer, levelOfResult, True)
-        '''children = [sequence.children]
-        ruta = levelOfAnalyze.split("/")
-        for r in ruta:  # For each level of the path
-            for child in children:  # Look at every available sequence
-                if r in child:  # If r is inside the current sequence
-                    if r == ruta[-1]:
-                        for seq in child[r]:
-                            if "text" not in seq.metadata:
-                                raise ValueError(f"Level text not found in {seq.metadata.keys()}")
-                            else:
-                                text = seq.metadata["text"].split(" ")
-                                volumetry = {
-                                    "words": len(text),
-                                    "uniqueWords": len(set(text)),
-                                    "chars": len(seq.metadata["text"]),
-                                    "avgWordsLen": round(volumetry["chars"] / volumetry["words"])
-                                }
-                                seq.metadata["volumetry"] = volumetry
-                    else:
-                        children = [c.children for c in child[r]]
-                else:
-                    raise ValueError(f"Sequence level '{r}' not found in {child}")'''
+        """
+        Analyze a sequence with a volumetry function.
+
+        Args:
+            sequence: the Sequence we want to analyze.
+            tag: the label to store the analysis result.
+            levelOfAnalyzer: the path of the sequence level to analyze inside of the result.
+            levelOfResult: the path of the sequence level to store the result.
+        """
+        super().analyze(self.volumetry, sequence, tag, levelOfAnalyzer, levelOfResult, True)
 \ No newline at end of file
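Finally, a hedged usage sketch of the volumetry analyzer after this cleanup (the level name "text" is an assumption based on the docstrings):

```python
from nltk.tokenize import WhitespaceTokenizer
from textflow.Sequence import Sequence
from textflow.VolumetryAnalyzer import VolumetryAnalyzer

seq = Sequence("string", "uno dos dos tres")
va = VolumetryAnalyzer(WhitespaceTokenizer())
va.analyze(seq, "volumetry", "text")
# Expected result dictionaries carry words, uniqueWords, chars and avgWordsLen.
```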