Refactoring some classes

parent a359b5fa
Veo que en este foro, afortunadamente para vosotros, no hay mucha gente que sufra de TOC. Si hay alguien por ahí, me gustaría que compartierais vuestras opiniones; yo compruebo las cosas que hago porque tengo miedo de haberme equivocado y pienso en las consecuencias que ese error podría acarrearme, y las compruebo una y otra vez, y esto me angustia.
Veo que en este foro, afortunadamente para vosotros, no hay mucha gente que sufra de TOC. Si hay alguien por ahí, me gustaría que compartierais vuestras opiniones; yo compruebo las cosas que hago porque tengo miedo de haberme equivocado y pienso en las consecuencias que ese error podría acarrearme, y las compruebo una y otra vez, y esto me angustia.
Sé que abrí un post parecido hace tiempo, pero ya quedó abajo y por tanto en el olvido, por eso abro este por si alguna persona nueva con este problema lo lee. Me gustaría saber qué os recetan a vosotros para esto y si os va bien.
Saludos.
Nereida.
\ No newline at end of file
......@@ -7,6 +7,9 @@ from abc import ABC, abstractmethod
class Analyzer(ABC):
"""
Abstract class that provides methods to analyze sequences
"""
@abstractmethod
def analyze(self, functionAnalyzer,sequence, tag, levelOfAnalyzer, levelOfResult:Optional[str] = "", analyzeMetadata: Optional[bool] = False): #TODO
......
......@@ -14,6 +14,40 @@ creaPath = os.path.join(os.path.dirname(__file__), 'Crea-5000.txt')
spacy.cli.download("es_core_news_sm")
class ComplexityAnalyzer(Analyzer):
"""
A class that provides methods to analyze the complexity of the text of a sequence.
Attributes:
nlp: a model of language.
dicFreqWords: a dictionary with the most frequence words of spanish language.
numContentSentences: the number of the content sentences in the text.
numComplexSents: the nomber of complex sentences in the text.
avgLenSentence: the average length of the sentences in the text.
numPuntuationMark: the number of punctuation marks in the text.
numWords: the number of words in the text.
numRareWord: the number of rare words in the text.
numSyllabes: the number of syllabes in the text.
numChars: the number of chars in the text.
indexLowFreqWords: the index of low frequence words of a text.
lexicalDistributionIndex: the index of lexical distribution of a text.
lexicalComplexity: the index of lexical complexity of a text.
spauldingScore: the Spaulding's readability score.
sentenceComplexityIndex: the index of sentence complexity.
autoReadabilityIndex: the autoreadability index of a text.
readabilityFH: the Fernandez Huerta readability of a text.
perspicuityIFSZ: the Flesch-Szigriszt perspicuity of a text.
poliniComprensibility: the Polini comprehensibility of a text.
muLegibility: the Mu legibility of a text.
minAge: the minimum age to read a text.
solReadability: the SOL readability of a text.
crawford: the Crawford's years of a text.
min_max_list: the minimum of maximum tree depths.
max_max_list: the maximum of maximum tree depths.
mean_max_list: the mean of maximum tree depths.
"""
def __init__(self, rutaArchivoCrea = creaPath, nlp = spacy.load("es_core_news_sm")):
"""
Create a complexity analyzer from an input object.
......
......@@ -7,6 +7,13 @@ from transformers import pipeline
import torch
class EmotionAnalyzer(Analyzer):
"""
A class that provides methods to analyze the emotions of the text of a sequence.
Attributes:
polarityClassifier: a pipeline that uses a model for inference the emotions of the text of a sequence.
"""
def __init__(self, task = "text-classification",modelEmotions = 'pysentimiento/robertuito-emotion-analysis', allScores = True):
"""
Create a emotions analyzer.
......
......@@ -7,6 +7,15 @@ from textflow.Analyzer import Analyzer
spacy.cli.download("es_core_news_sm")
class LemmaAnalyzer(Analyzer):
"""
A class that provides methods to analyze the lemmas of the text of a sequence.
Attributes:
nlp: a model of language.
posNoContent: a list with the POS tag from which we don't want to get the lemma.
Some of the POS tag that haven't content are "PUNCT", "SPACE", "SYM".
"""
def __init__(self, nlp = spacy.load("es_core_news_sm"), posNoContent = ["PUNCT", "SPACE", "SYM"]):
"""Create an analyzer from an input object.
......
......@@ -7,6 +7,13 @@ from textflow.Analyzer import Analyzer
spacy.cli.download("es_core_news_sm")
class POSAnalyzer(Analyzer):
"""
A class that provides methods to analyze the part-of-speech(POS) of the text of a sequence.
Attributes:
nlp: a model of language.
"""
def __init__(self, nlp = spacy.load("es_core_news_sm")):
"""
Create a POS analyzer from an input object.
......
......@@ -4,6 +4,13 @@ from transformers import pipeline
import torch
class PolarityAnalyzer(Analyzer):
"""
A class that provides methods to analyze the polarity of the text of a sequence.
Attributes:
polarityClassifier: a pipeline that uses a model for inference the polarity of the text of a sequence.
"""
def __init__(self, task = "text-classification",modelPolarity = 'finiteautomata/beto-sentiment-analysis', allScores = True):
"""
Create a polarity analyzer.
......
import os
from typing import Optional
from nltk.tokenize import TreebankWordTokenizer
from nltk.tokenize import WhitespaceTokenizer
from nltk.tokenize import SpaceTokenizer
from nltk.tokenize import WordPunctTokenizer
from nltk.tokenize import RegexpTokenizer
from abc import ABC, abstractmethod
class SequenceIterator:
"""
A class that provides methods to iterate over the children of a sequence
Attributes:
idx: an integer with the position of the iterator.
children: a dictionary with the subsequence of a sequence.
"""
def __init__(self, children):
"""
Create a sequenceIterator from a Sequence.
......@@ -39,115 +41,30 @@ class SequenceIterator:
raise StopIteration
_VALID_FORMATS = ["directory","string", "file", "token", None]
class Sequence:
"""Summary of class here.
Longer class information...
Longer class information...
class Sequence(ABC):
"""
Abstract class that provides methods to create a sequence from different formats
Attributes:
id: ...
text: ...
sequences: ...
format: a string with the origin format of a sequence.
metadata: a dictionary with the metadata of a sequence.
children: a dictionary with the subsequence of a sequence.
"""
def __init__(self, format: Optional[str] = None, src: Optional[object] = None, tokenizer: Optional[object] = None):
    """
    Create a sequence from an input object.

    Args:
        format: a string with the input data's type; one of _VALID_FORMATS
            ("directory", "string", "file", "token" or None).
        src: the input data: a string for the "string"/"token" formats, or
            a path for the "file"/"directory" formats.
        tokenizer: an object with a tokenize(str) method used to split text
            into subsequences; defaults to a WhitespaceTokenizer.

    Raises:
        ValueError: if format is not a valid format, or if src is not a
            string when format is "token".
    """
    if format not in _VALID_FORMATS:
        raise ValueError(
            f"{format} is not a valid format. Valid formats: {_VALID_FORMATS}"
        )
    if tokenizer is None:  # 'is None', not '== None' (PEP 8)
        tokenizer = WhitespaceTokenizer()
    self.format = format
    self.children = {}
    self.metadata = {}
    # The formats are mutually exclusive, so dispatch with an elif chain
    # instead of re-testing every branch.
    if format == "token":
        if not isinstance(src, str):
            raise ValueError(f"{src} is not an instance of token")
        self.metadata["text"] = src
    elif format == "string":
        self.initFromString(src, "tokens", "token", tokenizer)
    elif format == "file":
        self.initFromDocument(src, "tokens", "token", tokenizer)
    elif format == "directory":
        self.initFromDirectory(src, "directory", "files", tokenizer)
def initFromDirectory(self, directory, labelDirectory, labelFile, tokenizer):
    '''
    Initialize this Sequence from a directory path.

    Args:
        directory: the path of the directory as a string
        labelDirectory: the children-dictionary key under which
            subdirectory sequences are stored
        labelFile: the children-dictionary key under which file sequences
            are stored
        tokenizer: the tokenizer handed down to subdirectory sequences
    '''
    self.format = "directory"
    self.metadata["nameFiles"] = []
    self.metadata["directoriesPath"] = []
    for entry in os.listdir(directory):
        path = directory + "/" + entry
        if os.path.isfile(path):
            self.metadata["nameFiles"].append(entry)
            self.children.setdefault(labelFile, []).append(
                Sequence("file", path)
            )
        else:
            self.metadata["directoriesPath"].append(path)
            self.children.setdefault(labelDirectory, []).append(
                Sequence("directory", path, tokenizer)
            )
def initFromDocument(self, documentPath, labelSubSequence, formatSubsequence, tokenizer):
@abstractmethod
def inicializeSequence(self,format):
'''
Initialize a Sequence from a document
Initializes the attributes of a sequence.
Args:
documentPath: the path of a document as string
labelSubSequence: the name of the children dictionary entry for the subsequence as string
formatSubSequence: the format of the subsequence in children dictionary entry as string
'''
self.format = "file"
with open(documentPath, "r") as f:
txt = f.read()
self.children[labelSubSequence] = [Sequence(formatSubsequence,token_src) for token_src in tokenizer.tokenize(txt)]
self.metadata["text"] = txt
def initFromString(self, srcString, labelSubSequence, formatSubsequence, tokenizer):
format: a string with the origin format of the sequence.
'''
Initialize a Sequence from a string
Args:
srcString: source string of the sequence
labelSubSequence: the name of the children dictionary entry for the subsequence as string
formatSubSequence: the format of the subsequence in children dictionary entry as string
Raises:
ValueError: If srcString isn't a string .
'''
if not isinstance(srcString, str):
raise ValueError(f"{srcString} is not an instance of string")
self.format = "string"
self.children[labelSubSequence]= [Sequence(formatSubsequence,token_src) for token_src in tokenizer.tokenize(srcString)]
self.metadata["text"]= srcString
self.format = format
self.metadata={}
self.children={}
return self.format, self.metadata, self.children
@abstractmethod
def __str__(self):
'''
Convert a Sequence to a string
......@@ -156,7 +73,8 @@ class Sequence:
A string that contains the text of a Sequence
'''
return str(self.metadata["text"])
@abstractmethod
def __repr__(self):
'''
Convert a Sequence to a string
......@@ -173,6 +91,7 @@ class Sequence:
")"
)
@abstractmethod
def __len__(self):
'''
Calculate the length of a Sequence.
......@@ -183,6 +102,7 @@ class Sequence:
'''
return len(self.children)
@abstractmethod
def __iter__(self):
'''
Iterate in a Sequence
......@@ -192,6 +112,7 @@ class Sequence:
'''
return SequenceIterator(list(self.children.values()))
@abstractmethod
def __getitem__(self, idx):
'''
Get the value of a key from the dictionary of children
......@@ -218,13 +139,23 @@ class Sequence:
else: # TODO: Should it support slices (e.g. [2:4])?
raise ValueError(f"Sequence id '{idx}' not found in {self.children}")
@abstractmethod
def __eq__(self, other):
    '''
    Compare this sequence with another one for equality.

    Args:
        other: the sequence to compare against.

    Returns:
        True when both sequences share the same format, metadata and
        children; False otherwise.
    '''
    return (
        self.format == other.format
        and self.metadata == other.metadata
        and self.children == other.children
    )
@abstractmethod
def depth(self,diccionaryList: Optional[list] = None):
'''
Calculate the maximum depth of a Sequence
......@@ -253,7 +184,7 @@ class Sequence:
rutaMax = ruta
return (profMax, rutaMax)
@abstractmethod
def filter(self, level, criteria):
'''
Filter the children of a Sequence according to a criteria
......@@ -282,8 +213,9 @@ class Sequence:
for r in gen:
yield gen[cont]
cont+=1
def filterMetadata(self, level, criteria): #TODO
@abstractmethod
def filterMetadata(self, level, criteria):
'''
Filter the children of a Sequence according to a criteria
......@@ -298,21 +230,18 @@ class Sequence:
children = [self.children]
metadata = [self.metadata]
results=[]
if len(ruta) == 1 and ruta[0] in metadata[0]:
results.append(metadata[0][ruta[0]])
else:
for r in ruta:
if r == ruta[-1]:
for m in metadata:
if r in m:
results.append(m[r])
else:
for child in children:
if r in child:
children = [c.children for c in child[r]]
metadata = [c.metadata for c in child[r]]
else:
raise ValueError(f"Sequence level '{r}' not found in {child}")
for r in ruta:
if r == ruta[-1]:
for m in metadata:
if r in m:
results.append(m[r])
else:
for child in children:
if r in child:
children = [c.children for c in child[r]]
metadata = [c.metadata for c in child[r]]
else:
raise ValueError(f"Sequence level '{r}' not found in {child}")
cont=0
gen = criteria(results)
for r in gen:
......
import os
from typing import Optional
from nltk.tokenize import WhitespaceTokenizer
from textflow.Sequence import Sequence
from textflow.SequenceFile import SequenceFile
from textflow.SequenceString import SequenceString
class SequenceDirectory(Sequence):
    """
    A class that provides methods to create a sequence from a directory.

    Attributes:
        format: a string with the origin format of a sequence.
        metadata: a dictionary with the metadata of a sequence.
        children: a dictionary with the subsequences of a sequence.
    """

    def __init__(self, src, listLabel=["directories", "files", "tokens"], listClasses=[SequenceFile, SequenceString], listTokenizer=[WhitespaceTokenizer()]):
        '''
        Initialize a Sequence from a directory path.

        By default, subsequences are created for every directory and file
        found under the source directory and, for each file, further
        subsequences that split its text into words.

        NOTE: the default argument lists are shared between calls (mutable
        default arguments). They are safe here only because they are read,
        never mutated, inside this method.

        Args:
            src: the path of the source directory as a string.
            listLabel: the children-dictionary keys for each nesting level
                (subdirectories, files, tokens).
            listClasses: the Sequence subclasses used to build each level
                below this one.
            listTokenizer: the tokenizers used at each level below this one.
        '''
        self.inicializeSequence("directory")
        self.metadata["nameFiles"] = []
        self.metadata["directoriesPath"] = []
        if not listTokenizer:  # one truthiness test covers both None and []
            listTokenizer = [WhitespaceTokenizer()]
        for entry in os.listdir(src):
            path = src + "/" + entry
            if os.path.isfile(path):
                self.metadata["nameFiles"].append(entry)
                if listLabel and listClasses:
                    # Files are built with the next class/label/tokenizer in
                    # the chain; the sliced tails configure deeper levels.
                    if listLabel[1] in self.children:
                        self.children[listLabel[1]].append(listClasses[0](path, listLabel[1:], listClasses[1:], listTokenizer[1:]))
                    else:
                        self.children[listLabel[1]] = [listClasses[0](path, listLabel[1:], listClasses[1:], listTokenizer[1:])]
            else:
                self.metadata["directoriesPath"].append(path)
                # Subdirectories recurse with the same configuration lists.
                if listLabel[0] in self.children:
                    self.children[listLabel[0]].append(SequenceDirectory(path, listLabel, listClasses, listTokenizer))
                else:
                    self.children[listLabel[0]] = [SequenceDirectory(path, listLabel, listClasses, listTokenizer)]

    # The methods below only delegate to Sequence. They must stay: the base
    # class declares them @abstractmethod, so removing them would make this
    # class abstract and non-instantiable.

    def inicializeSequence(self, format):
        '''
        Initialize the attributes of a sequence.

        Args:
            format: a string with the origin format of the sequence.
        '''
        super().inicializeSequence(format)

    def __str__(self):
        '''
        Convert a Sequence to a string.

        Returns:
            A string that contains the text of a Sequence.
        '''
        return super().__str__()

    def __repr__(self):
        '''
        Convert a Sequence to a string.

        Returns:
            A string with the formal representation of a Sequence.
        '''
        return super().__repr__()

    def __len__(self):
        '''
        Calculate the length of a Sequence.

        The length of a Sequence is the length of its children.

        Returns:
            A number with the length of the Sequence.
        '''
        return super().__len__()

    def __iter__(self):
        '''
        Iterate over a Sequence.

        To do this, we iterate through the children dictionary.

        Returns:
            A SequenceIterator.
        '''
        return super().__iter__()

    def __getitem__(self, idx):
        '''
        Get the value of a key from the dictionary of children.

        Args:
            idx: a string that represents the key of the children dictionary,
                or an integer that represents the position of the key in the
                children dictionary keys.

        Returns:
            A list of Sequences.
        '''
        return super().__getitem__(idx)

    def __eq__(self, other):
        '''
        Check whether a sequence is equal to the current one.

        Args:
            other: the sequence to compare against.

        Returns:
            True if the sequences are equal, False otherwise.
        '''
        return super().__eq__(other)

    def depth(self, dictionaryList: Optional[list] = None):
        '''
        Calculate the maximum depth of a Sequence.

        Args:
            dictionaryList: the initial list used to calculate the depth.

        Returns:
            A tuple that contains a number (the depth of the Sequence) and a
            list (the route of the maximum depth).
        '''
        return super().depth(dictionaryList)

    def filter(self, level, criteria):
        '''
        Filter the children of a Sequence according to a criteria.

        Args:
            level: the route of the level as a string, separating each level
                with "/".
            criteria: the filter function.

        Returns:
            A generator with the result of the filter.
        '''
        return super().filter(level, criteria)

    def filterMetadata(self, level, criteria):
        '''
        Filter the metadata of a Sequence according to a criteria.

        Args:
            level: the route of the level as a string, separating each level
                with "/".
            criteria: the filter function.

        Returns:
            A generator with the result of the filter.
        '''
        return super().filterMetadata(level, criteria)
\ No newline at end of file
import os
from typing import Optional
from nltk.tokenize import WhitespaceTokenizer
from textflow.Sequence import Sequence
from textflow.SequenceString import SequenceString
class SequenceFile(Sequence):
    """
    A class that provides methods to create a sequence from a file.

    Attributes:
        format: a string with the origin format of a sequence.
        metadata: a dictionary with the metadata of a sequence.
        children: a dictionary with the subsequences of a sequence.
    """

    def __init__(self, src, listLabel=["tokens"], listClasses=[SequenceString], listTokenizer=[WhitespaceTokenizer()]):
        '''
        Initialize a Sequence from a file path.

        By default, creates subsequences by splitting the text of the file
        into words.

        NOTE: the default argument lists are shared between calls (mutable
        default arguments). They are safe here only because they are read,
        never mutated, inside this method.

        Args:
            src: the path of the source file as a string.
            listLabel: the children-dictionary keys for each nesting level.
            listClasses: the Sequence subclasses used to build each level
                below this one.
            listTokenizer: the tokenizers used at each level, starting with
                the one that splits this file's text.
        '''
        self.inicializeSequence("file")
        # NOTE(review): the file is read with the platform default encoding —
        # confirm whether an explicit encoding (e.g. UTF-8) should be used.
        with open(src, "r") as f:
            txt = f.read()
        self.metadata["text"] = txt
        self.metadata["nameFile"] = src.split("/")[-1]
        if listLabel and listClasses:
            if not listTokenizer:  # one truthiness test covers None and []
                listTokenizer = [WhitespaceTokenizer()]
            self.children[listLabel[0]] = [listClasses[0](token_src, listLabel[1:], listClasses[1:], listTokenizer[1:]) for token_src in listTokenizer[0].tokenize(txt)]

    # The methods below only delegate to Sequence. They must stay: the base
    # class declares them @abstractmethod, so removing them would make this
    # class abstract and non-instantiable.

    def inicializeSequence(self, format):
        '''
        Initialize the attributes of a sequence.

        Args:
            format: a string with the origin format of the sequence.
        '''
        super().inicializeSequence(format)

    def __str__(self):
        '''
        Convert a Sequence to a string.

        Returns:
            A string that contains the text of a Sequence.
        '''
        return super().__str__()

    def __repr__(self):
        '''
        Convert a Sequence to a string.

        Returns:
            A string with the formal representation of a Sequence.
        '''
        return super().__repr__()

    def __len__(self):
        '''
        Calculate the length of a Sequence.

        The length of a Sequence is the length of its children.

        Returns:
            A number with the length of the Sequence.
        '''
        return super().__len__()

    def __iter__(self):
        '''
        Iterate over a Sequence.

        To do this, we iterate through the children dictionary.

        Returns:
            A SequenceIterator.
        '''
        return super().__iter__()

    def __getitem__(self, idx):
        '''
        Get the value of a key from the dictionary of children.

        Args:
            idx: a string that represents the key of the children dictionary,
                or an integer that represents the position of the key in the
                children dictionary keys.

        Returns:
            A list of Sequences.
        '''
        return super().__getitem__(idx)

    def __eq__(self, other):
        '''
        Check whether a sequence is equal to the current one.

        Args:
            other: the sequence to compare against.

        Returns:
            True if the sequences are equal, False otherwise.
        '''
        return super().__eq__(other)

    def depth(self, dictionaryList: Optional[list] = None):
        '''
        Calculate the maximum depth of a Sequence.

        Args:
            dictionaryList: the initial list used to calculate the depth.

        Returns:
            A tuple that contains a number (the depth of the Sequence) and a
            list (the route of the maximum depth).
        '''
        return super().depth(dictionaryList)

    def filter(self, level, criteria):
        '''
        Filter the children of a Sequence according to a criteria.

        Args:
            level: the route of the level as a string, separating each level
                with "/".
            criteria: the filter function.

        Returns:
            A generator with the result of the filter.
        '''
        return super().filter(level, criteria)

    def filterMetadata(self, level, criteria):
        '''
        Filter the metadata of a Sequence according to a criteria.

        Args:
            level: the route of the level as a string, separating each level
                with "/".
            criteria: the filter function.

        Returns:
            A generator with the result of the filter.
        '''
        return super().filterMetadata(level, criteria)
\ No newline at end of file
import os
from typing import Optional
from nltk.tokenize import WhitespaceTokenizer
from textflow.Sequence import Sequence
from textflow.SequenceToken import SequenceToken
class SequenceString(Sequence):
    """
    A class that provides methods to create a sequence from a string.

    Attributes:
        format: a string with the origin format of a sequence.
        metadata: a dictionary with the metadata of a sequence.
        children: a dictionary with the subsequences of a sequence.
    """

    def __init__(self, src, listLabel=["token"], listClasses=[SequenceToken], listTokenizer=[WhitespaceTokenizer()]):
        '''
        Initialize a Sequence from a string.

        By default, creates subsequences by splitting the string into words.

        NOTE: the default argument lists are shared between calls (mutable
        default arguments). They are safe here only because they are read,
        never mutated, inside this method.

        Args:
            src: the source string of the sequence.
            listLabel: the children-dictionary keys for each nesting level.
            listClasses: the Sequence subclasses used to build each level
                below this one.
            listTokenizer: the tokenizers used at each level, starting with
                the one that splits this string.

        Raises:
            ValueError: if src is not a string.
        '''
        self.inicializeSequence("string")
        if not isinstance(src, str):
            raise ValueError(f"{src} is not an instance of string")
        self.metadata["text"] = src
        if listLabel and listClasses:
            if not listTokenizer:  # one truthiness test covers None and []
                listTokenizer = [WhitespaceTokenizer()]
            self.children[listLabel[0]] = [listClasses[0](token_src, listLabel[1:], listClasses[1:], listTokenizer[1:]) for token_src in listTokenizer[0].tokenize(src)]

    # The methods below only delegate to Sequence. They must stay: the base
    # class declares them @abstractmethod, so removing them would make this
    # class abstract and non-instantiable.

    def inicializeSequence(self, format):
        '''
        Initialize the attributes of a sequence.

        Args:
            format: a string with the origin format of the sequence.
        '''
        super().inicializeSequence(format)

    def __str__(self):
        '''
        Convert a Sequence to a string.

        Returns:
            A string that contains the text of a Sequence.
        '''
        return super().__str__()

    def __repr__(self):
        '''
        Convert a Sequence to a string.

        Returns:
            A string with the formal representation of a Sequence.
        '''
        return super().__repr__()

    def __len__(self):
        '''
        Calculate the length of a Sequence.

        The length of a Sequence is the length of its children.

        Returns:
            A number with the length of the Sequence.
        '''
        return super().__len__()

    def __iter__(self):
        '''
        Iterate over a Sequence.

        To do this, we iterate through the children dictionary.

        Returns:
            A SequenceIterator.
        '''
        return super().__iter__()

    def __getitem__(self, idx):
        '''
        Get the value of a key from the dictionary of children.

        Args:
            idx: a string that represents the key of the children dictionary,
                or an integer that represents the position of the key in the
                children dictionary keys.

        Returns:
            A list of Sequences.
        '''
        return super().__getitem__(idx)

    def __eq__(self, other):
        '''
        Check whether a sequence is equal to the current one.

        Args:
            other: the sequence to compare against.

        Returns:
            True if the sequences are equal, False otherwise.
        '''
        return super().__eq__(other)

    def depth(self, dictionaryList: Optional[list] = None):
        '''
        Calculate the maximum depth of a Sequence.

        Args:
            dictionaryList: the initial list used to calculate the depth.

        Returns:
            A tuple that contains a number (the depth of the Sequence) and a
            list (the route of the maximum depth).
        '''
        return super().depth(dictionaryList)

    def filter(self, level, criteria):
        '''
        Filter the children of a Sequence according to a criteria.

        Args:
            level: the route of the level as a string, separating each level
                with "/".
            criteria: the filter function.

        Returns:
            A generator with the result of the filter.
        '''
        return super().filter(level, criteria)

    def filterMetadata(self, level, criteria):
        '''
        Filter the metadata of a Sequence according to a criteria.

        Args:
            level: the route of the level as a string, separating each level
                with "/".
            criteria: the filter function.

        Returns:
            A generator with the result of the filter.
        '''
        return super().filterMetadata(level, criteria)
\ No newline at end of file
import os
from typing import Optional
from nltk.tokenize import WhitespaceTokenizer
from textflow.Sequence import Sequence
class SequenceToken(Sequence):
    """
    A class that provides methods to create a sequence from a token.

    Attributes:
        format: a string with the origin format of a sequence.
        metadata: a dictionary with the metadata of a sequence.
        children: a dictionary with the subsequences of a sequence.
    """

    def __init__(self, src, listLabel=None, listClasses=None, listTokenizer=None):
        '''
        Initialize a Sequence from a token.

        Tokens are leaves of the sequence tree, so the listLabel,
        listClasses and listTokenizer arguments are accepted only to keep a
        constructor signature uniform with the other Sequence subclasses;
        they are ignored. They now default to None so a token can also be
        built standalone (existing positional calls keep working).

        Args:
            src: the token text as a string.
            listLabel: unused; kept for interface compatibility.
            listClasses: unused; kept for interface compatibility.
            listTokenizer: unused; kept for interface compatibility.
        '''
        self.inicializeSequence("token")
        self.metadata["text"] = src

    # The methods below only delegate to Sequence. They must stay: the base
    # class declares them @abstractmethod, so removing them would make this
    # class abstract and non-instantiable.

    def inicializeSequence(self, format):
        '''
        Initialize the attributes of a sequence.

        Args:
            format: a string with the origin format of the sequence.
        '''
        super().inicializeSequence(format)

    def __str__(self):
        '''
        Convert a Sequence to a string.

        Returns:
            A string that contains the text of a Sequence.
        '''
        return super().__str__()

    def __repr__(self):
        '''
        Convert a Sequence to a string.

        Returns:
            A string with the formal representation of a Sequence.
        '''
        return super().__repr__()

    def __len__(self):
        '''
        Calculate the length of a Sequence.

        The length of a Sequence is the length of its children.

        Returns:
            A number with the length of the Sequence.
        '''
        return super().__len__()

    def __iter__(self):
        '''
        Iterate over a Sequence.

        To do this, we iterate through the children dictionary.

        Returns:
            A SequenceIterator.
        '''
        return super().__iter__()

    def __getitem__(self, idx):
        '''
        Get the value of a key from the dictionary of children.

        Args:
            idx: a string that represents the key of the children dictionary,
                or an integer that represents the position of the key in the
                children dictionary keys.

        Returns:
            A list of Sequences.
        '''
        return super().__getitem__(idx)

    def __eq__(self, other):
        '''
        Check whether a sequence is equal to the current one.

        Args:
            other: the sequence to compare against.

        Returns:
            True if the sequences are equal, False otherwise.
        '''
        return super().__eq__(other)

    def depth(self, dictionaryList: Optional[list] = None):
        '''
        Calculate the maximum depth of a Sequence.

        Args:
            dictionaryList: the initial list used to calculate the depth.

        Returns:
            A tuple that contains a number (the depth of the Sequence) and a
            list (the route of the maximum depth).
        '''
        return super().depth(dictionaryList)

    def filter(self, level, criteria):
        '''
        Filter the children of a Sequence according to a criteria.

        Args:
            level: the route of the level as a string, separating each level
                with "/".
            criteria: the filter function.

        Returns:
            A generator with the result of the filter.
        '''
        return super().filter(level, criteria)

    def filterMetadata(self, level, criteria):
        '''
        Filter the metadata of a Sequence according to a criteria.

        Args:
            level: the route of the level as a string, separating each level
                with "/".
            criteria: the filter function.

        Returns:
            A generator with the result of the filter.
        '''
        return super().filterMetadata(level, criteria)
......@@ -10,7 +10,26 @@ import math
from textflow.Analyzer import Analyzer
class StylometryAnalyzer(Analyzer):
"""
A class that provides methods to analyze the stylometry of the text of a sequence.
Attributes:
stopwords: a list with stopwords.
puntuation: a list with puntuationMarks.
tokenizer: a function to tokenize the text.
uniqueWords: a list with the vocabulary of a text.
numWordFreqOne: the numbers of words that only appear one time in the text.
TTR: type-token ratio.
RTTR: root type-token ratio.
herdan: the index of Herdan.
mass: the index of Mass.
somers: the index of Somers.
dugast: the index of Dugast.
honore: the index of Honoré.
freqStopWords: the frequence of the stopwords in the text.
freqPuntuationMarks: the frequence of the different puntuations marks in the text.
freqWord: the frequence of the different words in the text.
"""
def __init__(self,stopwords, puntuation = string.punctuation,tokenizer = WhitespaceTokenizer()):
"""
Create a stylometry analyzer from an input object.
......
......@@ -4,6 +4,12 @@ from nltk.tokenize import WhitespaceTokenizer
from textflow.Analyzer import Analyzer
class VolumetryAnalyzer(Analyzer):
"""
A class that provides methods to analyze the volumetry of the text of a sequence.
Attributes:
tokenizer: the way to split the text of a sequence in tokens.
"""
def __init__(self, tokenizer= WhitespaceTokenizer()):
"""
Create a volumetry analyzer from an input object.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment