extended measures added

9622b455 · Arturo Montejo Ráez · c3a2ce28 · 9622b455 · 9622b455 · 9622b455
Commit 9622b455 authored May 18, 2018 by Arturo Montejo Ráez
Showing with 375 additions and 108 deletions
ComplexityFrench.py
ComplexityItalian.py
ComplexityLanguage.py
ComplexityPolish.py
ComplexitySpanish.py
--- a/ComplexityFrench.py
+++ b/ComplexityFrench.py
@@ -72,28 +72,43 @@ class ComplexityFrench(ComplexityLanguage):
        """
        self.config += [True, True, True]
        self.metricsStr.extend(['KANDEL-MODELS','DALE CHALL', 'SOL'])
+        
+        self.configExtend += [True, True]
+        self.metricsStrExtend.extend(['MEAN RARE WORDS', 'STD RARE WORDS'])
    
     def readability(self):
-            
+        """Compute the KANDEL-MODELS and DALE CHALL scores plus per-sentence rare-word statistics."""
        #Number of low frequency words   
-        count = 0
+        lrarewords = []
        for sentence in self.pos_content_sentences:
+            count = 0
            for w in sentence:
                if w.get_form() not in self.listDaleChall:
                    count+=1
-        N_difficultwords = count
+            lrarewords.append(count)
+        # lrarewords holds one rare-word count per sentence (appended once the
+        # sentence's inner loop has finished), so its sum is the text total.
+        self.N_difficultwords = sum(lrarewords)
+        # Per-sentence mean and standard deviation of the rare-word counts.
+        self.mean_rw = np.mean(lrarewords)
+        self.std_rw = np.std(lrarewords)
+        # NOTE(review): np.mean/np.std of an empty list give nan; confirm that
+        # pos_content_sentences is never empty here.
        
        #Number of syllables and Number of words with 3 or more syllables:tagger
        N_syllables = 0
        N_syllables3 = 0
-        for words in self.listwords:
-            count=0
-            for character in words:
+        # Vowel characters stand in for syllables: every vowel adds to
+        # N_syllables, and a word with three or more adds to N_syllables3.
+        lwords = [w.get_form() for sentence in self.pos_content_sentences for w in sentence]
+        for word in lwords:
+            count=0
+            for character in word:
                if re.match('a|e|i|o|u|y', character):
                    N_syllables +=1
                    count+=1
-            if count>=3:
-                N_syllables3 += 1
+            if count>=3:
+                N_syllables3 += 1
                                  
        self.N_syllables = N_syllables
        self.N_syllables3 = N_syllables3
@@ -103,11 +118,12 @@ class ComplexityFrench(ComplexityLanguage):
        #print("KANDEL-MODELS: ", kandelmodelsreadability, "\n")
        self.kandelmodelsreadability = kandelmodelsreadability
        
-        dalechallreadability =15.79 * (N_difficultwords / self.N_words) + 0.04906 *  (self.N_words / self.N_sentences) 
+        dalechallreadability =15.79 * (self.N_difficultwords / self.N_words) + 0.04906 *  (self.N_words / self.N_sentences) 
        #print("DALE CHALL: ", dalechallreadability, "\n")
        self.dalechallreadability = dalechallreadability
        
-        return self.kandelmodelsreadability, self.dalechallreadability
+        return self.kandelmodelsreadability, self.dalechallreadability, self.mean_rw, self.std_rw
+    
    
     def ageReadability(self):
                        
@@ -142,3 +158,24 @@ class ComplexityFrench(ComplexityLanguage):
        return metrics 
    
    
+     def calcMetricsExtend(self, text):
+        """Extend the base-class extended metrics with the rare-word statistics.
+
+        Adds 'MEAN RARE WORDS' and 'STD RARE WORDS' (taken from readability())
+        when enabled in self.configExtend, or always when it is None.
+        """
+        self.textProcessing(text)
+        metricsExtend = super().calcMetricsExtend(text)
+        # metric name -> position in the tuple returned by readability()
+        positions = {'MEAN RARE WORDS': 2, 'STD RARE WORDS': 3}
+        if self.configExtend is None:
+            wanted = list(positions)
+        else:
+            wanted = [name for i, name in enumerate(self.metricsStrExtend)
+                      if name in positions and self.configExtend[i]]
+        if wanted:
+            readability = self.readability()  # computed once for both
+            for name in wanted:
+                metricsExtend[name] = readability[positions[name]]
+        return metricsExtend
+        
--- a/ComplexityItalian.py
+++ b/ComplexityItalian.py
@@ -62,9 +62,11 @@ class ComplexityItalian():
         Si config == None se calculan todas las métricas de complejidad soportadas
        """
        self.config = [True, True, True, True, True, True]
-        self.metricsIt = ['PUNCTUATION MARKS', 'SCI', 'ARI', 'MU', 'FLESCH-VACA', 'GULPEASE']
-       
-    pass
+        self.metricsIt = ['AVERAGE PUNCTUATION MARKS', 'SCI', 'ARI', 'MU', 'FLESCH-VACA', 'GULPEASE']
+ 
+        self.configExtend = [True, True, True, True, True]
+        self.metricsItExtend = ['MEAN WORDS', 'STD WORDS','COMPLEX SENTENCES', 'MEAN SYLLABLES', 'STD SYLLABLES']
+
                         
    
    def textProcessing(self, text):
@@ -96,24 +98,42 @@ class ComplexityItalian():
        #Solo nos interesa contar los tokens que sean signo de puntuación.
        #Number of words.
        punctuation = []
-        N_words = []
+        lsentences=[]
        for words in self.sentences:
+            lwords = []
            for w in words:
                if re.match('F.*', w.get_tag()):
-                    
                    punctuation.append(w.get_form())
                else:
-                    N_words.append(w.get_form())
+                    lwords.append(w.get_form())
+            lsentences.append(len(lwords))
+        #print('list sentences: ',lsentences)

+        self.N_words = sum(lsentences)
+        #print('Number of words (N_w): ', self.N_words, '\n' )
+        self.mean_words = np.mean(lsentences)
+        self.std_words = np.std(lsentences)
+        #print('media', np.mean(lsentences))
+        #print('std', np.std(lsentences))
+        
        #print('Las palabras del texto son : ', N_words)
-        self.N_words = len(N_words) 
+        #self.N_words = len(N_words) 
        #print('Number of words (N_w): ', self.N_words, '\n' )
        
        self.N_punctuation = len(punctuation)
        self.punctuation = punctuation
        #print("PUNCTUATION MARKS = ", self.N_punctuation,'\n')
        
-        return self.N_punctuation, self.punctuation, self.N_words
+        if self.N_words == 0:
+            punctuation_over_words = 0
+        else:
+            punctuation_over_words = self.N_punctuation / self.N_words
+            
+        self.punctuation_over_words = punctuation_over_words
+        #print("PUNCTUATION MARKS = ", self.N_punctuation,'\n')
+        
+        return self.punctuation_over_words, self.mean_words, self.std_words,self.N_punctuation, self.punctuation, self.N_words
+    
    
    def sentenceComplexity(self):
                        
@@ -182,25 +202,52 @@ class ComplexityItalian():
    def mureadability(self):
        
        #Number of syllables and Number of words with 3 or more syllables:tagger
-        N_syllables = 0
+        
        N_syllables3 = 0
-        for words in self.listwords:
-            count=0
-            for character in words:
-                if re.match('a|e|i|o|u|y', character):
-                    N_syllables +=1
-                    count+=1
-            if count>=3:
-                N_syllables3 += 1
-                                  
-        self.N_syllables = N_syllables
+        punctuation = []
+        lsyllablesentence=[]
+        for words in self.sentences:
+            lwords = []
+            N_syllables = 0
+            for w in words:
+                if re.match('F.*', w.get_tag()):
+                    punctuation.append(w.get_form())
+                else:
+                    lwords.append(w.get_form())
+            #print('lwords', lwords)
+            
+            for words in lwords:
+                count=0
+                for character in words:
+                    if re.match('a|e|i|o|u|y', character):
+                        N_syllables+=1
+                        count+=1
+                if count>=3:
+                    N_syllables3+= 1
+                    
+            lsyllablesentence.append(N_syllables)
+            #print('lsyllablesentence', lsyllablesentence)
+        
+        self.N_syllables = sum(lsyllablesentence)
        self.N_syllables3 = N_syllables3
+        self.mean_syllables = np.mean(lsyllablesentence)
+        self.std_syllables = np.std(lsyllablesentence)
+        #print('media', self.mean_syllables)
+        #print('std', self.std_syllables)
        
        #Number of letters
+        listwords = []
+        for words in self.sentences:
+            for w in words:
+                if re.match('F.*', w.get_tag()):
+                    punctuation.append(w.get_form())
+                else:
+                    listwords.append(w.get_form())
+        
        N_letters= 0
        letters = []
        vecletters =[]
-        for word in self.listwords:
+        for word in listwords:
                if re.match('[a-zA-Z]|á|ó|í|ú|é', word):
                    letters.append(word)
                    N_letters+=len(word)
@@ -217,7 +264,8 @@ class ComplexityItalian():
        #print("READABILITY MU: ", mu, "\n")
        self.mu = mu
      
-        return  self.mu, self.N_syllables, self.N_syllables3, self.letters, self.N_letters, self.vecletters
+        return  self.mu, self.mean_syllables, self.std_syllables, self.N_syllables, self.N_syllables3, self.letters, self.N_letters, self.vecletters
+

    def readability(self):
        
@@ -244,9 +292,9 @@ class ComplexityItalian():
        
        for i in range(0, len(self.metricsIt)):
            
-            if self.config == None or self.config[i] and self.metricsIt[i] == 'PUNCTUATION MARKS':
+            if self.config == None or self.config[i] and self.metricsIt[i] == 'AVERAGE PUNCTUATION MARKS':
                punctuationmarks = self.punctuationMarks()
-                metrics['PUNCTUATION MARKS'] = punctuationmarks[0]
+                metrics['AVERAGE PUNCTUATION MARKS'] = punctuationmarks[0]
            if self.config == None or self.config[i] and self.metricsIt[i] == 'SCI':
                sentencecomplexity= self.sentenceComplexity()
                metrics['SCI'] = self.SCI
@@ -276,7 +324,41 @@ class ComplexityItalian():
           
        return self.pos_sentences

-       
+    def calcMetricsExtend(self, text):
+        """Compute the extended metrics enabled in self.configExtend.
+
+        Args:
+            text: raw text, handed to self.textProcessing() before measuring.
+
+        Returns:
+            dict mapping metric name to value. self.configExtend[i] switches
+            self.metricsItExtend[i] on or off; when self.configExtend is
+            None every metric handled here is computed.
+        """
+        self.textProcessing(text)
+        # metric name -> (helper method, position in that helper's tuple)
+        sources = {
+            'MEAN WORDS': ('punctuationMarks', 1),
+            'STD WORDS': ('punctuationMarks', 2),
+            'COMPLEX SENTENCES': ('sentenceComplexity', 1),
+            'MEAN SYLLABLES': ('mureadability', 1),
+            'STD SYLLABLES': ('mureadability', 2),
+        }
+        if self.configExtend is None:
+            wanted = list(sources)
+        else:
+            wanted = [name for i, name in enumerate(self.metricsItExtend)
+                      if name in sources and self.configExtend[i]]
+
+        results = {}  # helper name -> its tuple, so each helper runs once
+        metricsExtend = {}
+        for name in wanted:
+            helper, pos = sources[name]
+            if helper not in results:
+                results[helper] = getattr(self, helper)()
+            metricsExtend[name] = results[helper][pos]
+        return metricsExtend
+        
 
     


--- a/ComplexityLanguage.py
+++ b/ComplexityLanguage.py
@@ -67,6 +67,9 @@ class ComplexityLanguage():
        """
        self.config = [True, True, True, True]
        self.metricsStr = ['AVERAGE PUNCTUATION MARKS', 'SCI', 'ARI', 'MU']
+        self.configExtend = [True, True, True, True, True]
+        self.metricsStrExtend = ['MEAN WORDS', 'STD WORDS','COMPLEX SENTENCES', 'MEAN SYLLABLES', 'STD SYLLABLES']
+       

    pass

@@ -110,18 +113,22 @@ class ComplexityLanguage():
        #Solo nos interesa contar los tokens que sean signo de puntuación.
        #Number of words.
        punctuation = []
-        lwords = []
+        lsentences=[]
+        # lsentences collects the number of non-punctuation tokens per sentence.
        for words in self.sentences:
+            lwords = []
            for w in words:
                if re.match('F.*', w.get_tag()):
                    punctuation.append(w.get_form())
                else:
                    lwords.append(w.get_form())
+            lsentences.append(len(lwords))


-        self.N_words = len(lwords)
+        self.N_words = sum(lsentences)
        #print('Number of words (N_w): ', self.N_words, '\n' )
-
+        self.mean_words = np.mean(lsentences)
+        self.std_words = np.std(lsentences)
        self.N_punctuation = len(punctuation)
        self.punctuation = punctuation

@@ -133,7 +140,7 @@ class ComplexityLanguage():
        self.punctuation_over_words = punctuation_over_words
        #print("PUNCTUATION MARKS = ", self.N_punctuation,'\n')

-        return self.punctuation_over_words, self.N_punctuation, self.punctuation, self.N_words
+        return self.punctuation_over_words, self.mean_words, self.std_words, self.N_punctuation, self.punctuation, self.N_words

    def sentenceComplexity(self):

@@ -199,58 +206,85 @@ class ComplexityLanguage():


    def mureadability(self):
-
+        """Compute the MU readability score together with per-sentence syllable statistics."""
        #Number of syllables and Number of words with 3 or more syllables:tagger
-        N_syllables = 0
        N_syllables3 = 0
-        for words in self.listwords:
-            count=0
-            for character in words:
-                if re.match('a|e|i|o|u|y', character):
-                    N_syllables +=1
-                    count+=1
-            if count>=3:
-                N_syllables3 += 1
-
-        self.N_syllables = N_syllables
+        punctuation = []
+        lsyllablesentence=[]
+        for words in self.sentences:
+            lwords = []
+            N_syllables = 0
+            for w in words:
+                if re.match('F.*', w.get_tag()):
+                    punctuation.append(w.get_form())
+                else:
+                    lwords.append(w.get_form())
+            # lwords now holds this sentence's tokens whose tag does not match 'F.*'.
+            
+            for words in lwords:
+                count=0
+                for character in words:
+                    if re.match('a|e|i|o|u|y', character):
+                        N_syllables+=1
+                        count+=1
+                if count>=3:
+                    N_syllables3+= 1
+                    
+            lsyllablesentence.append(N_syllables)
+            # One entry per sentence: the vowel characters (a/e/i/o/u/y) counted in it.
+        
+        self.N_syllables = sum(lsyllablesentence)
        self.N_syllables3 = N_syllables3
+        self.mean_syllables = np.mean(lsyllablesentence)
+        self.std_syllables = np.std(lsyllablesentence)
+        # mean_syllables / std_syllables summarise the per-sentence counts above;
+        # N_syllables3 counts words containing three or more of those vowels.

        #Number of letters
+        listwords = []
+        for words in self.sentences:
+            for w in words:
+                if re.match('F.*', w.get_tag()):
+                    punctuation.append(w.get_form())
+                else:
+                    listwords.append(w.get_form())
+        # listwords is the flat list of every non-punctuation token, used for the letter counts.
        N_letters= 0
        letters = []
        vecletters =[]
-        for word in self.listwords:
+        for word in listwords:
                if re.match('[a-zA-Z]|á|ó|í|ú|é', word):
                    letters.append(word)
                    N_letters+=len(word)
                    vecletters.append(len(word))
-
+                    
        self.letters = letters
        self.N_letters = N_letters
        self.vecletters = vecletters
-
+        # NOTE(review): self.N_words is not set in this method (punctuationMarks assigns it) - confirm call order.
        x=self.N_letters / self.N_words
        varianza=np.var(self.vecletters)
-
+        # NOTE(review): the next line divides by zero when N_words == 1 or varianza == 0 - confirm inputs.
        mu = (self.N_words /(self.N_words-1))*(x/varianza)*100
        #print("READABILITY MU: ", mu, "\n")
        self.mu = mu
-
-        return  self.mu, self.N_syllables, self.N_syllables3, self.letters, self.N_letters, self.vecletters
+      
+        return  self.mu,self.mean_syllables, self.std_syllables, self.N_syllables, self.N_syllables3, self.letters, self.N_letters, self.vecletters

    def calcMetrics(self, text):
-        """
-        Calcula la métricas de complejidad activadas en la configuración
-        """
+        """ 
+        Calcula la métricas de complejidad activadas en la configuración 
+        """ 
        self.textProcessing(text)
        metrics = {}
-
+        
        punctuationMarks = None
        autoreadability = None
        sentencecomplexity = None
-
+        mureadability= None
+       
        for i in range(0, len(self.metricsStr)):
-
+            
            if self.config == None or self.config[i] and self.metricsStr[i] == 'AVERAGE PUNCTUATION MARKS':
                punctuationmarks = self.punctuationMarks()
                metrics['AVERAGE PUNCTUATION MARKS'] = punctuationmarks[0]
@@ -263,8 +297,43 @@ class ComplexityLanguage():
            if self.config == None or self.config[i] and self.metricsStr[i] == 'MU':
                mureadability = self. mureadability()
                metrics['MU'] = mureadability[0]
-
-        return metrics
+                      
+        return metrics 
+    
+    def calcMetricsExtend(self, text):
+        """Compute the extended metrics enabled in self.configExtend.
+
+        Args:
+            text: raw text, handed to self.textProcessing() before measuring.
+
+        Returns:
+            dict mapping metric name to value. self.configExtend[i] switches
+            self.metricsStrExtend[i] on or off; when self.configExtend is
+            None every metric handled here is computed.
+        """
+        self.textProcessing(text)
+        # metric name -> (helper method, position in that helper's tuple)
+        sources = {
+            'MEAN WORDS': ('punctuationMarks', 1),
+            'STD WORDS': ('punctuationMarks', 2),
+            'COMPLEX SENTENCES': ('sentenceComplexity', 1),
+            'MEAN SYLLABLES': ('mureadability', 1),
+            'STD SYLLABLES': ('mureadability', 2),
+        }
+        if self.configExtend is None:
+            wanted = list(sources)
+        else:
+            wanted = [name for i, name in enumerate(self.metricsStrExtend)
+                      if name in sources and self.configExtend[i]]
+
+        results = {}  # helper name -> its tuple, so each helper runs once
+        metricsExtend = {}
+        for name in wanted:
+            helper, pos = sources[name]
+            if helper not in results:
+                results[helper] = getattr(self, helper)()
+            metricsExtend[name] = results[helper][pos]
+        return metricsExtend

    def getPOS(self, text):
        self.textProcessing(text)

--- a/ComplexityPolish.py
+++ b/ComplexityPolish.py
@@ -23,7 +23,9 @@ class ComplexityPolish():
        self.config = [True, True, True, True, True, True]
        self.metricsStr = ['AVERAGE PUNCTUATION MARKS', 'ARI', 'FOG', 'FLESCH', 'FLESCH-KINCAID', 'PISAREK']

-        pass
+        self.configExtend = [True, True, True, True]
+        self.metricsStrExtend = ['MEAN WORDS', 'STD WORDS', 'MEAN SYLLABLES', 'STD SYLLABLES']
+       

    def textProcessing(self, text):
        text = text.replace(u'\xa0', u' ')
@@ -45,7 +47,7 @@ class ComplexityPolish():
        N_text_tokens = len(self.text_tokens)
        self.N_text_tokens = N_text_tokens
        #print('Tokens: ', self.N_text_tokens)
-
+               
        # y ahora reorganizamos las oraciones a partir de los puntos aislados
        sentences = []
        ini = 0
@@ -64,63 +66,80 @@ class ComplexityPolish():
        N_sentences = len(sentences)
        self.N_sentences = N_sentences
        #print('Sentences: ',self.sentences)
-
-
-
+        
        N_charac=0
        for word in self.text_tokens:
            N_charac += len(word)
        self.N_charac = N_charac
        #print('The number the character is: ', self.N_charac)
-
-        N_syllables = 0
-        N_syllables3 = 0
-        for words in self.text_tokens:
-            count=0
-            for character in words:
-                if re.match('a|e|i|o|u|y', character):
-                    N_syllables +=1
-                    count+=1
-            if count>=3:
-                N_syllables3 += 1
-
-        self.N_syllables = N_syllables
-        self.N_syllables3 = N_syllables3
-
-        #print('The number of syllables is: ',self.N_syllables)
-        #print('The number of syllables3 is: ', self.N_syllables3)
-
-        return self.text_tokens, self.N_text_tokens, self.sentences, self.N_sentences, self.N_charac, self.N_syllables, self.N_syllables3
-
+        
+        return self.text_tokens, self.N_text_tokens, self.sentences, self.N_sentences, self.N_charac 
+    
    def punctuationMarks(self):
+        """Compute the punctuation-to-word ratio plus per-sentence word and syllable statistics."""
        N_punctuation = 0
        letters = []
+        lsentences = []
        N_letters = 0
-        for word in self.text_tokens:
-            if re.match('[a-zA-Z]|á|ó|í|ú|é', word):
-                letters.append(word)
-                N_letters+=len(word)
-            else:
-                N_punctuation += 1
-
-        self.words = letters
-        self.N_words = len(letters)
-        #print('N_words: ', self.N_words)
+        N_syllables3 = 0
+        lsyllablesentence=[]
+        # self.sentences is walked one sentence at a time; each w is used as a string token.
+        for words in self.sentences:
+            lwords = []
+            N_syllables = 0
+        
+            for w in words:
+                if re.match('[a-zA-Z]|á|ó|í|ú|é', w):
+                    lwords.append(w)
+                    letters.append(w)
+                    N_letters+=len(w)
+                else:          
+                    N_punctuation += 1
+            lsentences.append(len(lwords))
+            # Vowel characters (a/e/i/o/u/y) serve as the per-word syllable count.
+            for words in lwords:
+                count=0
+                for character in words:
+                    if re.match('a|e|i|o|u|y', character):
+                        N_syllables+=1
+                        count+=1
+                if count>=3:
+                    N_syllables3+= 1
+                    
+            lsyllablesentence.append(N_syllables)
+        # lsyllablesentence / lsentences: per-sentence syllable and word counts.
+        
+        self.N_syllables = sum(lsyllablesentence)
+        self.N_syllables3 = N_syllables3
+        self.mean_syllables = np.mean(lsyllablesentence)
+        self.std_syllables = np.std(lsyllablesentence)
+        # N_syllables3 counts words with three or more vowel characters.
+        # The mean/std above are taken over sentences, not over words.
+        # Word totals and statistics come from the per-sentence word counts.
+        self.N_words = sum(lsentences)
+        # NOTE(review): np.mean/np.std give nan when self.sentences is empty - confirm.
+        self.mean_words = np.mean(lsentences)
+        self.std_words = np.std(lsentences)
+        # The tuple returned below carries the four statistics at positions 1-4,
+        # which are the positions calcMetricsExtend reads.
+        
+        self.words = letters         
        self.N_letters = N_letters
        self.N_punctuation = N_punctuation
-
+     
        if self.N_words == 0:
            punctuation_over_words = 0
        else:
            punctuation_over_words = self.N_punctuation / self.N_words
-
+            
        self.punctuation_over_words = punctuation_over_words
-
+                
        #print('The number of letter is: ', N_letters)
        #print('The list of letter is: ', letters)
        #print('The PUNCTUATION MARKS is: ', self.N_punctuation, '\n')
+        
+        return self.punctuation_over_words, self.mean_words, self.std_words, self.mean_syllables, self.std_syllables, self.N_punctuation, self.words, self.N_words, self.N_letters, self.N_syllables, self.N_syllables3

-        return self.punctuation_over_words, self.N_punctuation, self.words, self.N_words, self.N_letters

    def readability(self):

@@ -198,3 +217,33 @@ class ComplexityPolish():
        self.pos_sentences = pos_sentences

        return self.pos_sentences
+        
+    def calcMetricsExtend(self, text):
+        """Compute the extended metrics enabled in self.configExtend.
+
+        Args:
+            text: raw text, handed to self.textProcessing() before measuring.
+
+        Returns:
+            dict mapping metric name to value. self.configExtend[i] switches
+            self.metricsStrExtend[i] on or off; when self.configExtend is
+            None every metric handled here is computed.
+        """
+        self.textProcessing(text)
+
+        # metric name -> position in the tuple returned by punctuationMarks()
+        positions = {'MEAN WORDS': 1, 'STD WORDS': 2,
+                     'MEAN SYLLABLES': 3, 'STD SYLLABLES': 4}
+        if self.configExtend is None:
+            wanted = list(positions)
+        else:
+            wanted = [name for i, name in enumerate(self.metricsStrExtend)
+                      if name in positions and self.configExtend[i]]
+
+        metricsExtend = {}
+        if wanted:
+            # One pass yields all four statistics; no need to rerun per metric.
+            punctuationmarks = self.punctuationMarks()
+            for name in wanted:
+                metricsExtend[name] = punctuationmarks[positions[name]]
+        return metricsExtend
--- a/ComplexitySpanish.py
+++ b/ComplexitySpanish.py
@@ -47,6 +47,9 @@ class ComplexitySpanish(ComplexityLanguage):
        self.config += [True, True, True, True, True, True, True, True, True, True, True, True]
        self.metricsStr.extend(['MaxDEPTH','MinDEPTH', 'MeanDEPTH', 'StdDEPTH', 'LC','SSR', 'HUERTA', 'IFSZ', 'POLINI', 'MINIMUN AGE', 'SOL', 'CRAWFORD'])
        
+        self.configExtend += [True, True]
+        self.metricsStrExtend.extend(['MEAN RARE WORDS', 'STD RARE WORDS'])
+        
    def textProcessing(self, text):
        text = text.replace(u'\xa0', u' ').replace('"', '')
        # meter todas las funciones en una patron de los tokens válidos
@@ -166,21 +169,24 @@ class ComplexitySpanish(ComplexityLanguage):
        #Number of rare words
        byfreq = sorted(self.crea, key=self.crea.__getitem__, reverse=True)
        byfreq = byfreq[:1500]
-        count = 0
+        lrarewords = []
        for sentence in self.pos_content_sentences:
+            count = 0
            for w in sentence:
                if w.get_form().lower() not in byfreq:
                    count +=1
+            lrarewords.append(count)
        
-        N_rw = count
-        self.N_rw = N_rw
+        self.N_rw = sum(lrarewords)
        #print("Number of rare words (N_rw): ", self.N_rw, "\n")
-        
+        self.mean_rw = np.mean(lrarewords)
+        self.std_rw = np.std(lrarewords)
+                
        SSR = 1.609*(self.N_words / self.N_sentences) + 331.8* (self.N_rw /self.N_words) + 22.0 
        self.SSR= SSR
        #print ("SPAULDING SPANISH READABILITY (SSR) ", self.SSR, "\n")
        
-        return self.SSR, self.N_rw 
+        return self.SSR, self.mean_rw, self.std_rw, self.N_rw 
    
    def readability(self):
        
@@ -300,4 +306,28 @@ class ComplexitySpanish(ComplexityLanguage):
                metrics['CRAWFORD'] = self.yearsCrawford()
              
        return metrics 
+        
+    def calcMetricsExtend(self, text):
+        """Extend the base-class extended metrics with the rare-word statistics.
+
+        Adds 'MEAN RARE WORDS' and 'STD RARE WORDS' (taken from ssReadability())
+        when enabled in self.configExtend, or always when it is None.
+        """
+        self.textProcessing(text)
+        metricsExtend = super().calcMetricsExtend(text)
+
+        # metric name -> position in the tuple returned by ssReadability()
+        positions = {'MEAN RARE WORDS': 1, 'STD RARE WORDS': 2}
+        if self.configExtend is None:
+            wanted = list(positions)
+        else:
+            wanted = [name for i, name in enumerate(self.metricsStrExtend)
+                      if name in positions and self.configExtend[i]]
+        if wanted:
+            ssreadability = self.ssReadability()  # computed once for both
+            for name in wanted:
+                metricsExtend[name] = ssreadability[positions[name]]
+        return metricsExtend
+    
+