Fixing ComplexityAnalyzer and NGramsAnalyzer

f0547ed3 · Estrella Vallecillo · 02dc279b · f0547ed3 · f0547ed3
Commit f0547ed3 authored Jul 18, 2022 by Estrella Vallecillo
Showing 2 changed files with 23 additions and 14 deletions
textflow/ComplexityAnalyzer.py
textflow/NGramsAnalyzer.py
--- a/textflow/ComplexityAnalyzer.py
+++ b/textflow/ComplexityAnalyzer.py
@@ -207,9 +207,14 @@ class ComplexityAnalyzer(Analyzer):
        
        avgLettersWords = numLetters/self.numWords
        listLenLetters = np.array(listLenLetters)
-        
-        self.poliniComprensibility = 95.2 - (9.7 * avgLettersWords) - ((0.35*self.numWords)/self.numSentences)        
-        self.muLegibility = (self.numWords/(self.numWords-1))*(avgLettersWords/listLenLetters.var())*100
+        if self.numSentences == 0:
+            self.poliniComprensibility = 95.2 - (9.7 * avgLettersWords) - ((0.35*self.numWords)/1)
+        else:    
+            self.poliniComprensibility = 95.2 - (9.7 * avgLettersWords) - ((0.35*self.numWords)/self.numSentences)        
+        if self.numWords < 2:
+            self.muLegibility = 0
+        else:    
+            self.muLegibility = (self.numWords/(self.numWords-1))*(avgLettersWords/listLenLetters.var())*100
        
    def lexicalIndex(self):
        """

--- a/textflow/NGramsAnalyzer.py
+++ b/textflow/NGramsAnalyzer.py
@@ -70,15 +70,19 @@ class NGramsAnalyzer(Analyzer):
        Args:
            text: a string/text to analyze
        """
-        vect = sklearn.feature_extraction.text.CountVectorizer(ngram_range=(self.ngramsSize,self.ngramsSize),tokenizer=self.tokenizer.tokenize,stop_words= self.stopwords)
-        text=[text]
-        vect.fit(text)
-        self.listOfNGrams = vect.get_feature_names_out().tolist()
-        dicfreq={}
-        for i in self.listOfNGrams:
-            if i in dicfreq:
-                dicfreq[i] += 1
-            else:
-                dicfreq[i] = 1
-        self.freqNGrams = dicfreq
+        try:
+            vect = sklearn.feature_extraction.text.CountVectorizer(ngram_range=(self.ngramsSize,self.ngramsSize),tokenizer=self.tokenizer.tokenize,stop_words= self.stopwords)
+            text=[text]
+            vect.fit(text)
+            self.listOfNGrams = vect.get_feature_names_out().tolist()
+            dicfreq={}
+            for i in self.listOfNGrams:
+                if i in dicfreq:
+                    dicfreq[i] += 1
+                else:
+                    dicfreq[i] = 1
+            self.freqNGrams = dicfreq
+        except Exception:
+            self.listOfNGrams = []
+            self.freqNGrams = {}