Extended measures added

parent c3a2ce28
......@@ -72,28 +72,43 @@ class ComplexityFrench(ComplexityLanguage):
"""
self.config += [True, True, True]
self.metricsStr.extend(['KANDEL-MODELS','DALE CHALL', 'SOL'])
self.configExtend += [True, True]
self.metricsStrExtend.extend(['MEAN RARE WORDS', 'STD RARE WORDS'])
def readability(self):
#Number of low frequency words
count = 0
lrarewords = []
for sentence in self.pos_content_sentences:
count = 0
for w in sentence:
if w.get_form() not in self.listDaleChall:
count+=1
N_difficultwords = count
lrarewords.append(count)
#print('lrarewords', lrarewords)
#N_difficultwords = count
self.N_difficultwords = sum(lrarewords)
#print("Number of rare words (N_rw): ", self.N_difficultwords, "\n")
self.mean_rw = np.mean(lrarewords)
self.std_rw = np.std(lrarewords)
#print("mean rare words: ", self.mean_rw)
#print("std rare words: ", self.std_rw)
#Number of syllables and Number of words with 3 or more syllables:tagger
N_syllables = 0
N_syllables3 = 0
for words in self.listwords:
count=0
for character in words:
lwords=[]
for sentence in self.pos_content_sentences:
for w in sentence:
lwords.append(w.get_form())
count=0
for character in lwords:
if re.match('a|e|i|o|u|y', character):
N_syllables +=1
count+=1
if count>=3:
N_syllables3 += 1
if count>=3:
N_syllables3 += 1
self.N_syllables = N_syllables
self.N_syllables3 = N_syllables3
......@@ -103,11 +118,12 @@ class ComplexityFrench(ComplexityLanguage):
#print("KANDEL-MODELS: ", kandelmodelsreadability, "\n")
self.kandelmodelsreadability = kandelmodelsreadability
dalechallreadability =15.79 * (N_difficultwords / self.N_words) + 0.04906 * (self.N_words / self.N_sentences)
dalechallreadability =15.79 * (self.N_difficultwords / self.N_words) + 0.04906 * (self.N_words / self.N_sentences)
#print("DALE CHALL: ", dalechallreadability, "\n")
self.dalechallreadability = dalechallreadability
return self.kandelmodelsreadability, self.dalechallreadability
return self.kandelmodelsreadability, self.dalechallreadability, self.mean_rw, self.std_rw
def ageReadability(self):
......@@ -142,3 +158,24 @@ class ComplexityFrench(ComplexityLanguage):
return metrics
def calcMetricsExtend(self, text):
    """
    Compute the extended complexity metrics enabled in the configuration.

    The language-independent extended metrics are delegated to the parent
    class; this method then adds the rare-word statistics obtained from
    ``self.readability()``.

    ``self.configExtend`` is either ``None`` (every supported metric is
    computed) or a list of booleans aligned position by position with
    ``self.metricsStrExtend``. Names without a matching configuration
    entry are treated as disabled.

    :param text: raw text to analyse.
    :return: the dict returned by the parent implementation, extended with
        'MEAN RARE WORDS' and/or 'STD RARE WORDS' when they are enabled.
    """
    # The parent implementation already calls self.textProcessing(text),
    # so the text is not processed a second time here.
    metricsExtend = super().calcMetricsExtend(text)

    rare_word_metrics = ('MEAN RARE WORDS', 'STD RARE WORDS')
    if self.configExtend is None:
        enabled = set(rare_word_metrics)
    else:
        enabled = {
            name
            for name, active in zip(self.metricsStrExtend, self.configExtend)
            if active
        }

    if enabled.intersection(rare_word_metrics):
        # readability() is evaluated once; positions 2 and 3 of its result
        # hold the per-sentence mean and standard deviation of rare words.
        readability = self.readability()
        if 'MEAN RARE WORDS' in enabled:
            metricsExtend['MEAN RARE WORDS'] = readability[2]
        if 'STD RARE WORDS' in enabled:
            metricsExtend['STD RARE WORDS'] = readability[3]

    return metricsExtend
......@@ -62,9 +62,11 @@ class ComplexityItalian():
Si config == None se calculan todas las métricas de complejidad soportadas
"""
self.config = [True, True, True, True, True, True]
self.metricsIt = ['PUNCTUATION MARKS', 'SCI', 'ARI', 'MU', 'FLESCH-VACA', 'GULPEASE']
pass
self.metricsIt = ['AVERAGE PUNCTUATION MARKS', 'SCI', 'ARI', 'MU', 'FLESCH-VACA', 'GULPEASE']
self.configExtend = [True, True, True, True, True]
self.metricsItExtend = ['MEAN WORDS', 'STD WORDS','COMPLEX SENTENCES', 'MEAN SYLLABLES', 'STD SYLLABLES']
def textProcessing(self, text):
......@@ -96,24 +98,42 @@ class ComplexityItalian():
#Solo nos interesa contar los tokens que sean signo de puntuación.
#Number of words.
punctuation = []
N_words = []
lsentences=[]
for words in self.sentences:
lwords = []
for w in words:
if re.match('F.*', w.get_tag()):
punctuation.append(w.get_form())
else:
N_words.append(w.get_form())
lwords.append(w.get_form())
lsentences.append(len(lwords))
#print('list sentences: ',lsentences)
self.N_words = sum(lsentences)
#print('Number of words (N_w): ', self.N_words, '\n' )
self.mean_words = np.mean(lsentences)
self.std_words = np.std(lsentences)
#print('media', np.mean(lsentences))
#print('std', np.std(lsentences))
#print('Las palabras del texto son : ', N_words)
self.N_words = len(N_words)
#self.N_words = len(N_words)
#print('Number of words (N_w): ', self.N_words, '\n' )
self.N_punctuation = len(punctuation)
self.punctuation = punctuation
#print("PUNCTUATION MARKS = ", self.N_punctuation,'\n')
return self.N_punctuation, self.punctuation, self.N_words
if self.N_words == 0:
punctuation_over_words = 0
else:
punctuation_over_words = self.N_punctuation / self.N_words
self.punctuation_over_words = punctuation_over_words
#print("PUNCTUATION MARKS = ", self.N_punctuation,'\n')
return self.punctuation_over_words, self.mean_words, self.std_words,self.N_punctuation, self.punctuation, self.N_words
def sentenceComplexity(self):
......@@ -182,25 +202,52 @@ class ComplexityItalian():
def mureadability(self):
#Number of syllables and Number of words with 3 or more syllables:tagger
N_syllables = 0
N_syllables3 = 0
for words in self.listwords:
count=0
for character in words:
if re.match('a|e|i|o|u|y', character):
N_syllables +=1
count+=1
if count>=3:
N_syllables3 += 1
self.N_syllables = N_syllables
punctuation = []
lsyllablesentence=[]
for words in self.sentences:
lwords = []
N_syllables = 0
for w in words:
if re.match('F.*', w.get_tag()):
punctuation.append(w.get_form())
else:
lwords.append(w.get_form())
#print('lwords', lwords)
for words in lwords:
count=0
for character in words:
if re.match('a|e|i|o|u|y', character):
N_syllables+=1
count+=1
if count>=3:
N_syllables3+= 1
lsyllablesentence.append(N_syllables)
#print('lsyllablesentence', lsyllablesentence)
self.N_syllables = sum(lsyllablesentence)
self.N_syllables3 = N_syllables3
self.mean_syllables = np.mean(lsyllablesentence)
self.std_syllables = np.std(lsyllablesentence)
#print('media', self.mean_syllables)
#print('std', self.std_syllables)
#Number of letters
listwords = []
for words in self.sentences:
for w in words:
if re.match('F.*', w.get_tag()):
punctuation.append(w.get_form())
else:
listwords.append(w.get_form())
N_letters= 0
letters = []
vecletters =[]
for word in self.listwords:
for word in listwords:
if re.match('[a-zA-Z]|á|ó|í|ú|é', word):
letters.append(word)
N_letters+=len(word)
......@@ -217,7 +264,8 @@ class ComplexityItalian():
#print("READABILITY MU: ", mu, "\n")
self.mu = mu
return self.mu, self.N_syllables, self.N_syllables3, self.letters, self.N_letters, self.vecletters
return self.mu, self.mean_syllables, self.std_syllables, self.N_syllables, self.N_syllables3, self.letters, self.N_letters, self.vecletters
def readability(self):
......@@ -244,9 +292,9 @@ class ComplexityItalian():
for i in range(0, len(self.metricsIt)):
if self.config == None or self.config[i] and self.metricsIt[i] == 'PUNCTUATION MARKS':
if self.config == None or self.config[i] and self.metricsIt[i] == 'AVERAGE PUNCTUATION MARKS':
punctuationmarks = self.punctuationMarks()
metrics['PUNCTUATION MARKS'] = punctuationmarks[0]
metrics['AVERAGE PUNCTUATION MARKS'] = punctuationmarks[0]
if self.config == None or self.config[i] and self.metricsIt[i] == 'SCI':
sentencecomplexity= self.sentenceComplexity()
metrics['SCI'] = self.SCI
......@@ -276,7 +324,41 @@ class ComplexityItalian():
return self.pos_sentences
def calcMetricsExtend(self, text):
    """
    Compute the extended complexity metrics enabled in the configuration.

    ``self.configExtend`` is either ``None`` (every supported metric is
    computed) or a list of booleans aligned position by position with
    ``self.metricsItExtend``. Names without a matching configuration entry
    are treated as disabled.

    Each underlying analysis (``punctuationMarks``, ``sentenceComplexity``,
    ``mureadability``) is executed at most once per call, and only when one
    of the metrics it provides is enabled.

    :param text: raw text to analyse.
    :return: dict mapping each enabled metric name ('MEAN WORDS',
        'STD WORDS', 'COMPLEX SENTENCES', 'MEAN SYLLABLES',
        'STD SYLLABLES') to its value.
    """
    self.textProcessing(text)

    supported = ('MEAN WORDS', 'STD WORDS', 'COMPLEX SENTENCES',
                 'MEAN SYLLABLES', 'STD SYLLABLES')
    if self.configExtend is None:
        enabled = set(supported)
    else:
        enabled = {
            name
            for name, active in zip(self.metricsItExtend, self.configExtend)
            if active
        }

    metricsExtend = {}

    if enabled.intersection(('MEAN WORDS', 'STD WORDS')):
        # Positions 1 and 2 of punctuationMarks() are the mean and the
        # standard deviation of the number of words per sentence.
        punctuationmarks = self.punctuationMarks()
        if 'MEAN WORDS' in enabled:
            metricsExtend['MEAN WORDS'] = punctuationmarks[1]
        if 'STD WORDS' in enabled:
            metricsExtend['STD WORDS'] = punctuationmarks[2]

    if 'COMPLEX SENTENCES' in enabled:
        sentencecomplexity = self.sentenceComplexity()
        metricsExtend['COMPLEX SENTENCES'] = sentencecomplexity[1]

    if enabled.intersection(('MEAN SYLLABLES', 'STD SYLLABLES')):
        # Positions 1 and 2 of mureadability() are the mean and the
        # standard deviation of the syllable count per sentence.
        mureadability = self.mureadability()
        if 'MEAN SYLLABLES' in enabled:
            metricsExtend['MEAN SYLLABLES'] = mureadability[1]
        if 'STD SYLLABLES' in enabled:
            metricsExtend['STD SYLLABLES'] = mureadability[2]

    return metricsExtend
......
......@@ -67,6 +67,9 @@ class ComplexityLanguage():
"""
self.config = [True, True, True, True]
self.metricsStr = ['AVERAGE PUNCTUATION MARKS', 'SCI', 'ARI', 'MU']
self.configExtend = [True, True, True, True, True]
self.metricsStrExtend = ['MEAN WORDS', 'STD WORDS','COMPLEX SENTENCES', 'MEAN SYLLABLES', 'STD SYLLABLES']
pass
......@@ -110,18 +113,22 @@ class ComplexityLanguage():
#Solo nos interesa contar los tokens que sean signo de puntuación.
#Number of words.
punctuation = []
lwords = []
lsentences=[]
for words in self.sentences:
lwords = []
for w in words:
if re.match('F.*', w.get_tag()):
punctuation.append(w.get_form())
else:
lwords.append(w.get_form())
lsentences.append(len(lwords))
self.N_words = len(lwords)
self.N_words = sum(lsentences)
#print('Number of words (N_w): ', self.N_words, '\n' )
self.mean_words = np.mean(lsentences)
self.std_words = np.std(lsentences)
self.N_punctuation = len(punctuation)
self.punctuation = punctuation
......@@ -133,7 +140,7 @@ class ComplexityLanguage():
self.punctuation_over_words = punctuation_over_words
#print("PUNCTUATION MARKS = ", self.N_punctuation,'\n')
return self.punctuation_over_words, self.N_punctuation, self.punctuation, self.N_words
return self.punctuation_over_words, self.mean_words, self.std_words, self.N_punctuation, self.punctuation, self.N_words
def sentenceComplexity(self):
......@@ -199,58 +206,85 @@ class ComplexityLanguage():
def mureadability(self):
    """
    Compute the MU readability index and per-sentence syllable statistics.

    Reads ``self.sentences`` (an iterable of sentences, each an iterable of
    tokens exposing ``get_tag()`` and ``get_form()``) and ``self.N_words``,
    which must already be set (``punctuationMarks`` assigns it).

    Tokens whose tag matches ``F.*`` are treated as punctuation and ignored.
    Syllables are approximated by counting the characters a, e, i, o, u, y
    in each remaining word form.

    Sets ``self.N_syllables``, ``self.N_syllables3``, ``self.mean_syllables``,
    ``self.std_syllables``, ``self.letters``, ``self.N_letters``,
    ``self.vecletters`` and ``self.mu``.

    :return: tuple ``(mu, mean_syllables, std_syllables, N_syllables,
        N_syllables3, letters, N_letters, vecletters)``. ``mu`` is 0.0 when
        fewer than two words are available, because the index divides by
        ``N_words - 1``.
    """
    # Patterns are compiled once instead of on every token / character.
    is_punctuation = re.compile('F.*').match
    is_vowel = re.compile('a|e|i|o|u|y').match
    # NOTE(review): this only accepts words starting with an ASCII letter or
    # one of á ó í ú é; other initial letters (ñ, à, ê, uppercase accents)
    # are left out of the letter counts - confirm whether that is intended.
    starts_with_letter = re.compile('[a-zA-Z]|á|ó|í|ú|é').match

    N_syllables3 = 0          # words with three or more counted vowels
    lsyllablesentence = []    # vowel count of each sentence
    listwords = []            # every non-punctuation word form, in order
    for sentence in self.sentences:
        forms = [w.get_form() for w in sentence
                 if not is_punctuation(w.get_tag())]
        listwords.extend(forms)

        sentence_syllables = 0
        for form in forms:
            count = sum(1 for character in form if is_vowel(character))
            sentence_syllables += count
            if count >= 3:
                N_syllables3 += 1
        lsyllablesentence.append(sentence_syllables)

    self.N_syllables = sum(lsyllablesentence)
    self.N_syllables3 = N_syllables3
    self.mean_syllables = np.mean(lsyllablesentence)
    self.std_syllables = np.std(lsyllablesentence)

    # Number of letters
    letters = [word for word in listwords if starts_with_letter(word)]
    vecletters = [len(word) for word in letters]
    self.letters = letters
    self.N_letters = sum(vecletters)
    self.vecletters = vecletters

    if self.N_words < 2:
        # The formula divides by N_words and by (N_words - 1); with fewer
        # than two words it is undefined, so report 0 instead of raising.
        mu = 0.0
    else:
        x = self.N_letters / self.N_words
        varianza = np.var(self.vecletters)
        mu = (self.N_words / (self.N_words - 1)) * (x / varianza) * 100
    self.mu = mu

    return (self.mu, self.mean_syllables, self.std_syllables,
            self.N_syllables, self.N_syllables3, self.letters,
            self.N_letters, self.vecletters)
def calcMetrics(self, text):
"""
Calcula la métricas de complejidad activadas en la configuración
"""
"""
Calcula la métricas de complejidad activadas en la configuración
"""
self.textProcessing(text)
metrics = {}
punctuationMarks = None
autoreadability = None
sentencecomplexity = None
mureadability= None
for i in range(0, len(self.metricsStr)):
if self.config == None or self.config[i] and self.metricsStr[i] == 'AVERAGE PUNCTUATION MARKS':
punctuationmarks = self.punctuationMarks()
metrics['AVERAGE PUNCTUATION MARKS'] = punctuationmarks[0]
......@@ -263,8 +297,43 @@ class ComplexityLanguage():
if self.config == None or self.config[i] and self.metricsStr[i] == 'MU':
mureadability = self. mureadability()
metrics['MU'] = mureadability[0]
return metrics
return metrics
def calcMetricsExtend(self, text):
    """
    Compute the extended complexity metrics enabled in the configuration.

    ``self.configExtend`` is either ``None`` (every supported metric is
    computed) or a list of booleans aligned position by position with
    ``self.metricsStrExtend``. Names without a matching configuration entry
    are treated as disabled; names this method does not know are ignored.

    Each underlying analysis (``punctuationMarks``, ``sentenceComplexity``,
    ``mureadability``) is executed at most once per call, and only when one
    of the metrics it provides is enabled.

    :param text: raw text to analyse.
    :return: dict mapping each enabled metric name ('MEAN WORDS',
        'STD WORDS', 'COMPLEX SENTENCES', 'MEAN SYLLABLES',
        'STD SYLLABLES') to its value.
    """
    self.textProcessing(text)

    supported = ('MEAN WORDS', 'STD WORDS', 'COMPLEX SENTENCES',
                 'MEAN SYLLABLES', 'STD SYLLABLES')
    if self.configExtend is None:
        enabled = set(supported)
    else:
        enabled = {
            name
            for name, active in zip(self.metricsStrExtend, self.configExtend)
            if active
        }

    metricsExtend = {}

    if enabled.intersection(('MEAN WORDS', 'STD WORDS')):
        # Positions 1 and 2 of punctuationMarks() are the mean and the
        # standard deviation of the number of words per sentence.
        punctuationmarks = self.punctuationMarks()
        if 'MEAN WORDS' in enabled:
            metricsExtend['MEAN WORDS'] = punctuationmarks[1]
        if 'STD WORDS' in enabled:
            metricsExtend['STD WORDS'] = punctuationmarks[2]

    if 'COMPLEX SENTENCES' in enabled:
        sentencecomplexity = self.sentenceComplexity()
        metricsExtend['COMPLEX SENTENCES'] = sentencecomplexity[1]

    if enabled.intersection(('MEAN SYLLABLES', 'STD SYLLABLES')):
        # Positions 1 and 2 of mureadability() are the mean and the
        # standard deviation of the syllable count per sentence.
        mureadability = self.mureadability()
        if 'MEAN SYLLABLES' in enabled:
            metricsExtend['MEAN SYLLABLES'] = mureadability[1]
        if 'STD SYLLABLES' in enabled:
            metricsExtend['STD SYLLABLES'] = mureadability[2]

    return metricsExtend
def getPOS(self, text):
self.textProcessing(text)
......
......@@ -23,7 +23,9 @@ class ComplexityPolish():
self.config = [True, True, True, True, True, True]
self.metricsStr = ['AVERAGE PUNCTUATION MARKS', 'ARI', 'FOG', 'FLESCH', 'FLESCH-KINCAID', 'PISAREK']
pass
self.configExtend = [True, True, True, True]
self.metricsStrExtend = ['MEAN WORDS', 'STD WORDS', 'MEAN SYLLABLES', 'STD SYLLABLES']
def textProcessing(self, text):
text = text.replace(u'\xa0', u' ')
......@@ -45,7 +47,7 @@ class ComplexityPolish():
N_text_tokens = len(self.text_tokens)
self.N_text_tokens = N_text_tokens
#print('Tokens: ', self.N_text_tokens)
# y ahora reorganizamos las oraciones a partir de los puntos aislados
sentences = []
ini = 0
......@@ -64,63 +66,80 @@ class ComplexityPolish():
N_sentences = len(sentences)
self.N_sentences = N_sentences
#print('Sentences: ',self.sentences)
N_charac=0
for word in self.text_tokens:
N_charac += len(word)
self.N_charac = N_charac
#print('The number the character is: ', self.N_charac)
N_syllables = 0
N_syllables3 = 0
for words in self.text_tokens:
count=0
for character in words:
if re.match('a|e|i|o|u|y', character):
N_syllables +=1
count+=1
if count>=3:
N_syllables3 += 1
self.N_syllables = N_syllables
self.N_syllables3 = N_syllables3
#print('The number of syllables is: ',self.N_syllables)
#print('The number of syllables3 is: ', self.N_syllables3)
return self.text_tokens, self.N_text_tokens, self.sentences, self.N_sentences, self.N_charac, self.N_syllables, self.N_syllables3
return self.text_tokens, self.N_text_tokens, self.sentences, self.N_sentences, self.N_charac
def punctuationMarks(self):
    """
    Count words, punctuation tokens, letters and syllables per sentence.

    Reads ``self.sentences`` (an iterable of sentences, each a sequence of
    string tokens). A token is counted as a word when its first character
    is alphabetic (Unicode-aware, so words starting with letters such as
    ł, ś or ż are recognised); any other token counts as punctuation.
    Syllables are approximated by counting the characters a, e, i, o, u, y
    in each word.

    Sets ``self.N_syllables``, ``self.N_syllables3``, ``self.mean_syllables``,
    ``self.std_syllables``, ``self.N_words``, ``self.mean_words``,
    ``self.std_words``, ``self.words``, ``self.N_letters``,
    ``self.N_punctuation`` and ``self.punctuation_over_words``.

    :return: tuple ``(punctuation_over_words, mean_words, std_words,
        mean_syllables, std_syllables, N_punctuation, words, N_words,
        N_letters, N_syllables, N_syllables3)``.
    """
    # NOTE(review): only lowercase, unaccented vowels are counted, so
    # ą, ę, ó and capital vowels are missed - confirm whether intended.
    vowels = 'aeiouy'

    N_punctuation = 0
    N_syllables3 = 0          # words with three or more counted vowels
    letters = []              # every word token, in order
    lsentences = []           # number of words in each sentence
    lsyllablesentence = []    # vowel count of each sentence
    for sentence in self.sentences:
        lwords = []
        for token in sentence:
            # token[:1] keeps an empty token from raising; it is then
            # counted as punctuation, as the previous pattern match did.
            if token[:1].isalpha():
                lwords.append(token)
            else:
                N_punctuation += 1
        letters.extend(lwords)
        lsentences.append(len(lwords))

        sentence_syllables = 0
        for word in lwords:
            count = sum(1 for character in word if character in vowels)
            sentence_syllables += count
            if count >= 3:
                N_syllables3 += 1
        lsyllablesentence.append(sentence_syllables)

    self.N_syllables = sum(lsyllablesentence)
    self.N_syllables3 = N_syllables3
    self.mean_syllables = np.mean(lsyllablesentence)
    self.std_syllables = np.std(lsyllablesentence)

    self.N_words = sum(lsentences)
    self.mean_words = np.mean(lsentences)
    self.std_words = np.std(lsentences)

    self.words = letters
    self.N_letters = sum(len(word) for word in letters)
    self.N_punctuation = N_punctuation

    # Avoid dividing by zero when the text holds no words at all.
    if self.N_words == 0:
        punctuation_over_words = 0
    else:
        punctuation_over_words = self.N_punctuation / self.N_words
    self.punctuation_over_words = punctuation_over_words

    return (self.punctuation_over_words, self.mean_words, self.std_words,
            self.mean_syllables, self.std_syllables, self.N_punctuation,
            self.words, self.N_words, self.N_letters, self.N_syllables,
            self.N_syllables3)
def readability(self):
......@@ -198,3 +217,33 @@ class ComplexityPolish():
self.pos_sentences = pos_sentences
return self.pos_sentences
def calcMetricsExtend(self, text):
    """
    Compute the extended complexity metrics enabled in the configuration.

    ``self.configExtend`` is either ``None`` (every supported metric is
    computed) or a list of booleans aligned position by position with
    ``self.metricsStrExtend``. Names without a matching configuration entry
    are treated as disabled.

    All four metrics come from a single ``punctuationMarks()`` call, which
    is made only when at least one of them is enabled.

    :param text: raw text to analyse.
    :return: dict mapping each enabled metric name ('MEAN WORDS',
        'STD WORDS', 'MEAN SYLLABLES', 'STD SYLLABLES') to its value.
    """
    self.textProcessing(text)

    # Position of each metric inside the tuple returned by punctuationMarks().
    positions = {
        'MEAN WORDS': 1,
        'STD WORDS': 2,
        'MEAN SYLLABLES': 3,
        'STD SYLLABLES': 4,
    }
    if self.configExtend is None:
        enabled = set(positions)
    else:
        enabled = {
            name
            for name, active in zip(self.metricsStrExtend, self.configExtend)
            if active
        }

    metricsExtend = {}
    if enabled.intersection(positions):
        punctuationmarks = self.punctuationMarks()
        for name, index in positions.items():
            if name in enabled:
                metricsExtend[name] = punctuationmarks[index]

    return metricsExtend
......@@ -47,6 +47,9 @@ class ComplexitySpanish(ComplexityLanguage):
self.config += [True, True, True, True, True, True, True, True, True, True, True, True]
self.metricsStr.extend(['MaxDEPTH','MinDEPTH', 'MeanDEPTH', 'StdDEPTH', 'LC','SSR', 'HUERTA', 'IFSZ', 'POLINI', 'MINIMUN AGE', 'SOL', 'CRAWFORD'])
self.configExtend += [True, True]
self.metricsStrExtend.extend(['MEAN RARE WORDS', 'STD RARE WORDS'])
def textProcessing(self, text):
text = text.replace(u'\xa0', u' ').replace('"', '')
# meter todas las funciones en una patron de los tokens válidos
......@@ -166,21 +169,24 @@ class ComplexitySpanish(ComplexityLanguage):
#Number of rare words
byfreq = sorted(self.crea, key=self.crea.__getitem__, reverse=True)
byfreq = byfreq[:1500]
count = 0
lrarewords = []
for sentence in self.pos_content_sentences:
count = 0
for w in sentence:
if w.get_form().lower() not in byfreq:
count +=1
lrarewords.append(count)
N_rw = count
self.N_rw = N_rw
self.N_rw = sum(lrarewords)
#print("Number of rare words (N_rw): ", self.N_rw, "\n")
self.mean_rw = np.mean(lrarewords)
self.std_rw = np.std(lrarewords)
SSR = 1.609*(self.N_words / self.N_sentences) + 331.8* (self.N_rw /self.N_words) + 22.0
self.SSR= SSR
#print ("SPAULDING SPANISH READABILITY (SSR) ", self.SSR, "\n")
return self.SSR, self.N_rw
return self.SSR, self.mean_rw, self.std_rw, self.N_rw
def readability(self):
......@@ -300,4 +306,28 @@ class ComplexitySpanish(ComplexityLanguage):
metrics['CRAWFORD'] = self.yearsCrawford()
return metrics
def calcMetricsExtend(self, text):
    """
    Compute the extended complexity metrics enabled in the configuration.

    The language-independent extended metrics are delegated to the parent
    class; this method then adds the rare-word statistics obtained from
    ``self.ssReadability()``.

    ``self.configExtend`` is either ``None`` (every supported metric is
    computed) or a list of booleans aligned position by position with
    ``self.metricsStrExtend``. Names without a matching configuration
    entry are treated as disabled.

    :param text: raw text to analyse.
    :return: the dict returned by the parent implementation, extended with
        'MEAN RARE WORDS' and/or 'STD RARE WORDS' when they are enabled.
    """
    # The parent implementation already calls self.textProcessing(text),
    # so the text is not processed a second time here.
    metricsExtend = super().calcMetricsExtend(text)

    rare_word_metrics = ('MEAN RARE WORDS', 'STD RARE WORDS')
    if self.configExtend is None:
        enabled = set(rare_word_metrics)
    else:
        enabled = {
            name
            for name, active in zip(self.metricsStrExtend, self.configExtend)
            if active
        }

    if enabled.intersection(rare_word_metrics):
        # ssReadability() is evaluated once; positions 1 and 2 of its result
        # hold the per-sentence mean and standard deviation of rare words.
        ssreadability = self.ssReadability()
        if 'MEAN RARE WORDS' in enabled:
            metricsExtend['MEAN RARE WORDS'] = ssreadability[1]
        if 'STD RARE WORDS' in enabled:
            metricsExtend['STD RARE WORDS'] = ssreadability[2]

    return metricsExtend
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment