Commit 182e38fe by Jaime Collado

Bug fixes and refactoring

parent a88860e6
__pycache__
*.pyc
.ipynb_checkpoints
\ No newline at end of file
File mode changed
This diff could not be displayed because it is too large.
from functools import reduce
import spacy
import math
import syllables
import os
import re
import numpy as np
import syllables
crea_total_path = os.path.join(os.path.dirname(__file__), 'CREA_total.txt')
class ComplexityAnalyzer():
......@@ -44,15 +45,15 @@ class ComplexityAnalyzer():
self.pos_content_sentences = pos_content_sentences
def get_all_metrics(self):
self.punctuationMarks()
self.lexicalComplexity()
self.ssReadability()
self.sentenceComplexity()
self.autoReadability()
self.embeddingDepth()
self.punctuation_marks()
self.lexical_complexity()
self.ss_readability()
self.sentence_complexity()
self.auto_readability()
self.embedding_depth()
self.readability()
self.ageReadability()
self.yearsCrawford()
self.age_readability()
self.years_crawford()
metrics = {
'npunct': self.npunctuation, # number of punctuation marks
......@@ -82,7 +83,7 @@ class ComplexityAnalyzer():
}
return metrics
def punctuationMarks(self):
def punctuation_marks(self):
# We only care about counting the tokens that are punctuation marks.
# Number of words.
punctuation = []
......@@ -103,12 +104,12 @@ class ComplexityAnalyzer():
return self.npunctuation, self.punctuation, self.N_words
def lexicalComplexity(self):
def lexical_complexity(self):
# Number of low frequency words
count = 0
for sentence in self.pos_content_sentences:
for w in sentence:
if w.text not in self.crea:
if w.text not in self.lang_word_freqs:
count+=1
N_lfw = count
self.N_lfw = N_lfw
......@@ -142,12 +143,12 @@ class ComplexityAnalyzer():
return self.N_lfw, self.N_cw, self.N_dcw, self.N_s, self.LDI, self.ILFW, self.LC
def ssReadability(self):
def ss_readability(self):
'''
Spaulding Score of Readability
'''
#Number of rare words
byfreq = sorted(self.crea, key=self.crea.__getitem__, reverse=True)
byfreq = sorted(self.lang_word_freqs, key=self.lang_word_freqs.__getitem__, reverse=True)
byfreq = byfreq[:1500]
count = 0
for sentence in self.pos_content_sentences:
......@@ -162,7 +163,7 @@ class ComplexityAnalyzer():
return self.N_rw, self.SSR
def sentenceComplexity(self):
def sentence_complexity(self):
#Number of complex sentences
N_cs = 0
for sentence in self.sentences:
......@@ -192,7 +193,7 @@ class ComplexityAnalyzer():
return self.N_cs, self.ASL, self.CS, self.SCI
def autoReadability(self):
def auto_readability(self):
# Number of characters
count = 0
listwords = []
......@@ -225,7 +226,7 @@ class ComplexityAnalyzer():
return 320
return 1 + max(self.tree_height(x, cont) for x in root.children)
def embeddingDepth(self):
def embedding_depth(self):
## Output results
roots = [sent.root for sent in self.sentences]
max_list = []
......@@ -304,7 +305,7 @@ class ComplexityAnalyzer():
return self.n_syllables, self.n_syllables3, self.nletters, self.huertareadability, self.ifszreadability, self.polinicompressibility, self.mureadability, self.syll_words, self.words_sen
def ageReadability(self):
def age_readability(self):
minimumage = 0.2495 *(self.N_words/self.nsentences) + 6.4763 * (self.n_syllables /self.N_words) - 7.1395
self.minimumage = minimumage
......@@ -314,7 +315,7 @@ class ComplexityAnalyzer():
return self.minimumage, self.solreadability
def yearsCrawford(self):
def years_crawford(self):
years = -20.5 *(self.nsentences/self.N_words) + 4.9 * (self.n_syllables /self.N_words) - 3.407
self.years = years
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment