Commit 182e38fe by Jaime Collado

Bug fixes and refactoring (ComplexityAnalyzer methods renamed to snake_case; self.crea replaced by self.lang_word_freqs)

parent a88860e6
__pycache__ __pycache__
*.pyc *.pyc
.ipynb_checkpoints
\ No newline at end of file
File mode changed
This diff could not be displayed because it is too large.
from functools import reduce from functools import reduce
import spacy
import math import math
import syllables
import os import os
import re import re
import numpy as np
import syllables
crea_total_path = os.path.join(os.path.dirname(__file__), 'CREA_total.txt') crea_total_path = os.path.join(os.path.dirname(__file__), 'CREA_total.txt')
class ComplexityAnalyzer(): class ComplexityAnalyzer():
...@@ -44,15 +45,15 @@ class ComplexityAnalyzer(): ...@@ -44,15 +45,15 @@ class ComplexityAnalyzer():
self.pos_content_sentences = pos_content_sentences self.pos_content_sentences = pos_content_sentences
def get_all_metrics(self): def get_all_metrics(self):
self.punctuationMarks() self.punctuation_marks()
self.lexicalComplexity() self.lexical_complexity()
self.ssReadability() self.ss_readability()
self.sentenceComplexity() self.sentence_complexity()
self.autoReadability() self.auto_readability()
self.embeddingDepth() self.embedding_depth()
self.readability() self.readability()
self.ageReadability() self.age_readability()
self.yearsCrawford() self.years_crawford()
metrics = { metrics = {
'npunct': self.npunctuation, # number of punctuation marks 'npunct': self.npunctuation, # number of punctuation marks
...@@ -82,7 +83,7 @@ class ComplexityAnalyzer(): ...@@ -82,7 +83,7 @@ class ComplexityAnalyzer():
} }
return metrics return metrics
def punctuationMarks(self): def punctuation_marks(self):
# Solo nos interesa contar los tokens que sean signo de puntuación. # Solo nos interesa contar los tokens que sean signo de puntuación.
# Number of words. # Number of words.
punctuation = [] punctuation = []
...@@ -103,12 +104,12 @@ class ComplexityAnalyzer(): ...@@ -103,12 +104,12 @@ class ComplexityAnalyzer():
return self.npunctuation, self.punctuation, self.N_words return self.npunctuation, self.punctuation, self.N_words
def lexicalComplexity(self): def lexical_complexity(self):
# Number of low frequency words # Number of low frequency words
count = 0 count = 0
for sentence in self.pos_content_sentences: for sentence in self.pos_content_sentences:
for w in sentence: for w in sentence:
if w.text not in self.crea: if w.text not in self.lang_word_freqs:
count+=1 count+=1
N_lfw = count N_lfw = count
self.N_lfw = N_lfw self.N_lfw = N_lfw
...@@ -142,12 +143,12 @@ class ComplexityAnalyzer(): ...@@ -142,12 +143,12 @@ class ComplexityAnalyzer():
return self.N_lfw, self.N_cw, self.N_dcw, self.N_s, self.LDI, self.ILFW, self.LC return self.N_lfw, self.N_cw, self.N_dcw, self.N_s, self.LDI, self.ILFW, self.LC
def ssReadability(self): def ss_readability(self):
''' '''
Spaulding Score of Readability Spaulding Score of Readability
''' '''
#Number of rare words #Number of rare words
byfreq = sorted(self.crea, key=self.crea.__getitem__, reverse=True) byfreq = sorted(self.lang_word_freqs, key=self.lang_word_freqs.__getitem__, reverse=True)
byfreq = byfreq[:1500] byfreq = byfreq[:1500]
count = 0 count = 0
for sentence in self.pos_content_sentences: for sentence in self.pos_content_sentences:
...@@ -162,7 +163,7 @@ class ComplexityAnalyzer(): ...@@ -162,7 +163,7 @@ class ComplexityAnalyzer():
return self.N_rw, self.SSR return self.N_rw, self.SSR
def sentenceComplexity(self): def sentence_complexity(self):
#Number of complex sentences #Number of complex sentences
N_cs = 0 N_cs = 0
for sentence in self.sentences: for sentence in self.sentences:
...@@ -192,7 +193,7 @@ class ComplexityAnalyzer(): ...@@ -192,7 +193,7 @@ class ComplexityAnalyzer():
return self.N_cs, self.ASL, self.CS, self.SCI return self.N_cs, self.ASL, self.CS, self.SCI
def autoReadability(self): def auto_readability(self):
# Number of characters # Number of characters
count = 0 count = 0
listwords = [] listwords = []
...@@ -225,7 +226,7 @@ class ComplexityAnalyzer(): ...@@ -225,7 +226,7 @@ class ComplexityAnalyzer():
return 320 return 320
return 1 + max(self.tree_height(x, cont) for x in root.children) return 1 + max(self.tree_height(x, cont) for x in root.children)
def embeddingDepth(self): def embedding_depth(self):
## Output results ## Output results
roots = [sent.root for sent in self.sentences] roots = [sent.root for sent in self.sentences]
max_list = [] max_list = []
...@@ -304,7 +305,7 @@ class ComplexityAnalyzer(): ...@@ -304,7 +305,7 @@ class ComplexityAnalyzer():
return self.n_syllables, self.n_syllables3, self.nletters, self.huertareadability, self.ifszreadability, self.polinicompressibility, self.mureadability, self.syll_words, self.words_sen return self.n_syllables, self.n_syllables3, self.nletters, self.huertareadability, self.ifszreadability, self.polinicompressibility, self.mureadability, self.syll_words, self.words_sen
def ageReadability(self): def age_readability(self):
minimumage = 0.2495 *(self.N_words/self.nsentences) + 6.4763 * (self.n_syllables /self.N_words) - 7.1395 minimumage = 0.2495 *(self.N_words/self.nsentences) + 6.4763 * (self.n_syllables /self.N_words) - 7.1395
self.minimumage = minimumage self.minimumage = minimumage
...@@ -314,7 +315,7 @@ class ComplexityAnalyzer(): ...@@ -314,7 +315,7 @@ class ComplexityAnalyzer():
return self.minimumage, self.solreadability return self.minimumage, self.solreadability
def yearsCrawford(self): def years_crawford(self):
years = -20.5 *(self.nsentences/self.N_words) + 4.9 * (self.n_syllables /self.N_words) - 3.407 years = -20.5 *(self.nsentences/self.N_words) + 4.9 * (self.n_syllables /self.N_words) - 3.407
self.years = years self.years = years
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment