Fixing some bugs

parent e65ff7ab
This diff could not be displayed because it is too large.
......@@ -14,7 +14,7 @@ class EmotionAnalyzer(Analyzer):
polarityClassifier: a pipeline that uses a model for inferring the emotions of the text of a sequence.
"""
def __init__(self, task = "text-classification",modelEmotions = 'pysentimiento/robertuito-emotion-analysis', allScores = True):
def __init__(self, task = "text-classification",modelEmotions = 'pysentimiento/robertuito-emotion-analysis', allScores = True, maxEmbedding = 130):
"""
Create an emotions analyzer.
......@@ -22,8 +22,10 @@ class EmotionAnalyzer(Analyzer):
task: the task defining which pipeline will be returned.
model: the model that will be used by the pipeline to make predictions.
allScores: True, if we want that the classifier returns all scores. False, in other case.
maxEmbedding: The number of max_position_embeddings in the config.json of the model selected.
"""
self.emotionsClassifier = pipeline(task,model=modelEmotions, return_all_scores=allScores)
self.maxEmbedding = maxEmbedding
def analyze(self, sequence, tag, levelOfAnalyzer, levelOfResult:Optional[str] = ""):
......@@ -51,7 +53,7 @@ class EmotionAnalyzer(Analyzer):
"""
arrayResults =[]
for text in arrayText:
prediction = self.emotionsClassifier(text)
prediction = self.emotionsClassifier(text[:self.maxEmbedding])
#arrayResults.append(prediction[0][0])
arrayResults.append(prediction)
return arrayResults
......
......@@ -11,7 +11,7 @@ class PolarityAnalyzer(Analyzer):
polarityClassifier: a pipeline that uses a model for inferring the polarity of the text of a sequence.
"""
def __init__(self, task = "text-classification",modelPolarity = 'finiteautomata/beto-sentiment-analysis', allScores = True):
def __init__(self, task = "text-classification",modelPolarity = 'finiteautomata/beto-sentiment-analysis', allScores = True, maxEmbedding = 512):
"""
Create a polarity analyzer.
......@@ -19,8 +19,10 @@ class PolarityAnalyzer(Analyzer):
task: the task defining which pipeline will be returned
model: the model that will be used by the pipeline to make predictions
allScores: True, if we want that the classifier returns all scores. False, in other case
maxEmbedding: The number of max_position_embeddings in the config.json of the model selected.
"""
self.polarityClassifier = pipeline(task,model= modelPolarity, return_all_scores=allScores)
self.maxEmbeding = maxEmbedding
......@@ -48,7 +50,7 @@ class PolarityAnalyzer(Analyzer):
"""
arrayResults =[]
for text in arrayText:
prediction = self.polarityClassifier(text)
prediction = self.polarityClassifier(text[:self.maxEmbeding])
#arrayResults.append(prediction[0][0])
arrayResults.append(prediction)
return arrayResults
......
......@@ -69,7 +69,7 @@ class StylometryAnalyzer(Analyzer):
resultsList = []
for t in arrayText:
t.lower()
tokens = self.tokenizer.tokenize (t)
tokens = self.tokenizer.tokenize(t)
text= [token.lower() for token in tokens]
self.freqWords(text,self.stopwords,self.puntuation)
self.funcionesTTR(text)
......@@ -100,9 +100,20 @@ class StylometryAnalyzer(Analyzer):
self.numWordFreqOne = len( [token[0] for token in self.freqWord if token[1] == 1 ])
self.TTR = len(self.uniqueWords) / len(text)
self.RTTR = len(self.uniqueWords) / math.sqrt(len(text))
self.herdan = math.log(len(self.uniqueWords),10) / math.log(len(text),10)
self.mass = (math.log(len(text),10)- math.log(len(self.uniqueWords),10)) / pow(math.log(len(self.uniqueWords),10),2)
self.somers = math.log(math.log(len(self.uniqueWords),10),10) / math.log(math.log(len(text),10),10)
if len(text)== 1:
self.herdan = math.log(len(self.uniqueWords),10)
else:
self.herdan = math.log(len(self.uniqueWords),10) / math.log(len(text),10)
if pow(math.log(len(self.uniqueWords),10),2) == 0:
self.mass = (math.log(len(text),10)- math.log(len(self.uniqueWords),10))
else:
self.mass = (math.log(len(text),10)- math.log(len(self.uniqueWords),10)) / pow(math.log(len(self.uniqueWords),10),2)
if len(text) == 10:
self.somers = math.log(math.log(len(self.uniqueWords),10),10)
elif len(self.uniqueWords) == 10 or len(self.uniqueWords) == 1:
self.somers = 0
else:
self.somers = math.log(math.log(len(self.uniqueWords),10),10) / math.log(math.log(len(text),10),10)
if math.log(len(text),10)- math.log(len(self.uniqueWords),10) == 0:
self.dugast = pow(math.log(len(text),10),2)
else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment