Update TextAnalisisSpacy.py

parent 20d4231a
Showing with 7 additions and 6 deletions
......@@ -79,8 +79,9 @@ class TextAnalisisSpacy():
dic[cat].append(dic[cat][len(dic[cat])-1])
df_n = pd.DataFrame(dic)
splot = sb.heatmap(df_n.transpose()).set_title(columna)
-plt.show()
-if export:
+if export == False:
+plt.show()
+else:
splot.get_figure().savefig(columna+"-"+ type_g+".jpg", bbox_inches='tight')
plt.clf()
......@@ -276,7 +277,7 @@ class TextAnalisisSpacy():
display(df_freq_palabras.transpose())
df_freq_palabras_tr = df_freq_palabras.transpose()
df_freq_palabras_tr.to_csv("POS_"+ str(pos)+"_freq.csv")
-return df_freq_palabras.transpose()
+return df_freq_palabras.transpose()
def lexical_diversity(self):
# Lexical diversity for each text
......@@ -392,15 +393,15 @@ class TextAnalisisSpacy():
# Univariate feature selection
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif, mutual_info_classif
-print('Highest scoring '+ k +' features with f_classif...')
-kbest_classif = SelectKBest(f_classif, k) # Elimina todo menos las k características de puntuación más alta
+print('Highest scoring '+ str(k) +' features with f_classif...')
+kbest_classif = SelectKBest(f_classif, k=k) # Elimina todo menos las k características de puntuación más alta
X_classif = kbest_classif.fit_transform(X, y)
print('Selected columns:',kbest_classif.get_feature_names_out(self.df.columns.values[2:]))
display(pd.DataFrame(X_classif))
pd.DataFrame(X_classif).to_csv("f_classif.csv")
print('Highest scoring '+ str(k) +' features with mutual_info_classif...')
-kbest_mut = SelectKBest(mutual_info_classif, k)
+kbest_mut = SelectKBest(mutual_info_classif, k=k)
X_mut = kbest_mut.fit_transform(X, y)
print('Selected columns:', kbest_mut.get_feature_names_out(self.df.columns.values[2:]))
display(pd.DataFrame(X_mut))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment