Update TextAnalisisSpacy.py

parent 20d4231a
Showing with 5 additions and 4 deletions
@@ -79,8 +79,9 @@ class TextAnalisisSpacy():
dic[cat].append(dic[cat][len(dic[cat])-1]) dic[cat].append(dic[cat][len(dic[cat])-1])
df_n = pd.DataFrame(dic) df_n = pd.DataFrame(dic)
splot = sb.heatmap(df_n.transpose()).set_title(columna) splot = sb.heatmap(df_n.transpose()).set_title(columna)
if export == False:
plt.show() plt.show()
if export: else:
splot.get_figure().savefig(columna+"-"+ type_g+".jpg", bbox_inches='tight') splot.get_figure().savefig(columna+"-"+ type_g+".jpg", bbox_inches='tight')
plt.clf() plt.clf()
@@ -392,15 +393,15 @@ class TextAnalisisSpacy():
# Univariate feature selection # Univariate feature selection
from sklearn.feature_selection import SelectKBest from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif, mutual_info_classif from sklearn.feature_selection import f_classif, mutual_info_classif
print('Highest scoring '+ k +' features with f_classif...') print('Highest scoring '+ str(k) +' features with f_classif...')
kbest_classif = SelectKBest(f_classif, k) # Elimina todo menos las k características de puntuación más alta kbest_classif = SelectKBest(f_classif, k=k) # Elimina todo menos las k características de puntuación más alta
X_classif = kbest_classif.fit_transform(X, y) X_classif = kbest_classif.fit_transform(X, y)
print('Selected columns:',kbest_classif.get_feature_names_out(self.df.columns.values[2:])) print('Selected columns:',kbest_classif.get_feature_names_out(self.df.columns.values[2:]))
display(pd.DataFrame(X_classif)) display(pd.DataFrame(X_classif))
pd.DataFrame(X_classif).to_csv("f_classif.csv") pd.DataFrame(X_classif).to_csv("f_classif.csv")
print('Highest scoring '+ str(k) +' features with mutual_info_classif...') print('Highest scoring '+ str(k) +' features with mutual_info_classif...')
kbest_mut = SelectKBest(mutual_info_classif, k) kbest_mut = SelectKBest(mutual_info_classif, k=k)
X_mut = kbest_mut.fit_transform(X, y) X_mut = kbest_mut.fit_transform(X, y)
print('Selected columns:', kbest_mut.get_feature_names_out(self.df.columns.values[2:])) print('Selected columns:', kbest_mut.get_feature_names_out(self.df.columns.values[2:]))
display(pd.DataFrame(X_mut)) display(pd.DataFrame(X_mut))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment