Commit 7556c49d by geni

new experiments

parent 2f3ef29b
@@ -12,7 +12,7 @@ from nltk.tokenize.casual import TweetTokenizer
 import numpy as np
 from keras.preprocessing.sequence import pad_sequences
 from keras.models import Sequential
-from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten, GlobalMaxPooling1D
+from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten, GlobalMaxPooling1D, ActivityRegularization
 from mpl_toolkits.axes_grid1.axes_size import Padded
 from keras.utils import np_utils
 from sklearn import metrics
@@ -159,14 +159,19 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
 #model.add(LSTM(128, return_sequences=True))
 model.add(Bidirectional(LSTM(128, return_sequences=True)))
-model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
+#model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
+model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
+model.add(Dropout(0.5))
+model.add(MaxPooling1D(pool_size=2, strides=1, padding="same"))
+model.add(Flatten())
+#model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
+model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
 model.add(Dropout(0.5))
-model.add(Dense(32, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
-model.add(GlobalMaxPooling1D())
 #model.add(Dropout(0.25))
 #model.add(Dense(16, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
 #model.add(Dropout(0.5))
 model.add(Dense(len(CLASSES), activation='softmax'))
+model.add(ActivityRegularization(l1=0.0,l2=0.0001))
 # summarize the model
 print(model.summary())
@@ -184,7 +189,7 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
 print('Accuracy trainning: %f' % (accuracy*100))
 #prediction
-tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=32, verbose=1)
+tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=64, verbose=1)
 return tweets_dev_classified_labels
...
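For reference, this is the architecture the first file converges on after the commit. The sketch below is a hedged restatement, not part of the diff: vocab_size, embedding_dim, RANDOM_SEED, and CLASSES are hypothetical stand-ins for values defined elsewhere in the repository.

# Hedged sketch of the post-commit model; vocab_size, embedding_dim,
# RANDOM_SEED and CLASSES are assumed placeholders, not values from this diff.
from keras.models import Sequential
from keras.layers import (Dense, LSTM, Embedding, Bidirectional, MaxPooling1D,
                          Dropout, Flatten, ActivityRegularization)
from keras.initializers import glorot_uniform

vocab_size, embedding_dim, max_len_input = 10000, 200, 27   # assumed
CLASSES = ['N', 'NEU', 'P']                                 # assumed label set
RANDOM_SEED = 1                                             # assumed

model = Sequential()
model.add(Embedding(vocab_size, embedding_dim, input_length=max_len_input))
model.add(Bidirectional(LSTM(128, return_sequences=True)))       # (27, 256) per tweet
model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2, strides=1, padding="same"))  # sequence length preserved
model.add(Flatten())                                             # 27*128 = 3456 features
model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
model.add(Dropout(0.5))
model.add(Dense(len(CLASSES), activation='softmax'))
model.add(ActivityRegularization(l1=0.0, l2=0.0001))  # L2 penalty on the softmax outputs
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
print(model.summary())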
@@ -12,7 +12,7 @@ from nltk.tokenize.casual import TweetTokenizer
 import numpy as np
 from keras.preprocessing.sequence import pad_sequences
 from keras.models import Sequential
-from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten
+from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten, GlobalMaxPooling1D, ActivityRegularization
 from mpl_toolkits.axes_grid1.axes_size import Padded
 from keras.utils import np_utils
 from sklearn import metrics
@@ -70,8 +70,16 @@ def read_corpus():
 for label in tweets_dev_labels.tolist():
 tweets_dev_labels_numeric.append(CLASSES.index(label))
+print(tweets_train[:5])
+print(tweets_train_labels_numeric[:5])
 tweets_train_labels_numeric = np_utils.to_categorical(tweets_train_labels_numeric)
+print(tweets_train_labels_numeric[:5])
+print(tweets_dev[:5])
+print(tweets_dev_labels[:5])
+print(tweets_dev_labels_numeric[:5])
 return tweets_train.tweet, tweets_train_labels_numeric, tweets_dev.tweet, tweets_dev_labels_numeric
 def read_lexicon():
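The added print calls make the effect of np_utils.to_categorical visible: integer class indices become one-hot rows. A minimal illustration (the label values are hypothetical):

from keras.utils import np_utils

labels_numeric = [0, 2, 1]   # hypothetical output of CLASSES.index(label)
print(np_utils.to_categorical(labels_numeric))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]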
@@ -187,7 +195,7 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
 corpus_train_index = fit_transform_vocabulary(tweets_train)
 corpus_dev_index = fit_transform_vocabulary(tweets_dev)
-max_len_input = 30
+max_len_input = 27
 train_features_pad = sequence.pad_sequences(corpus_train_index, maxlen=max_len_input, padding="post", truncating="post", value = 0)
 padded_docs_dev = sequence.pad_sequences(corpus_dev_index, maxlen=max_len_input, padding="post", truncating="post", value = 0)
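Lowering max_len_input from 30 to 27 means every tweet is now post-padded or post-truncated to exactly 27 token indices. A small sketch of what sequence.pad_sequences does under these settings (the input sequences are made up):

from keras.preprocessing import sequence

docs = [[5, 12, 7], list(range(1, 31))]   # hypothetical token-index sequences
padded = sequence.pad_sequences(docs, maxlen=27, padding="post", truncating="post", value=0)
print(padded.shape)    # (2, 27)
print(padded[0][:5])   # [ 5 12  7  0  0]  short tweet zero-padded at the end
print(padded[1][-3:])  # [25 26 27]        long tweet cut after its first 27 tokens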
@@ -225,15 +233,21 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
 model.add(e)
 #number of features:_32 each vector of 200 dim is converted to a vector of 32 dim
-model.add(LSTM(128, return_sequences=True))
-#model.add(Bidirectional(LSTM(128, return_sequences=True)))
+#model.add(LSTM(128, return_sequences=True))
+model.add(Bidirectional(LSTM(128, return_sequences=True)))
-model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
-model.add(Dropout(0.25))
+#model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
+model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
+model.add(Dropout(0.5))
+model.add(MaxPooling1D(pool_size=2, strides=1, padding="same"))
 model.add(Flatten())
-model.add(Dense(32, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
+#model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
+model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
 model.add(Dropout(0.5))
+#model.add(Dropout(0.25))
+#model.add(Dense(16, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
+#model.add(Dropout(0.5))
 model.add(Dense(len(CLASSES), activation='softmax'))
+model.add(ActivityRegularization(l1=0.0,l2=0.0001))
 # summarize the model
 print(model.summary())
@@ -243,15 +257,15 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
 model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
 print("Training the model...")
-earlyStopping = EarlyStopping('loss', patience=5, mode='min')
+earlyStopping = EarlyStopping('loss', patience=3, mode='min')
-model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=32, epochs=50, verbose=1, validation_data=(train_features_pad,tweets_train_labels_numeric), callbacks=[earlyStopping])
-loss, accuracy = model.evaluate(train_features_pad, tweets_train_labels_numeric, batch_size=32, verbose=1)
+#model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=64, epochs=1, verbose=1, validation_data=(train_features_pad,tweets_train_labels_numeric), callbacks=[earlyStopping])
+model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=64, epochs=30, verbose=1, callbacks=[earlyStopping])
+loss, accuracy = model.evaluate(train_features_pad, tweets_train_labels_numeric, batch_size=64, verbose=1)
 print('Accuracy trainning: %f' % (accuracy*100))
 #prediction
-tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=32, verbose=1)
+tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=64, verbose=1)
 return tweets_dev_classified_labels
 def calculate_quality_performamnce(y_labels, y_classified_labels, model_name):
...
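Two things stand out in the final hunk: training now early-stops on the training loss (no validation data is passed to fit, and patience drops from 5 to 3 epochs), and prediction still uses predict_classes, a Sequential-only helper that was removed in later Keras releases. A hedged equivalent using plain predict:

import numpy as np

# Equivalent to model.predict_classes(padded_docs_dev, batch_size=64, verbose=1):
probs = model.predict(padded_docs_dev, batch_size=64, verbose=1)  # softmax outputs
tweets_dev_classified_labels = np.argmax(probs, axis=1)           # class indices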