Commit 7556c49d by geni

new experiments: add an ActivityRegularization(l2=0.0001) layer after the softmax output, one-hot encode the training labels in read_corpus, raise batch sizes from 32 to 64, shorten max_len_input from 30 to 27, cut EarlyStopping patience from 5 to 3, and train for 30 epochs without validation_data

parent 2f3ef29b
@@ -12,7 +12,7 @@ from nltk.tokenize.casual import TweetTokenizer
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten, GlobalMaxPooling1D
from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten, GlobalMaxPooling1D, ActivityRegularization
from mpl_toolkits.axes_grid1.axes_size import Padded
from keras.utils import np_utils
from sklearn import metrics
@@ -159,14 +159,19 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
#model.add(LSTM(128, return_sequences=True))
model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
#model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2, strides=1, padding="same"))
model.add(Flatten())
#model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
model.add(GlobalMaxPooling1D())
#model.add(Dropout(0.25))
#model.add(Dense(16, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
#model.add(Dropout(0.5))
model.add(Dense(len(CLASSES), activation='softmax'))
model.add(ActivityRegularization(l1=0.0, l2=0.0001))
# summarize the model
print(model.summary())
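
A note on the ActivityRegularization layer added above: in Keras it is an identity layer that passes its input through unchanged but adds an L1/L2 penalty on those activations to the training loss. A minimal sketch of the same idea in isolation (the layer sizes here are illustrative, not the ones from this model):

from keras.models import Sequential
from keras.layers import Dense, ActivityRegularization

model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(100,)))
model.add(Dense(4, activation='softmax'))
# Identity layer: output == input, but l2 * sum(x**2) over the softmax
# activations is added to the loss at training time.
model.add(ActivityRegularization(l1=0.0, l2=0.0001))
model.compile(optimizer='adam', loss='categorical_crossentropy')
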
@@ -184,7 +189,7 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
print('Accuracy training: %f' % (accuracy*100))
#prediction
tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=32, verbose=1)
tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=64, verbose=1)
return tweets_dev_classified_labels
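
Side note: predict_classes is specific to Sequential models and was removed in later Keras/TensorFlow releases. On a newer stack the equivalent for this softmax output is a sketch like the following (model and padded_docs_dev as defined in the function above):

import numpy as np

# Class probabilities -> integer class labels via argmax over the class axis.
probs = model.predict(padded_docs_dev, batch_size=64, verbose=1)
tweets_dev_classified_labels = np.argmax(probs, axis=-1)
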
......
@@ -12,7 +12,7 @@ from nltk.tokenize.casual import TweetTokenizer
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten
from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten, GlobalMaxPooling1D, ActivityRegularization
from mpl_toolkits.axes_grid1.axes_size import Padded
from keras.utils import np_utils
from sklearn import metrics
@@ -70,8 +70,16 @@ def read_corpus():
for label in tweets_dev_labels.tolist():
tweets_dev_labels_numeric.append(CLASSES.index(label))
print(tweets_train[:5])
print(tweets_train_labels_numeric[:5])
tweets_train_labels_numeric = np_utils.to_categorical(tweets_train_labels_numeric)
print(tweets_train_labels_numeric[:5])
print(tweets_dev[:5])
print(tweets_dev_labels[:5])
print(tweets_dev_labels_numeric[:5])
return tweets_train.tweet, tweets_train_labels_numeric, tweets_dev.tweet, tweets_dev_labels_numeric
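
The np_utils.to_categorical call above one-hot encodes the integer label list, which is the shape categorical_crossentropy expects. A tiny illustration:

from keras.utils import np_utils

# Integer class indices -> one-hot matrix with one column per class.
np_utils.to_categorical([0, 2, 1], num_classes=3)
# array([[1., 0., 0.],
#        [0., 0., 1.],
#        [0., 1., 0.]])
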
def read_lexicon():
@@ -187,7 +195,7 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
corpus_train_index = fit_transform_vocabulary(tweets_train)
corpus_dev_index = fit_transform_vocabulary(tweets_dev)
max_len_input = 30
max_len_input = 27
train_features_pad = sequence.pad_sequences(corpus_train_index, maxlen=max_len_input, padding="post", truncating="post", value = 0)
padded_docs_dev = sequence.pad_sequences(corpus_dev_index, maxlen=max_len_input, padding="post", truncating="post", value = 0)
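
With padding="post" and truncating="post", every index sequence is forced to exactly max_len_input tokens: short tweets get trailing zeros and long ones are cut at the tail. A small example (maxlen=5 for readability; the code above uses 27):

from keras.preprocessing import sequence

# Pad and truncate at the end of each sequence, filling with 0.
sequence.pad_sequences([[5, 8, 9]], maxlen=5, padding="post", truncating="post", value=0)
# array([[5, 8, 9, 0, 0]], dtype=int32)
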
@@ -225,15 +233,21 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
model.add(e)
# number of features: 32 - each 200-dim vector is converted to a 32-dim vector
model.add(LSTM(128, return_sequences=True))
#model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
model.add(Dropout(0.25))
#model.add(LSTM(128, return_sequences=True))
model.add(Bidirectional(LSTM(128, return_sequences=True)))
#model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2, strides=1, padding="same"))
model.add(Flatten())
model.add(Dense(32, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
#model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
model.add(Dropout(0.5))
#model.add(Dropout(0.25))
#model.add(Dense(16, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
#model.add(Dropout(0.5))
model.add(Dense(len(CLASSES), activation='softmax'))
model.add(ActivityRegularization(l1=0.0, l2=0.0001))
# summarize the model
print(model.summary())
@@ -243,15 +257,15 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
print("Training the model...")
earlyStopping = EarlyStopping('loss', patience=5, mode='min')
earlyStopping = EarlyStopping('loss', patience=3, mode='min')
model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=32, epochs=50, verbose=1, validation_data=(train_features_pad,tweets_train_labels_numeric), callbacks=[earlyStopping])
loss, accuracy = model.evaluate(train_features_pad, tweets_train_labels_numeric, batch_size=32, verbose=1)
#model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=64, epochs=1, verbose=1, validation_data=(train_features_pad,tweets_train_labels_numeric), callbacks=[earlyStopping])
model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=64, epochs=30, verbose=1, callbacks=[earlyStopping])
loss, accuracy = model.evaluate(train_features_pad, tweets_train_labels_numeric, batch_size=64, verbose=1)
print('Accuracy training: %f' % (accuracy*100))
#prediction
tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=32, verbose=1)
tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=64, verbose=1)
return tweets_dev_classified_labels
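
Note that both EarlyStopping variants monitor the training loss ('loss'), and the replaced fit call passed the training set itself as validation_data, so neither configuration tracks generalization. A hedged sketch of the more conventional setup (validation_split is an assumption, not something this commit uses):

from keras.callbacks import EarlyStopping

# Stop on held-out loss instead of training loss; Keras carves the
# validation set out of the training data via validation_split.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, mode='min')
model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=64, epochs=30, verbose=1, validation_split=0.1, callbacks=[early_stopping])
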
def calculate_quality_performamnce(y_labels, y_classified_labels, model_name):
......