Flor Miriam Plaza del Arco / WASSA 2018
Commit 7556c49d authored Jul 05, 2018 by geni
new experiments
parent 2f3ef29b
Showing 2 changed files with 38 additions and 19 deletions:
embeddings_RNN.py
embeddings_lexicon_features_emotion_RNN.py
embeddings_RNN.py

@@ -12,7 +12,7 @@ from nltk.tokenize.casual import TweetTokenizer
 import numpy as np
 from keras.preprocessing.sequence import pad_sequences
 from keras.models import Sequential
-from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten, GlobalMaxPooling1D
+from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten, GlobalMaxPooling1D, ActivityRegularization
 from mpl_toolkits.axes_grid1.axes_size import Padded
 from keras.utils import np_utils
 from sklearn import metrics
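The only change in this hunk is the newly imported ActivityRegularization. For context, a minimal sketch (not from this repository) of what that Keras layer does: it is an identity pass-through that adds an L1/L2 penalty on its input activations to the training loss.

    from keras.models import Sequential
    from keras.layers import Dense, ActivityRegularization

    model = Sequential()
    model.add(Dense(8, activation='relu', input_shape=(4,)))
    # Identity on the forward pass; during training it adds
    # 0.0001 * sum(x**2) of its input activations to the loss.
    model.add(ActivityRegularization(l1=0.0, l2=0.0001))
    model.compile(optimizer='adam', loss='mse')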
@@ -159,14 +159,19 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
     #model.add(LSTM(128, return_sequences=True))
     model.add(Bidirectional(LSTM(128, return_sequences=True)))
-    model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
+    #model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
+    model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
+    model.add(Dropout(0.5))
     model.add(MaxPooling1D(pool_size=2, strides=1, padding="same"))
     model.add(Flatten())
     #model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
     model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
     model.add(Dropout(0.5))
+    model.add(Dense(32, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
+    model.add(GlobalMaxPooling1D())
     #model.add(Dropout(0.25))
     #model.add(Dense(16, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
     #model.add(Dropout(0.5))
     model.add(Dense(len(CLASSES), activation='softmax'))
+    model.add(ActivityRegularization(l1=0.0, l2=0.0001))
     # summarize the model
     print(model.summary())
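Note that, in the order committed, Flatten() already collapses the sequence axis to 2-D, so the later GlobalMaxPooling1D() would raise a shape error (it expects 3-D input) when that layer is added. Below is a minimal sketch of a stack along these lines that does build, with the global pooling moved before the dense head; the vocabulary size, sequence length, and embedding dimension are assumptions, and RANDOM_SEED / CLASSES stand in for the module's own constants:

    from keras.models import Sequential
    from keras.layers import (Embedding, Bidirectional, LSTM, Dense, Dropout,
                              MaxPooling1D, GlobalMaxPooling1D, ActivityRegularization)
    from keras.initializers import glorot_uniform
    from keras import regularizers

    RANDOM_SEED = 7                                # assumption: defined in the module
    CLASSES = ['anger', 'fear', 'joy', 'sadness']  # assumption: the label set

    model = Sequential()
    model.add(Embedding(10000, 200, input_length=30))           # assumed sizes
    model.add(Bidirectional(LSTM(128, return_sequences=True)))  # (None, 30, 256)
    model.add(Dense(128, activation='relu',
                    kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2, strides=1, padding="same"))
    model.add(GlobalMaxPooling1D())          # 3-D -> 2-D; stands in for Flatten
    model.add(Dense(64, activation='relu',
                    kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu',
                    kernel_initializer=glorot_uniform(seed=RANDOM_SEED),
                    activity_regularizer=regularizers.l2(0.001)))
    model.add(Dense(len(CLASSES), activation='softmax'))
    model.add(ActivityRegularization(l1=0.0, l2=0.0001))
    print(model.summary())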
@@ -184,7 +189,7 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
     print('Accuracy trainning: %f' % (accuracy*100))
     #prediction
-    tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=32, verbose=1)
+    tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=64, verbose=1)
     return tweets_dev_classified_labels
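The only change here is the prediction batch size (32 to 64). For reference, on a Sequential model with a softmax output, predict_classes is equivalent to taking the argmax of predict; a minimal sketch using the repository's own names:

    import numpy as np

    # Equivalent to model.predict_classes(padded_docs_dev, batch_size=64, verbose=1):
    # argmax over the class-probability axis of the softmax output.
    probs = model.predict(padded_docs_dev, batch_size=64, verbose=1)
    tweets_dev_classified_labels = np.argmax(probs, axis=-1)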
embeddings_lexicon_features_emotion_RNN.py

@@ -12,7 +12,7 @@ from nltk.tokenize.casual import TweetTokenizer
 import numpy as np
 from keras.preprocessing.sequence import pad_sequences
 from keras.models import Sequential
-from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten
+from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten, GlobalMaxPooling1D, ActivityRegularization
 from mpl_toolkits.axes_grid1.axes_size import Padded
 from keras.utils import np_utils
 from sklearn import metrics
@@ -70,8 +70,16 @@ def read_corpus():
     for label in tweets_dev_labels.tolist():
         tweets_dev_labels_numeric.append(CLASSES.index(label))
+    print(tweets_train[:5])
+    print(tweets_train_labels_numeric[:5])
+    tweets_train_labels_numeric = np_utils.to_categorical(tweets_train_labels_numeric)
+    print(tweets_train_labels_numeric[:5])
+    print(tweets_dev[:5])
+    print(tweets_dev_labels[:5])
+    print(tweets_dev_labels_numeric[:5])
     return tweets_train.tweet, tweets_train_labels_numeric, tweets_dev.tweet, tweets_dev_labels_numeric

 def read_lexicon():
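The substantive addition is the np_utils.to_categorical call, which one-hot encodes the integer training labels as the categorical cross-entropy loss used later expects (the surrounding prints are debug output). A minimal sketch of the transformation:

    from keras.utils import np_utils

    # Integer class indices become one-hot rows, e.g. 2 -> [0., 0., 1., 0.]
    print(np_utils.to_categorical([0, 2, 1, 3]))
    # [[1. 0. 0. 0.]
    #  [0. 0. 1. 0.]
    #  [0. 1. 0. 0.]
    #  [0. 0. 0. 1.]]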
@@ -187,7 +195,7 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
     corpus_train_index = fit_transform_vocabulary(tweets_train)
     corpus_dev_index = fit_transform_vocabulary(tweets_dev)
-    max_len_input = 30
+    max_len_input = 27
     train_features_pad = sequence.pad_sequences(corpus_train_index, maxlen=max_len_input, padding="post", truncating="post", value=0)
     padded_docs_dev = sequence.pad_sequences(corpus_dev_index, maxlen=max_len_input, padding="post", truncating="post", value=0)
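max_len_input drops from 30 to 27; with padding="post" and truncating="post", every index sequence is zero-padded or cut at the end to exactly that length. A minimal sketch with toy sequences and a toy maxlen:

    from keras.preprocessing import sequence

    docs = [[5, 8, 2], [4, 1, 9, 7, 3]]
    # Pads short sequences with 0 at the end, truncates long ones at the end.
    print(sequence.pad_sequences(docs, maxlen=4, padding="post", truncating="post", value=0))
    # [[5 8 2 0]
    #  [4 1 9 7]]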
@@ -225,15 +233,21 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
     model.add(e)
     #number of features:_32 each vector of 200 dim is converted to a vector of 32 dim
-    model.add(LSTM(128, return_sequences=True))
-    #model.add(Bidirectional(LSTM(128, return_sequences=True)))
     model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
     model.add(Dropout(0.25))
+    #model.add(LSTM(128, return_sequences=True))
+    model.add(Bidirectional(LSTM(128, return_sequences=True)))
+    #model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
+    model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
+    model.add(Dropout(0.5))
     model.add(MaxPooling1D(pool_size=2, strides=1, padding="same"))
     model.add(Flatten())
+    model.add(Dense(32, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
     #model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
     model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED)))
     model.add(Dropout(0.5))
     #model.add(Dropout(0.25))
     #model.add(Dense(16, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
     #model.add(Dropout(0.5))
     model.add(Dense(len(CLASSES), activation='softmax'))
+    model.add(ActivityRegularization(l1=0.0, l2=0.0001))
     # summarize the model
     print(model.summary())
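In the order committed, Dense(64) now sits between the embedding output and the Bidirectional LSTM. Keras Dense applied to a 3-D tensor maps only the last axis, sharing its weights across timesteps, so the sequence structure is preserved for the LSTM that follows. A minimal sketch of that shape behavior (toy sizes, not from the repository):

    from keras.models import Sequential
    from keras.layers import Dense

    # Dense on (batch, timesteps, features) maps only the feature axis:
    # (None, 30, 200) -> (None, 30, 64), weights shared across timesteps.
    m = Sequential()
    m.add(Dense(64, activation='relu', input_shape=(30, 200)))
    print(m.output_shape)   # (None, 30, 64)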
@@ -243,15 +257,15 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
     model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
     print("Training the model...")
-    earlyStopping = EarlyStopping('loss', patience=5, mode='min')
+    earlyStopping = EarlyStopping('loss', patience=3, mode='min')
-    model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=32, epochs=50, verbose=1, validation_data=(train_features_pad, tweets_train_labels_numeric), callbacks=[earlyStopping])
-    loss, accuracy = model.evaluate(train_features_pad, tweets_train_labels_numeric, batch_size=32, verbose=1)
+    #model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=64, epochs=1, verbose=1, validation_data=(train_features_pad,tweets_train_labels_numeric), callbacks=[earlyStopping])
+    model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=64, epochs=30, verbose=1, callbacks=[earlyStopping])
+    loss, accuracy = model.evaluate(train_features_pad, tweets_train_labels_numeric, batch_size=64, verbose=1)
     print('Accuracy trainning: %f' % (accuracy*100))
     #prediction
-    tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=32, verbose=1)
+    tweets_dev_classified_labels = model.predict_classes(padded_docs_dev, batch_size=64, verbose=1)
     return tweets_dev_classified_labels

 def calculate_quality_performamnce(y_labels, y_classified_labels, model_name):
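The training setup changes here: early-stopping patience drops from 5 to 3, the batch size doubles to 64, epochs drop from 50 to 30, and the validation_data argument (which previously just re-used the training set) is removed. A minimal sketch of the early-stopping semantics, using the repository's names; since no validation data is passed, 'loss' is the training loss:

    from keras.callbacks import EarlyStopping

    # Stop once training loss has failed to improve for 3 consecutive epochs.
    earlyStopping = EarlyStopping(monitor='loss', patience=3, mode='min')
    model.fit(train_features_pad, tweets_train_labels_numeric,
              batch_size=64, epochs=30, verbose=1, callbacks=[earlyStopping])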