Flor Miriam Plaza del Arco / WASSA 2018
Commit d473b6ca authored Jul 04, 2018 by geni
update embeddings_rnn.py
parent eb0c57c1
Showing 1 changed file with 14 additions and 12 deletions
embeddings_RNN.py
...
@@ -12,7 +12,7 @@ from nltk.tokenize.casual import TweetTokenizer
 import numpy as np
 from keras.preprocessing.sequence import pad_sequences
 from keras.models import Sequential
-from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten
+from keras.layers import Dense, LSTM, Embedding, Bidirectional, Conv1D, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation, Flatten, GlobalMaxPooling1D
 from mpl_toolkits.axes_grid1.axes_size import Padded
 from keras.utils import np_utils
 from sklearn import metrics
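The only substantive change in this hunk is appending GlobalMaxPooling1D to the keras.layers import; the layer is used further down in this commit. As a minimal standalone sketch (assuming Keras and NumPy are installed; the shapes are illustrative), GlobalMaxPooling1D takes the maximum over the time axis, collapsing a (batch, timesteps, features) tensor to (batch, features):

import numpy as np
from keras.models import Sequential
from keras.layers import GlobalMaxPooling1D

# Toy model: a single pooling layer over 27 timesteps of 128 features.
model = Sequential()
model.add(GlobalMaxPooling1D(input_shape=(27, 128)))

x = np.random.rand(2, 27, 128).astype("float32")
print(model.predict(x).shape)  # (2, 128): the timestep axis is pooled away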
...
@@ -127,7 +127,7 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
     corpus_train_index = fit_transform_vocabulary(tweets_train)
     corpus_dev_index = fit_transform_vocabulary(tweets_dev)
-    max_len_input = 30
+    max_len_input = 27
     train_features_pad = sequence.pad_sequences(corpus_train_index, maxlen=max_len_input, padding="post", truncating="post", value=0)
     padded_docs_dev = sequence.pad_sequences(corpus_dev_index, maxlen=max_len_input, padding="post", truncating="post", value=0)
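This hunk shortens the padded tweet length from 30 tokens to 27; both pad_sequences calls above pad and truncate at the end of the sequence ("post") with 0 as the filler index. A minimal sketch of that behaviour with toy index sequences (assuming keras.preprocessing is available):

from keras.preprocessing import sequence

# Two toy tweets already mapped to vocabulary indices.
docs = [[5, 2, 9],              # shorter than maxlen -> zero-padded at the end
        [7, 1, 4, 3, 8, 6, 2]]  # longer than maxlen  -> truncated at the end

padded = sequence.pad_sequences(docs, maxlen=5, padding="post", truncating="post", value=0)
print(padded)
# [[5 2 9 0 0]
#  [7 1 4 3 8]]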
...
@@ -157,14 +157,15 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
     model.add(e)
     # number of features: 32; each vector of 200 dim is converted to a vector of 32 dim
-    model.add(LSTM(128, return_sequences=True))
+    #model.add(Bidirectional(LSTM(128, return_sequences=True)))
     model.add(Dense(64, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
     model.add(Dropout(0.25))
     model.add(Flatten())
     model.add(Dense(32, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
-    #model.add(LSTM(128, return_sequences=True))
+    model.add(Bidirectional(LSTM(128, return_sequences=True)))
     model.add(Dense(128, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.0001)))
     model.add(Dropout(0.5))
     model.add(Dense(32, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
+    model.add(GlobalMaxPooling1D())
     #model.add(Dropout(0.25))
     #model.add(Dense(16, activation='relu', kernel_initializer=glorot_uniform(seed=RANDOM_SEED), activity_regularizer=regularizers.l2(0.001)))
     #model.add(Dropout(0.5))
     model.add(Dense(len(CLASSES), activation='softmax'))
     # summarize the model
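Taken together, this hunk swaps the plain LSTM for a Bidirectional LSTM placed after the first dense blocks and pools its per-timestep outputs with the newly imported GlobalMaxPooling1D before the softmax classifier. A reduced standalone sketch of that pattern (hypothetical vocabulary size, embedding dimension, and class count; the intermediate Dense/Dropout layers from the diff are omitted; assuming Keras):

from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, GlobalMaxPooling1D, Dense

model = Sequential()
# Hypothetical sizes: 10000-word vocabulary, 200-dim embeddings, 27-token tweets.
model.add(Embedding(input_dim=10000, output_dim=200, input_length=27))
model.add(Bidirectional(LSTM(128, return_sequences=True)))  # output (27, 256): forward + backward states
model.add(GlobalMaxPooling1D())                             # output (256,): max over the 27 timesteps
model.add(Dense(4, activation='softmax'))                   # 4 stands in for len(CLASSES)
model.summary()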
...
@@ -175,10 +176,11 @@ def classification_embedings_rnn(tweets_train, tweets_train_labels_numeric, twee
     model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
     print("Training the model...")
-    earlyStopping = EarlyStopping('loss', patience=5, mode='min')
+    earlyStopping = EarlyStopping('loss', patience=3, mode='min')
-    model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=32, epochs=50, verbose=1, validation_data=(train_features_pad, tweets_train_labels_numeric), callbacks=[earlyStopping])
-    loss, accuracy = model.evaluate(train_features_pad, tweets_train_labels_numeric, batch_size=32, verbose=1)
+    # model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=32, epochs=50, verbose=1, validation_data=(train_features_pad, tweets_train_labels_numeric), callbacks=[earlyStopping])
+    model.fit(train_features_pad, tweets_train_labels_numeric, batch_size=64, epochs=30, verbose=1, callbacks=[earlyStopping])
+    loss, accuracy = model.evaluate(train_features_pad, tweets_train_labels_numeric, batch_size=64, verbose=1)
     print('Accuracy training: %f' % (accuracy * 100))
     #prediction
...
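Two things change in the training code: early stopping now tolerates 3 stagnant epochs instead of 5, and the new fit call trains with batch_size=64 for up to 30 epochs without validation_data (the replaced call had validated on the training set itself). A minimal sketch of the callback semantics (assuming keras.callbacks; model and data as defined earlier in the script):

from keras.callbacks import EarlyStopping

# Stop once the monitored quantity ('loss', i.e. training loss, not 'val_loss')
# has failed to decrease ('min' mode) for 3 consecutive epochs.
earlyStopping = EarlyStopping(monitor='loss', patience=3, mode='min')

# Usage as in the diff above:
# model.fit(train_features_pad, tweets_train_labels_numeric,
#           batch_size=64, epochs=30, verbose=1, callbacks=[earlyStopping])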