import os

try:
    # scikit-learn >= 0.20: cross_validation was removed.
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

from keras import backend as K
from keras.callbacks import (
    Callback,
    EarlyStopping,
    LearningRateScheduler,
    ModelCheckpoint,
    ReduceLROnPlateau,
    TensorBoard,
)
from keras.layers import (
    Bidirectional,
    CuDNNGRU,
    CuDNNLSTM,
    Dense,
    Dropout,
    Embedding,
    Flatten,
    GRU,
    GlobalAveragePooling1D,
    GlobalMaxPool1D,
    GlobalMaxPooling1D,
    Input,
    LSTM,
    SpatialDropout1D,
    concatenate,
)
from keras.models import Model
from keras.optimizers import Adadelta, Adam, Nadam, RMSprop, SGD
from keras.preprocessing import sequence, text
def get_model():
    """Build and compile the BiLSTM + BiGRU sequence classifier.

    Reads the module-level globals ``maxlen``, ``nb_words``,
    ``embed_size`` and ``embedding_matrix`` (pretrained embeddings,
    kept frozen).  Returns a compiled ``keras.Model`` with 6 sigmoid
    outputs trained against binary cross-entropy.
    """
    tokens = Input(shape=(maxlen,))

    h = Embedding(
        nb_words,
        embed_size,
        weights=[embedding_matrix],
        trainable=False,  # do not fine-tune the pretrained embeddings
    )(tokens)
    h = SpatialDropout1D(0.2)(h)

    # Two stacked bidirectional recurrent layers, each followed by dropout.
    h = Bidirectional(CuDNNLSTM(256, return_sequences=True))(h)
    h = Dropout(0.2)(h)
    h = Bidirectional(CuDNNGRU(128, return_sequences=True))(h)
    h = Dropout(0.2)(h)

    # Summarize the time dimension with both average- and max-pooling.
    pooled = concatenate([
        GlobalAveragePooling1D()(h),
        GlobalMaxPooling1D()(h),
    ])

    hidden = Dense(64, activation="relu")(pooled)
    probs = Dense(6, activation="sigmoid")(hidden)

    net = Model(inputs=tokens, outputs=probs)
    net.compile(
        loss='binary_crossentropy',
        optimizer=Adam(lr=1e-3),
        metrics=['accuracy'],
    )
    return net
# --- Run configuration -------------------------------------------------
INPUT = './'      # root directory where model checkpoints are written
batch_size = 32   # mini-batch size used for training
epochs = 10       # maximum number of training epochs

# Build and compile the network once, up front.
model = get_model()
# Split features and labels in a single call so the rows stay aligned.
# The original code called train_test_split twice (once for X, once for
# y), which only stays consistent because both calls happen to use the
# same random_state — a silent-misalignment hazard if either changes.
X_train, X_val, y_train, y_val = train_test_split(
    train_sequence, y, random_state=17, train_size=0.90
)
def exp_decay(init, fin, steps):
    """Per-update decay rate that anneals a learning rate geometrically.

    Computes ``(init / fin) ** (1 / (steps - 1)) - 1``, intended as the
    ``decay`` value for a Keras optimizer so the learning rate moves
    from ``init`` toward ``fin`` over ``steps`` optimizer updates.

    Defined as a proper function (PEP 8 discourages assigning a lambda
    to a name) so it gets a real __name__ and a docstring.

    Raises ZeroDivisionError when ``steps`` is 1 or ``fin`` is 0.
    """
    return (init / fin) ** (1 / (steps - 1)) - 1
# Total optimizer updates over the whole run: batches per epoch * epochs.
# Integer floor division is exact; int(a / b) goes through float and can
# round incorrectly for very large datasets.
steps = (len(train_df) // batch_size) * epochs

# Anneal the learning rate from lr_init down to lr_fin across all steps.
lr_init, lr_fin = 0.001, 0.0005
lr_decay = exp_decay(lr_init, lr_fin, steps)
K.set_value(model.optimizer.lr, lr_init)
K.set_value(model.optimizer.decay, lr_decay)
num = 0  # run index; namespaces the checkpoint directory and filenames

# os.makedirs creates every missing intermediate directory and, with
# exist_ok=True, is race-free — it replaces the two isdir-then-mkdir
# pairs of the original, which were TOCTOU-prone and could still raise
# if another process created the directory between the check and mkdir.
os.makedirs(os.path.join(INPUT, "models", str(num)), exist_ok=True)
# Where the best (lowest validation loss) weights are saved.
# os.path.join is robust to INPUT with or without a trailing slash,
# unlike the original raw string concatenation.
file_path_best = os.path.join(
    INPUT, "models", str(num), "weights_best" + str(num) + ".hdf5"
)

# Keep only the best weights seen so far, judged by validation loss.
checkpoint_best = ModelCheckpoint(
    file_path_best,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

# Stop training once val_loss has not improved for 3 consecutive epochs.
early = EarlyStopping(monitor="val_loss", mode="min", patience=3)

callbacks_list = [checkpoint_best, early]
# Train, then restore the best checkpoint before scoring the test set.
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=callbacks_list,
)

# ModelCheckpoint only writes a file when val_loss improved at least
# once, so guard the reload.
if os.path.isfile(file_path_best):
    print('load ', file_path_best)
    model.load_weights(file_path_best)

y_test = model.predict([test_sequence], batch_size=256, verbose=1)