# Assumes `import tensorflow as tf` (TensorFlow 0.x-era API) at module level.
def __init__(self, is_training, config):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    # Placeholders for input token ids and target token ids,
    # both shaped [batch_size, num_steps].
    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # LSTM cell; output dropout is applied only during training.
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    if is_training and config.keep_prob < 1:
        lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=config.keep_prob)
    cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    # Embedding lookup is pinned to the CPU.
    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, size])
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Unroll the LSTM for num_steps time steps, reusing the RNN variables
    # after the first step.
    outputs = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)

    # Stack the per-step outputs into a [batch_size * num_steps, size] matrix
    # and project onto the vocabulary.
    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    logits = tf.matmul(output, softmax_w) + softmax_b

    # Cross-entropy sequence loss, summed and averaged over the batch.
    loss = tf.nn.seq2seq.sequence_loss_by_example(
        [logits],
        [tf.reshape(self._targets, [-1])],
        [tf.ones([batch_size * num_steps])])
    self._cost = cost = tf.reduce_sum(loss) / batch_size
    self._final_state = state

    if not is_training:
        return

    # Gradient-descent training with global-norm gradient clipping.
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    # `self.lr` is assumed to be a read-only property exposing `self._lr`
    # elsewhere in the class.
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
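

# --- Usage sketch (not part of the original source) ---
# A minimal config object carrying the attributes the constructor above reads.
# Only the attribute names come from that code; the class name, the values,
# and the enclosing model class name (`PTBModel`) are assumptions for
# illustration.
class SmallConfig(object):
    batch_size = 20
    num_steps = 20
    hidden_size = 200
    vocab_size = 10000
    keep_prob = 1.0       # >= 1 means no dropout is applied
    num_layers = 2
    max_grad_norm = 5

# Hypothetical instantiation, assuming the constructor belongs to a class
# named `PTBModel`:
# train_model = PTBModel(is_training=True, config=SmallConfig())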