match = Sequential()
match.add(Merge([input_encoder_m, question_encoder],
                mode="dot",
                dot_axes=[2, 2]))
match.add(Activation("softmax"))
# output: (samples, story_maxlen, query_maxlen)
# embed the input into a single vector with size = story_maxlen:
After Change
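# the functional-API code below assumes these imports; input_encoded_m,
# input_encoded_c, question_encoded, input_sequence, question and
# vocab_size are defined earlier in the full example (not shown here)
from keras.layers import Activation, Dense, Dropout, Permute, LSTM
from keras.layers import add, dot, concatenate
from keras.models import Model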
# compute a "match" between the first input vector sequence
# and the question vector sequence
match = dot([input_encoded_m, question_encoded], axes=(2, 2))  # (samples, story_maxlen, query_maxlen)
match = Activation("softmax")(match)
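# note: Activation("softmax") normalizes over the last axis
# (query_maxlen), so each story position gets a distribution
# over query positions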
# add the match matrix with the second input vector sequence
response = add([match, input_encoded_c])  # (samples, story_maxlen, query_maxlen)
response = Permute((2, 1))(response)  # (samples, query_maxlen, story_maxlen)
# concatenate the response with the question vector sequence
answer = concatenate([response, question_encoded])
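# resulting shape: (samples, query_maxlen, story_maxlen + embed_dim),
# where embed_dim is the question embedding size set earlier in the example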
# the original paper uses a matrix multiplication for this reduction step.
# we choose to use an RNN instead.
answer = LSTM(32)(answer)  # (samples, 32)
# one regularization layer -- more would probably be needed.
answer = Dropout(0.3)(answer)
answer = Dense(vocab_size)(answer)  # (samples, vocab_size)
# we output a probability distribution over the vocabulary
answer = Activation("softmax")(answer)
# build the final model
model = Model([input_sequence, question], answer)
model.compile(optimizer="rmsprop", loss="categorical_crossentropy",
              metrics=["accuracy"])
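# a minimal training call in the style of the full example; the array
# names (inputs_train, queries_train, answers_train, and the test
# counterparts) are assumptions taken from its preprocessing step
model.fit([inputs_train, queries_train], answers_train,
          batch_size=32, epochs=120,
          validation_data=([inputs_test, queries_test], answers_test))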