squad = json.load(f)
for article in squad["data"]:
if "title" in article:
title = article["title"].strip()
else:
title = ""
for paragraph in article["paragraphs"]:
context = paragraph["context"].strip()
for qa in paragraph["qas"]:
question = qa["question"].strip()
id_ = qa["id"]
answer_starts = [answer["answer_start"] for answer in qa["answers"]]
answers = [answer["text"].strip() for answer in qa["answers"]]
// Features currently used are "context", "question", and "answers".
// Others are extracted here for the ease of future expansions.
example = {
"title": title,
"context": context,
"question": question,
"id": id_,
"answer_starts": answer_starts,
"answers": answers,
}
yield {
"question": example["question"],
// TODO(b/121176753): return all the answers.
"first_answer": example["answers"][0],
"context": example["context"]
}
After Change
with tf.io.gfile.GFile(filepath) as f:
squad = json.load(f)
for article in squad["data"]:
title = article.get("title", "").strip()
for paragraph in article["paragraphs"]:
context = paragraph["context"].strip()
for qa in paragraph["qas"]:
question = qa["question"].strip()