extracted_files.extend(extract_archive(dataset_tar))
elif isinstance(URLS[dataset_name], str):
dataset_tar = download_from_url(URLS[dataset_name], root=root, hash_value=MD5[dataset_name], hash_type="md5")
extracted_files.extend(extract_archive(dataset_tar))
else:
raise ValueError(
"URLS for {} has to be in a form or list or string".format(
dataset_name))
After Change
# We need to take an extra step to pick out the specific language pair from it.
src_language = train_filenames[0].split(".")[-1]
tgt_language = train_filenames[1].split(".")[-1]
languages = "-".join([src_language, tgt_language])
iwslt_tar = ".data/2016-01/texts/{}/{}/{}.tgz"
iwslt_tar = iwslt_tar.format(
src_language, tgt_language, languages)
extracted_dataset_tar = extract_archive(iwslt_tar)
extracted_files.extend(extracted_dataset_tar)
else:
raise ValueError(
"URLS for {} has to be in a form or list or string".format(