6f39f48519a3e4fdc3a1e44b86dba00b3fcbbae3,open_seq2seq/data/text2text/text2text.py,ParallelTextDataLayer,build_graph,#ParallelTextDataLayer#,149

Before Change


      .map(lambda tokens: (tokens, tf.size(tokens)),
           num_parallel_calls=self._map_parallel_calls)

    _src_tgt_dataset = tf.data.Dataset.zip((_sources, _targets)).filter(
      lambda t1, t2: tf.logical_and(tf.less_equal(t1[1], self.max_len),
                                    tf.less_equal(t2[1], self.max_len))
    )

    if self._num_workers > 1:
      _src_tgt_dataset = _src_tgt_dataset\
        .shard(num_shards=self._num_workers, index=self._worker_id)


    if self.params["shuffle"]:
      _src_tgt_dataset = _src_tgt_dataset\
        .shuffle(buffer_size=self.get_size_in_samples())
    else:
      _src_tgt_dataset = _src_tgt_dataset

    if self.params["repeat"]:
      _src_tgt_dataset = _src_tgt_dataset.repeat()

    self.batched_dataset = _src_tgt_dataset.padded_batch(
      self._batch_size,
      padded_shapes=((tf.TensorShape([None]),
                      tf.TensorShape([])),
                     (tf.TensorShape([None]),
                      tf.TensorShape([]))),
      padding_values=(
      (SpecialTextTokens.PAD_ID.value,
       0),
      (SpecialTextTokens.PAD_ID.value,
       0))).prefetch(buffer_size=self._prefetch_buffer_size)

    self._iterator = self.batched_dataset.make_initializable_iterator()

    if self.params["mode"] == "train" or self.params["mode"] == "eval":
      t1, t2 = self.iterator.get_next()

After Change


      .map(lambda tokens: (tokens, tf.size(tokens)),
           num_parallel_calls=self._map_parallel_calls)

    _src_tgt_dataset = tf.data.Dataset.zip((_sources, _targets)).filter(
      lambda t1, t2: tf.logical_and(tf.less_equal(t1[1], self.max_len),
                                    tf.less_equal(t2[1], self.max_len))
    ).cache()

    if self._num_workers > 1:
      _src_tgt_dataset = _src_tgt_dataset\
        .shard(num_shards=self._num_workers, index=self._worker_id)


    if self.params["shuffle"]:
      _src_tgt_dataset = _src_tgt_dataset\
        .shuffle(buffer_size=self.get_size_in_samples())
    else:
      _src_tgt_dataset = _src_tgt_dataset

    if self.params["repeat"]:
      _src_tgt_dataset = _src_tgt_dataset.repeat()

    self.batched_dataset = _src_tgt_dataset.padded_batch(
      self._batch_size,
      padded_shapes=((tf.TensorShape([None]),
                      tf.TensorShape([])),
                     (tf.TensorShape([None]),
                      tf.TensorShape([]))),
      padding_values=(
      (SpecialTextTokens.PAD_ID.value,
       0),
      (SpecialTextTokens.PAD_ID.value,
       0))).prefetch(buffer_size=self._prefetch_buffer_size)

    self._iterator = self.batched_dataset.make_initializable_iterator()

    if self.params["mode"] == "train" or self.params["mode"] == "eval":
      t1, t2 = self.iterator.get_next()
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 6

Instances


Project Name: NVIDIA/OpenSeq2Seq
Commit Name: 6f39f48519a3e4fdc3a1e44b86dba00b3fcbbae3
Time: 2018-05-22
Author: okuchaiev@nvidia.com
File Name: open_seq2seq/data/text2text/text2text.py
Class Name: ParallelTextDataLayer
Method Name: build_graph


Project Name: NVIDIA/OpenSeq2Seq
Commit Name: 36c22f3f6b96cd02d95f436c58186a7acfa4abf0
Time: 2018-05-17
Author: okuchaiev@nvidia.com
File Name: open_seq2seq/data/text2text/text2text.py
Class Name: ParallelTextDataLayer
Method Name: build_graph


Project Name: NVIDIA/OpenSeq2Seq
Commit Name: ca06d634cbd61d7f3c4a38a56351de46e12a675f
Time: 2018-05-08
Author: okuchaiev@nvidia.com
File Name: open_seq2seq/data/text2text.py
Class Name: ParallelTextDataLayer
Method Name: build_graph


Project Name: NifTK/NiftyNet
Commit Name: 4b2e09598a90ed402baace88a95e74b6440ecdce
Time: 2017-08-22
Author: wenqi.li@ucl.ac.uk
File Name: niftynet/utilities/user_parameters_regex.py
Class Name:
Method Name: