for path in paths:
    cur_dataset = torch.load(path)
    logger.info("Loading dataset from %s, number of examples: %d" %
                (path, len(cur_dataset)))
    cur_dataset.fields = self.fields
    cur_iter = OrderedIterator(
        dataset=cur_dataset,
After Change
paths = self._paths
if self.is_train and self.repeat:
    # Cycle through the shards indefinitely.
    paths = cycle(paths)
for path in paths:
    for batch in self._iter_dataset(path):
        yield batch
        num_batches += 1
if self.is_train and not self.repeat and \
        num_batches % self.num_batches_multiple != 0:
    # When the dataset is not repeated, we might need to ensure that
    # the number of returned batches is a multiple of a given value.
    # This is important for multi-GPU training to ensure that all
    # workers have the same number of batches to process.
    for path in paths:
        for batch in self._iter_dataset(path):
            yield batch
            num_batches += 1
            if num_batches % self.num_batches_multiple == 0:
                return
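
# --- Illustration only: a minimal, self-contained sketch of the new iteration
# pattern. `ShardedBatchIter` and the list-based "shards" below are hypothetical
# stand-ins for the lazy dataset iterator and its on-disk shards; they exist
# purely to demonstrate the two behaviours described in the comments above:
# `itertools.cycle` for indefinite repetition, and topping the batch count up
# to a multiple of `num_batches_multiple` so multi-GPU workers stay in step.
from itertools import cycle


class ShardedBatchIter:
    def __init__(self, shards, is_train=True, repeat=False,
                 num_batches_multiple=1):
        self.shards = shards
        self.is_train = is_train
        self.repeat = repeat
        self.num_batches_multiple = num_batches_multiple

    def _iter_dataset(self, shard):
        # The real code would load the shard from disk and batch it here.
        return iter(shard)

    def __iter__(self):
        num_batches = 0
        shards = self.shards
        if self.is_train and self.repeat:
            # Cycle through the shards indefinitely.
            shards = cycle(shards)
        for shard in shards:
            for batch in self._iter_dataset(shard):
                yield batch
                num_batches += 1
        if self.is_train and not self.repeat and \
                num_batches % self.num_batches_multiple != 0:
            # Top the count up to the next multiple by re-yielding batches.
            for shard in self.shards:
                for batch in self._iter_dataset(shard):
                    yield batch
                    num_batches += 1
                    if num_batches % self.num_batches_multiple == 0:
                        return


# Three shards holding 2 + 2 + 1 = 5 batches; with num_batches_multiple=4
# the iterator re-yields 3 extra batches so every worker processes 8.
demo = ShardedBatchIter([["a1", "a2"], ["b1", "b2"], ["c1"]],
                        repeat=False, num_batches_multiple=4)
assert len(list(demo)) == 8
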
def max_tok_len(new, count, sofar):
    """
    In token batching scheme, the number of sequences is limited
    such that the total number of src/tgt tokens (including padding)