diff --git a/wenet/dataset/dataset.py b/wenet/dataset/dataset.py index 6bb49c4da..8dbcbb970 100644 --- a/wenet/dataset/dataset.py +++ b/wenet/dataset/dataset.py @@ -14,6 +14,7 @@ # limitations under the License. from functools import partial +import sys from typing import Optional from wenet.dataset import processor from wenet.dataset.datapipes import (WenetRawDatasetSource, @@ -44,7 +45,7 @@ def Dataset(data_type, cycle = conf.get('cycle', 1) # stage1 shuffle: source list_shuffle = conf.get('list_shuffle', True) - list_shuffle_size = 10000000 + list_shuffle_size = sys.maxsize if list_shuffle: list_shuffle_conf = conf.get('list_shuffle_conf', {}) list_shuffle_size = list_shuffle_conf.get('shuffle_size',