From 8bdfe7c14b74d6178ce6b28f4541c01ae96e7c7d Mon Sep 17 00:00:00 2001 From: Mddct Date: Wed, 20 Mar 2024 14:44:02 +0800 Subject: [PATCH] stage1 shuffle max --- wenet/dataset/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wenet/dataset/dataset.py b/wenet/dataset/dataset.py index 6bb49c4da..8dbcbb970 100644 --- a/wenet/dataset/dataset.py +++ b/wenet/dataset/dataset.py @@ -14,6 +14,7 @@ # limitations under the License. from functools import partial +import sys from typing import Optional from wenet.dataset import processor from wenet.dataset.datapipes import (WenetRawDatasetSource, @@ -44,7 +45,7 @@ def Dataset(data_type, cycle = conf.get('cycle', 1) # stage1 shuffle: source list_shuffle = conf.get('list_shuffle', True) - list_shuffle_size = 10000000 + list_shuffle_size = sys.maxsize if list_shuffle: list_shuffle_conf = conf.get('list_shuffle_conf', {}) list_shuffle_size = list_shuffle_conf.get('shuffle_size',