Skip to content

Commit

Permalink
unify shuff typo
Browse files: browse the repository at this point in the history
  • Loading branch information
Mddct committed Mar 20, 2024
1 parent 11eb863 commit 0f2f339
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions wenet/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,13 @@ def Dataset(data_type,
assert data_type in ['raw', 'shard']
# cycle dataset
cycle = conf.get('cycle', 1)
list_shuffle = conf.get('list_shuffle', False)
list_shuffle_size = conf.get('list_shuffle_size', 10000)

# stage1 shuffle: source
list_shuffle = conf.get('list_shuffle', True)
list_shuffle_size = 10000000
if list_shuffle:
list_shuffle_conf = conf.get('list_shuffle_conf', {})
list_shuffle_size = list_shuffle_conf.get('shuffle_size',
list_shuffle_size)
if data_type == 'raw':
dataset = WenetRawDatasetSource(data_list_file,
partition=partition,
Expand Down

0 comments on commit 0f2f339

Please sign in to comment.