Skip to content

Commit

Permalink
[fix] fix order (#2428)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Mddct authored Mar 20, 2024
1 parent e77e8f1 commit 6f77461
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions wenet/dataset/datapipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,10 +395,10 @@ def __init__(self,
shuffle_size: int = 10000,
cycle: int = 1) -> None:
super().__init__()
self.dp = TextLineDataPipe(filenames).repeat(cycle).prefetch(
prefetch)
self.dp = TextLineDataPipe(filenames)
if shuffle:
self.dp = self.dp.shuffle(buffer_size=shuffle_size)
self.dp = self.dp.repeat(cycle).prefetch(prefetch)
self.dp = self.dp.shard(partition)

def __iter__(self):
Expand All @@ -416,9 +416,10 @@ def __init__(self,
shuffle_size: int = 10000,
cycle: int = 1) -> None:
super().__init__()
self.dp = TextLineDataPipe(filenames).repeat(cycle)
self.dp = TextLineDataPipe(filenames)
if shuffle:
self.dp = self.dp.shuffle(buffer_size=shuffle_size)
self.dp = self.dp.repeat(cycle)
self.dp = self.dp.shard(partition).map_ignore_error(
parse_url).tar_file_and_group().prefetch(prefetch)

Expand Down

0 comments on commit 6f77461

Please sign in to comment.