From 6f77461c18253a9e7e431305f1d967425b353fff Mon Sep 17 00:00:00 2001 From: Dinghao Zhou Date: Wed, 20 Mar 2024 16:27:35 +0800 Subject: [PATCH] [fix] fix order (#2428) --- wenet/dataset/datapipes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/wenet/dataset/datapipes.py b/wenet/dataset/datapipes.py index ccb2d83ad..6d89ab552 100644 --- a/wenet/dataset/datapipes.py +++ b/wenet/dataset/datapipes.py @@ -395,10 +395,10 @@ def __init__(self, shuffle_size: int = 10000, cycle: int = 1) -> None: super().__init__() - self.dp = TextLineDataPipe(filenames).repeat(cycle).prefetch( - prefetch) + self.dp = TextLineDataPipe(filenames) if shuffle: self.dp = self.dp.shuffle(buffer_size=shuffle_size) + self.dp = self.dp.repeat(cycle).prefetch(prefetch) self.dp = self.dp.shard(partition) def __iter__(self): @@ -416,9 +416,10 @@ def __init__(self, shuffle_size: int = 10000, cycle: int = 1) -> None: super().__init__() - self.dp = TextLineDataPipe(filenames).repeat(cycle) + self.dp = TextLineDataPipe(filenames) if shuffle: self.dp = self.dp.shuffle(buffer_size=shuffle_size) + self.dp = self.dp.repeat(cycle) self.dp = self.dp.shard(partition).map_ignore_error( parse_url).tar_file_and_group().prefetch(prefetch)