Skip to content

Commit

Permalink
Только Leipzig
Browse files (browse the repository at this point in the history)
  • Loading branch information
sakentsunofu committed Jan 10, 2024
1 parent 9472611 commit d03f526
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/dataset_preparer.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def run(self):
"""Run the dataset preparation process: download, merge, filter, and clean the dataset."""
logging.info("Running dataset preparation for: %s", self.dataset_name)
self.download()
self.merge()
# self.merge()
self.filter()
self.clean()
logging.info("Dataset preparation completed for: %s", self.dataset_name)
8 changes: 4 additions & 4 deletions src/multidomain_kazakh_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
class MultidomainKazakhDataset(DatasetPreparer):
def __init__(self):
files = [
'https://huggingface.co/datasets/kz-transformers/multidomain-kazakh-dataset/resolve/main/kazakhNews.csv',
'https://huggingface.co/datasets/kz-transformers/multidomain-kazakh-dataset/resolve/main/kazakhBooks.csv',
# 'https://huggingface.co/datasets/kz-transformers/multidomain-kazakh-dataset/resolve/main/kazakhNews.csv',
# 'https://huggingface.co/datasets/kz-transformers/multidomain-kazakh-dataset/resolve/main/kazakhBooks.csv',
'https://huggingface.co/datasets/kz-transformers/multidomain-kazakh-dataset/resolve/main/leipzig.csv',
'https://huggingface.co/datasets/kz-transformers/multidomain-kazakh-dataset/resolve/main/oscar.csv',
'https://huggingface.co/datasets/kz-transformers/multidomain-kazakh-dataset/resolve/main/cc100-monolingual-crawled-data.csv'
# 'https://huggingface.co/datasets/kz-transformers/multidomain-kazakh-dataset/resolve/main/oscar.csv',
# 'https://huggingface.co/datasets/kz-transformers/multidomain-kazakh-dataset/resolve/main/cc100-monolingual-crawled-data.csv'
]
super().__init__('multidomain-kazakh-dataset', files)

0 comments on commit d03f526

Please sign in to comment.