diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 106c66b96..16c7ff726 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,7 +16,7 @@ jobs: run: git fetch origin ${{ github.base_ref }} - uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.9" architecture: x64 - name: Get pip cache dir id: pip-cache @@ -33,7 +33,7 @@ jobs: ${{ runner.os }}-pip-pre-commit - name: pre-commit run: | - pip install -U pre-commit + pip install --upgrade pre-commit pre-commit install --install-hooks pre-commit run --all-files whisper-test: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f5a74b6d..48df249ca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v5.0.0 hooks: - id: check-json - id: end-of-file-fixer @@ -11,17 +11,17 @@ repos: - id: check-added-large-files args: [--maxkb=4096] - repo: https://github.com/psf/black - rev: 23.7.0 + rev: 24.10.0 hooks: - id: black - repo: https://github.com/pycqa/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort name: isort (python) args: ["--profile", "black", "-l", "88", "--trailing-comma", "--multi-line", "3"] - repo: https://github.com/pycqa/flake8.git - rev: 6.0.0 + rev: 7.1.1 hooks: - id: flake8 types: [python] diff --git a/whisper/normalizers/basic.py b/whisper/normalizers/basic.py index a82403203..8690ae71c 100644 --- a/whisper/normalizers/basic.py +++ b/whisper/normalizers/basic.py @@ -30,15 +30,19 @@ def remove_symbols_and_diacritics(s: str, keep=""): and drop any diacritics (category 'Mn' and some manual mappings) """ return "".join( - c - if c in keep - else ADDITIONAL_DIACRITICS[c] - if c in ADDITIONAL_DIACRITICS - else "" - if unicodedata.category(c) == "Mn" - else " " - if unicodedata.category(c)[0] in "MSP" - else c + ( + c + if c in keep + else ( + ADDITIONAL_DIACRITICS[c] + if c in ADDITIONAL_DIACRITICS + else ( + "" + if unicodedata.category(c) == "Mn" + else " " if unicodedata.category(c)[0] in "MSP" else c + ) + ) + ) for c in unicodedata.normalize("NFKD", s) ) diff --git a/whisper/utils.py b/whisper/utils.py index 9b9b13862..13792f764 100644 --- a/whisper/utils.py +++ b/whisper/utils.py @@ -209,9 +209,11 @@ def iterate_subtitles(): yield start, end, "".join( [ - re.sub(r"^(\s*)(.*)$", r"\1\2", word) - if j == i - else word + ( + re.sub(r"^(\s*)(.*)$", r"\1\2", word) + if j == i + else word + ) for j, word in enumerate(all_words) ] )