diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 106c66b96..16c7ff726 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -16,7 +16,7 @@ jobs:
run: git fetch origin ${{ github.base_ref }}
- uses: actions/setup-python@v5
with:
- python-version: "3.8"
+ python-version: "3.9"
architecture: x64
- name: Get pip cache dir
id: pip-cache
@@ -33,7 +33,7 @@ jobs:
${{ runner.os }}-pip-pre-commit
- name: pre-commit
run: |
- pip install -U pre-commit
+ pip install --upgrade pre-commit
pre-commit install --install-hooks
pre-commit run --all-files
whisper-test:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3f5a74b6d..48df249ca 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.0.1
+ rev: v5.0.0
hooks:
- id: check-json
- id: end-of-file-fixer
@@ -11,17 +11,17 @@ repos:
- id: check-added-large-files
args: [--maxkb=4096]
- repo: https://github.com/psf/black
- rev: 23.7.0
+ rev: 24.10.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
- rev: 5.12.0
+ rev: 5.13.2
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black", "-l", "88", "--trailing-comma", "--multi-line", "3"]
- repo: https://github.com/pycqa/flake8.git
- rev: 6.0.0
+ rev: 7.1.1
hooks:
- id: flake8
types: [python]
diff --git a/whisper/normalizers/basic.py b/whisper/normalizers/basic.py
index a82403203..8690ae71c 100644
--- a/whisper/normalizers/basic.py
+++ b/whisper/normalizers/basic.py
@@ -30,15 +30,19 @@ def remove_symbols_and_diacritics(s: str, keep=""):
and drop any diacritics (category 'Mn' and some manual mappings)
"""
return "".join(
- c
- if c in keep
- else ADDITIONAL_DIACRITICS[c]
- if c in ADDITIONAL_DIACRITICS
- else ""
- if unicodedata.category(c) == "Mn"
- else " "
- if unicodedata.category(c)[0] in "MSP"
- else c
+ (
+ c
+ if c in keep
+ else (
+ ADDITIONAL_DIACRITICS[c]
+ if c in ADDITIONAL_DIACRITICS
+ else (
+ ""
+ if unicodedata.category(c) == "Mn"
+ else " " if unicodedata.category(c)[0] in "MSP" else c
+ )
+ )
+ )
for c in unicodedata.normalize("NFKD", s)
)
diff --git a/whisper/utils.py b/whisper/utils.py
index 9b9b13862..13792f764 100644
--- a/whisper/utils.py
+++ b/whisper/utils.py
@@ -209,9 +209,11 @@ def iterate_subtitles():
yield start, end, "".join(
[
- re.sub(r"^(\s*)(.*)$", r"\1\2", word)
- if j == i
- else word
+ (
+ re.sub(r"^(\s*)(.*)$", r"\1\2", word)
+ if j == i
+ else word
+ )
for j, word in enumerate(all_words)
]
)