From 141204bc9ade23d404928980e9af9596ab079983 Mon Sep 17 00:00:00 2001 From: Jong Wook Kim Date: Tue, 17 Jan 2023 23:28:36 -0800 Subject: [PATCH] print '?' if a letter can't be encoded using the system default encoding (#859) --- whisper/transcribe.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/whisper/transcribe.py b/whisper/transcribe.py index 16c98a6f6..a8dc3ebe2 100644 --- a/whisper/transcribe.py +++ b/whisper/transcribe.py @@ -1,5 +1,6 @@ import argparse import os +import sys import warnings from typing import List, Optional, Tuple, Union, TYPE_CHECKING @@ -167,7 +168,10 @@ def add_segment( } ) if verbose: - print(f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}") + line = f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}\n" + # compared to just `print(line)`, this replaces any character not representable using + # the system default encoding with a '?', avoiding UnicodeEncodeError. + sys.stderr.buffer.write(line.encode(sys.getdefaultencoding(), errors="replace")) # show the progress bar when verbose is False (otherwise the transcribed text will be printed) num_frames = mel.shape[-1]