diff --git a/whisper/tokenizer.py b/whisper/tokenizer.py
index 5ae691d5f..88142c43c 100644
--- a/whisper/tokenizer.py
+++ b/whisper/tokenizer.py
@@ -245,9 +245,7 @@ def non_speech_tokens(self) -> Tuple[int]:
 
         keeping basic punctuations like commas, periods, question marks, exclamation points, etc.
         """
-
-        result = set()
-        symbols = list("'\"#()*+-/:;<=>@[\\]^_`{|}~「」『』")
+        symbols = list("\"#()*+/:;<=>@[\\]^_`{|}~「」『』")
         symbols += "<< >> <<< >>> -- --- -( -[ (' (\" (( )) ((( ))) [[ ]] {{ }} ♪♪ ♪♪♪".split()
 
         # symbols that may be a single token or multiple tokens depending on the tokenizer.
@@ -257,6 +255,8 @@ def non_speech_tokens(self) -> Tuple[int]:
         miscellaneous = set("♩♪♫♬♭♮♯")
         assert all(0x2640 <= ord(c) <= 0x267F for c in miscellaneous)
 
+        # allow hyphens "-" and single quotes "'" between words, but not at the beginning of a word
+        result = {self.tokenizer.encode(" -")[0], self.tokenizer.encode(" '")[0]}
         for symbol in symbols + list(miscellaneous):
             for tokens in [self.tokenizer.encode(symbol), self.tokenizer.encode(" " + symbol)]:
                 if len(tokens) == 1 or symbol in miscellaneous: