
Add iFlyTek speech recognition and synthesis modules, and add an Alibaba speech recognition module (#33)
* Update stt.py

Added iFlyTek and Alibaba speech recognition modules; go to the profile to configure them.

* Update tts.py

Added iFlyTek speech synthesis; the relevant settings must be configured in the profile (a sketch of the expected profile.yml entries follows this list).

* (followed by a series of further incremental updates to stt.py and tts.py)
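A rough sketch of the profile.yml entries the new modules read (the key names are taken from the code below; every value here is a placeholder, and vid is only used by the iFlyTek TTS engine):

iflytek_yuyin:
    api_id: 'YOUR_XFYUN_APP_ID'
    api_key: 'YOUR_XFYUN_API_KEY'
    vid: '60170'
ali_yuyin:
    ak_id: 'YOUR_ALIYUN_ACCESSKEY_ID'
    ak_secret: 'YOUR_ALIYUN_ACCESSKEY_SECRET'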
musistudio authored and wzpan committed Sep 11, 2017
1 parent 7486d1b commit e3e742a
Showing 2 changed files with 281 additions and 5 deletions.
219 changes: 217 additions & 2 deletions client/stt.py
@@ -14,6 +14,10 @@
import diagnose
import vocabcompiler
from uuid import getnode as get_mac
import time
import hashlib
import datetime
import hmac

import sys

@@ -193,8 +197,7 @@ class BaiduSTT(AbstractSTTEngine):
要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
之后创建一个新应用, 然后在应用管理的"查看key"中获得 API Key 和 Secret Key
填入 profile.xml 中.
...
...
baidu_yuyin: 'AIzaSyDoHmTEToZUQrltmORWS4Ott0OHVA62tw8'
api_key: 'LMFYhLdXSSthxCNLR7uxFszQ'
secret_key: '14dbd10057xu7b256e537455698c0e4e'
@@ -301,6 +304,218 @@ def is_available(cls):
return diagnose.check_network_connection()


class IFlyTekSTT(AbstractSTTEngine):
    """
    iFlyTek (科大讯飞) speech recognition API.
    To use this module, first register a developer account at
    http://aiui.xfyun.cn/default/index, create a new application, then look up
    the API id and API Key in the application management console and fill them
    into profile.xml.
    """

    SLUG = "iflytek-stt"

    def __init__(self, api_id, api_key):
        self._logger = logging.getLogger(__name__)
        self.api_id = api_id
        self.api_key = api_key

    @classmethod
    def get_config(cls):
        # FIXME: Replace this as soon as we have a config module
        config = {}
        # Try to get iflytek_yuyin config from config
        profile_path = dingdangpath.config('profile.yml')
        if os.path.exists(profile_path):
            with open(profile_path, 'r') as f:
                profile = yaml.safe_load(f)
                if 'iflytek_yuyin' in profile:
                    if 'api_id' in profile['iflytek_yuyin']:
                        config['api_id'] = \
                            profile['iflytek_yuyin']['api_id']
                    if 'api_key' in profile['iflytek_yuyin']:
                        config['api_key'] = \
                            profile['iflytek_yuyin']['api_key']
        return config

    def transcribe(self, fp):
        try:
            wav_file = wave.open(fp, 'rb')
        except IOError:
            self._logger.critical('wav file not found: %s',
                                  fp,
                                  exc_info=True)
            return []
        Param = '{"auf":"16k","aue":"raw","scene":"main"}'
        XParam = base64.b64encode(Param)
        n_frames = wav_file.getnframes()
        audio = wav_file.readframes(n_frames)
        base_data = base64.b64encode(audio)
        data = {'data': base_data}
        # Use one timestamp for both the checksum and the X-CurTime header,
        # otherwise the signature can fail when the clock ticks over between
        # the two calls.
        curtime = str(int(time.time()))
        m = hashlib.md5()
        m.update(self.api_key + curtime
                 + XParam + 'data=' + base_data)
        checksum = m.hexdigest()

        headers = {
            'X-Appid': self.api_id,
            'X-CurTime': curtime,
            'X-Param': Param,
            'X-CheckSum': checksum
        }
        r = requests.post('http://api.xfyun.cn/v1/aiui/v1/iat',
                          data=data,
                          headers=headers)
        try:
            r.raise_for_status()
            text = ''
            if r.json()['code'] == '00000':
                text = r.json()['data']['result'].encode('utf-8')
        except requests.exceptions.HTTPError:
            self._logger.critical('Request failed with response: %r',
                                  r.text,
                                  exc_info=True)
            return []
        except requests.exceptions.RequestException:
            self._logger.critical('Request failed.', exc_info=True)
            return []
        except ValueError as e:
            self._logger.critical('Cannot parse response: %s',
                                  e.args[0])
            return []
        except KeyError:
            self._logger.critical('Cannot parse response.',
                                  exc_info=True)
            return []
        else:
            transcribed = []
            if text:
                transcribed.append(text.upper())
            self._logger.info(u'讯飞语音识别到了: %s' % text)
            return transcribed

    @classmethod
    def is_available(cls):
        return diagnose.check_network_connection()


class ALiBaBaSTT(AbstractSTTEngine):
    """
    Aliyun (阿里云) speech recognition API.
    To use this module, first register a developer account at
    https://data.aliyun.com/product/nls, then look up your AccessKey (AK)
    information and fill it into profile.xml.
    """

    SLUG = "ali-stt"

    def __init__(self, ak_id, ak_secret):
        self._logger = logging.getLogger(__name__)
        self.ak_id = ak_id
        self.ak_secret = ak_secret

    @classmethod
    def get_config(cls):
        # FIXME: Replace this as soon as we have a config module
        config = {}
        # Try to get ali_yuyin config from config
        profile_path = dingdangpath.config('profile.yml')
        if os.path.exists(profile_path):
            with open(profile_path, 'r') as f:
                profile = yaml.safe_load(f)
                if 'ali_yuyin' in profile:
                    if 'ak_id' in profile['ali_yuyin']:
                        config['ak_id'] = \
                            profile['ali_yuyin']['ak_id']
                    if 'ak_secret' in profile['ali_yuyin']:
                        config['ak_secret'] = \
                            profile['ali_yuyin']['ak_secret']
        return config

    def to_md5_base64(self, strBody):
        # Digest of the request body, used in the string to sign
        hash = hashlib.md5()
        hash.update(strBody)
        m = hash.digest().encode('base64').strip()
        hash = hashlib.md5()
        hash.update(m)
        return hash.digest().encode('base64').strip()

    def to_sha1_base64(self, stringToSign, secret):
        hmacsha1 = hmac.new(secret, stringToSign, hashlib.sha1)
        return base64.b64encode(hmacsha1.digest())

    def transcribe(self, fp):
        try:
            wav_file = wave.open(fp, 'rb')
        except IOError:
            self._logger.critical('wav file not found: %s',
                                  fp,
                                  exc_info=True)
            return []
        n_frames = wav_file.getnframes()
        audio = wav_file.readframes(n_frames)
        date = datetime.datetime.strftime(datetime.datetime.utcnow(),
                                          "%a, %d %b %Y %H:%M:%S GMT")
        options = {
            'url': 'https://nlsapi.aliyun.com/recognize?model=chat',
            'method': 'POST',
            'body': audio,
        }
        headers = {
            'authorization': '',
            'content-type': 'audio/wav; samplerate=16000',
            'accept': 'application/json',
            'date': date,
            'Content-Length': str(len(audio))
        }

        self.body = ''
        if 'body' in options:
            self.body = options['body']

        bodymd5 = ''
        if not self.body == '':
            bodymd5 = self.to_md5_base64(self.body)

        stringToSign = options['method'] + '\n' + \
            headers['accept'] + '\n' + bodymd5 + '\n' + \
            headers['content-type'] + '\n' + headers['date']
        signature = self.to_sha1_base64(stringToSign, self.ak_secret)

        authHeader = 'Dataplus ' + self.ak_id + ':' + signature
        headers['authorization'] = authHeader
        url = options['url']
        r = requests.post(url, data=self.body, headers=headers, verify=False)
        try:
            r.raise_for_status()
            text = ''
            if 'result' in r.json():
                text = r.json()['result'].encode('utf-8')
        except requests.exceptions.HTTPError:
            self._logger.critical('Request failed with response: %r',
                                  r.text,
                                  exc_info=True)
            return []
        except requests.exceptions.RequestException:
            self._logger.critical('Request failed.', exc_info=True)
            return []
        except ValueError as e:
            self._logger.critical('Cannot parse response: %s',
                                  e.args[0])
            return []
        except KeyError:
            self._logger.critical('Cannot parse response.',
                                  exc_info=True)
            return []
        else:
            transcribed = []
            if text:
                transcribed.append(text.upper())
            self._logger.info(u'阿里云语音识别到了: %s' % text)
            return transcribed

    @classmethod
    def is_available(cls):
        return diagnose.check_network_connection()


class SnowboySTT(AbstractSTTEngine):
"""
Snowboy STT 离线识别引擎(只适用于离线唤醒)
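A minimal usage sketch for the two recognizers added above (assumptions: a Python 2 environment, the client package importable from the project root, real credentials in place of the placeholder values, and a 16 kHz mono WAV recording named record.wav):

from client.stt import IFlyTekSTT, ALiBaBaSTT

# iFlyTek: api_id / api_key come from the xfyun application console
iflytek = IFlyTekSTT(api_id='YOUR_APP_ID', api_key='YOUR_API_KEY')
print iflytek.transcribe('record.wav')  # returns a list of transcriptions

# Aliyun: ak_id / ak_secret are the account's AccessKey pair
ali = ALiBaBaSTT(ak_id='YOUR_AK_ID', ak_secret='YOUR_AK_SECRET')
print ali.transcribe('record.wav')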
67 changes: 64 additions & 3 deletions client/tts.py
@@ -416,12 +416,10 @@ def say(self, phrase):
class BaiduTTS(AbstractMp3TTSEngine):
"""
使用百度语音合成技术
要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
之后创建一个新应用, 然后在应用管理的"查看key"中获得 API Key 和 Secret Key
填入 profile.xml 中.
...
...
baidu_yuyin: 'AIzaSyDoHmTEToZUQrltmORWS4Ott0OHVA62tw8'
api_key: 'LMFYhLdXSSthxCNLR7uxFszQ'
secret_key: '14dbd10057xu7b256e537455698c0e4e'
@@ -519,6 +517,69 @@ def say(self, phrase):
os.remove(tmpfile)


class IFlyTekTTS(AbstractMp3TTSEngine):
    """
    Uses iFlyTek (讯飞) speech synthesis.
    To use this module, enable it in profile.xml first and choose a
    suitable voice id (vid).
    """

    SLUG = "iflytek-tts"

    def __init__(self, vid='60170'):
        self._logger = logging.getLogger(__name__)
        self.vid = vid

    @classmethod
    def get_config(cls):
        # FIXME: Replace this as soon as we have a config module
        config = {}
        # Try to get iflytek_yuyin config from config
        profile_path = dingdangpath.config('profile.yml')
        if os.path.exists(profile_path):
            with open(profile_path, 'r') as f:
                profile = yaml.safe_load(f)
                if 'iflytek_yuyin' in profile:
                    if 'vid' in profile['iflytek_yuyin']:
                        config['vid'] = \
                            profile['iflytek_yuyin']['vid']
        return config

    @classmethod
    def is_available(cls):
        return diagnose.check_network_connection()

    def split_sentences(self, text):
        punctuations = ['.', '。', ';', ';', '\n']
        for i in punctuations:
            text = text.replace(i, '@@@')
        return text.split('@@@')

    def get_speech(self, phrase):
        getinfo_url = 'http://www.peiyinge.com/make/getSynthSign'
        voice_baseurl = 'http://proxy.peiyinge.com:17063/synth?ts='
        data = {
            'content': phrase.encode('utf8')
        }
        result_info = requests.post(getinfo_url, data=data).json()
        content = urllib.quote(phrase.encode('utf8'))
        ts = result_info['ts']
        sign = result_info['sign']
        voice_url = voice_baseurl + ts + '&sign=' + sign + \
            '&vid=' + self.vid + '&volume=&speed=0&content=' + content
        r = requests.get(voice_url)
        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
            f.write(r.content)
            tmpfile = f.name
        return tmpfile

    def say(self, phrase):
        self._logger.debug(u"Saying '%s' with '%s'", phrase, self.SLUG)
        tmpfile = self.get_speech(phrase)
        if tmpfile is not None:
            self.play_mp3(tmpfile)
            os.remove(tmpfile)


def get_default_engine_slug():
return 'osx-tts' if platform.system().lower() == 'darwin' else 'espeak-tts'

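And a similarly hedged sketch for the new synthesizer (same assumptions as the STT sketch above; '60170' is simply the default voice id used in the code, and playback relies on the existing play_mp3 helper):

# -*- coding: utf-8 -*-
from client.tts import IFlyTekTTS

tts = IFlyTekTTS(vid='60170')
tts.say(u'你好')  # fetches an mp3 through the peiyinge proxy and plays it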
