
Add iFlyTek speech recognition and synthesis modules, and add an Alibaba speech recognition module (#33)
* Update stt.py

Added iFlyTek and Alibaba speech recognition modules; go to the profile to configure them.

* Update tts.py

Added iFlyTek speech synthesis; the relevant settings must be configured in the profile (a sketch of the expected profile.yml entries follows this list).

* (followed by a series of further incremental updates to stt.py and tts.py)
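A rough sketch of the profile.yml entries the new modules read (the key names are taken from the code below; every value here is a placeholder, and vid is only used by the iFlyTek TTS engine):

iflytek_yuyin:
    api_id: 'YOUR_XFYUN_APP_ID'
    api_key: 'YOUR_XFYUN_API_KEY'
    vid: '60170'
ali_yuyin:
    ak_id: 'YOUR_ALIYUN_ACCESSKEY_ID'
    ak_secret: 'YOUR_ALIYUN_ACCESSKEY_SECRET'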
musistudio authored and wzpan committed Sep 11, 2017
1 parent 7486d1b commit e3e742a
Showing 2 changed files with 281 additions and 5 deletions.
219 changes: 217 additions & 2 deletions client/stt.py
@@ -14,6 +14,10 @@
import diagnose
import vocabcompiler
from uuid import getnode as get_mac
import time
import hashlib
import datetime
import hmac

import sys

@@ -193,8 +197,7 @@ class BaiduSTT(AbstractSTTEngine):
要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
之后创建一个新应用, 然后在应用管理的"查看key"中获得 API Key 和 Secret Key
填入 profile.xml 中.
...
...
baidu_yuyin: 'AIzaSyDoHmTEToZUQrltmORWS4Ott0OHVA62tw8'
api_key: 'LMFYhLdXSSthxCNLR7uxFszQ'
secret_key: '14dbd10057xu7b256e537455698c0e4e'
@@ -301,6 +304,218 @@ def is_available(cls):
return diagnose.check_network_connection()


class IFlyTekSTT(AbstractSTTEngine):
    """
    iFlyTek (科大讯飞) speech recognition API.
    To use this module, first register a developer account at
    http://aiui.xfyun.cn/default/index, create a new application, then look up
    the API id and API Key in the application management console and fill them
    into profile.xml.
    """

    SLUG = "iflytek-stt"

    def __init__(self, api_id, api_key):
        self._logger = logging.getLogger(__name__)
        self.api_id = api_id
        self.api_key = api_key

    @classmethod
    def get_config(cls):
        # FIXME: Replace this as soon as we have a config module
        config = {}
        # Try to get iflytek_yuyin config from config
        profile_path = dingdangpath.config('profile.yml')
        if os.path.exists(profile_path):
            with open(profile_path, 'r') as f:
                profile = yaml.safe_load(f)
                if 'iflytek_yuyin' in profile:
                    if 'api_id' in profile['iflytek_yuyin']:
                        config['api_id'] = \
                            profile['iflytek_yuyin']['api_id']
                    if 'api_key' in profile['iflytek_yuyin']:
                        config['api_key'] = \
                            profile['iflytek_yuyin']['api_key']
        return config

    def transcribe(self, fp):
        try:
            wav_file = wave.open(fp, 'rb')
        except IOError:
            self._logger.critical('wav file not found: %s',
                                  fp,
                                  exc_info=True)
            return []
        Param = '{"auf":"16k","aue":"raw","scene":"main"}'
        XParam = base64.b64encode(Param)
        n_frames = wav_file.getnframes()
        audio = wav_file.readframes(n_frames)
        base_data = base64.b64encode(audio)
        data = {'data': base_data}
        # Use one timestamp for both the checksum and the X-CurTime header,
        # otherwise the signature can fail when the clock ticks over between
        # the two calls.
        curtime = str(int(time.time()))
        m = hashlib.md5()
        m.update(self.api_key + curtime
                 + XParam + 'data=' + base_data)
        checksum = m.hexdigest()

        headers = {
            'X-Appid': self.api_id,
            'X-CurTime': curtime,
            'X-Param': Param,
            'X-CheckSum': checksum
        }
        r = requests.post('http://api.xfyun.cn/v1/aiui/v1/iat',
                          data=data,
                          headers=headers)
        try:
            r.raise_for_status()
            text = ''
            if r.json()['code'] == '00000':
                text = r.json()['data']['result'].encode('utf-8')
        except requests.exceptions.HTTPError:
            self._logger.critical('Request failed with response: %r',
                                  r.text,
                                  exc_info=True)
            return []
        except requests.exceptions.RequestException:
            self._logger.critical('Request failed.', exc_info=True)
            return []
        except ValueError as e:
            self._logger.critical('Cannot parse response: %s',
                                  e.args[0])
            return []
        except KeyError:
            self._logger.critical('Cannot parse response.',
                                  exc_info=True)
            return []
        else:
            transcribed = []
            if text:
                transcribed.append(text.upper())
            self._logger.info(u'讯飞语音识别到了: %s' % text)
            return transcribed

    @classmethod
    def is_available(cls):
        return diagnose.check_network_connection()


class ALiBaBaSTT(AbstractSTTEngine):
    """
    Aliyun (阿里云) speech recognition API.
    To use this module, first register a developer account at
    https://data.aliyun.com/product/nls, then look up your AccessKey (AK)
    information and fill it into profile.xml.
    """

    SLUG = "ali-stt"

    def __init__(self, ak_id, ak_secret):
        self._logger = logging.getLogger(__name__)
        self.ak_id = ak_id
        self.ak_secret = ak_secret

    @classmethod
    def get_config(cls):
        # FIXME: Replace this as soon as we have a config module
        config = {}
        # Try to get ali_yuyin config from config
        profile_path = dingdangpath.config('profile.yml')
        if os.path.exists(profile_path):
            with open(profile_path, 'r') as f:
                profile = yaml.safe_load(f)
                if 'ali_yuyin' in profile:
                    if 'ak_id' in profile['ali_yuyin']:
                        config['ak_id'] = \
                            profile['ali_yuyin']['ak_id']
                    if 'ak_secret' in profile['ali_yuyin']:
                        config['ak_secret'] = \
                            profile['ali_yuyin']['ak_secret']
        return config

    def to_md5_base64(self, strBody):
        # Digest of the request body, used in the string to sign
        hash = hashlib.md5()
        hash.update(strBody)
        m = hash.digest().encode('base64').strip()
        hash = hashlib.md5()
        hash.update(m)
        return hash.digest().encode('base64').strip()

    def to_sha1_base64(self, stringToSign, secret):
        hmacsha1 = hmac.new(secret, stringToSign, hashlib.sha1)
        return base64.b64encode(hmacsha1.digest())

    def transcribe(self, fp):
        try:
            wav_file = wave.open(fp, 'rb')
        except IOError:
            self._logger.critical('wav file not found: %s',
                                  fp,
                                  exc_info=True)
            return []
        n_frames = wav_file.getnframes()
        audio = wav_file.readframes(n_frames)
        date = datetime.datetime.strftime(datetime.datetime.utcnow(),
                                          "%a, %d %b %Y %H:%M:%S GMT")
        options = {
            'url': 'https://nlsapi.aliyun.com/recognize?model=chat',
            'method': 'POST',
            'body': audio,
        }
        headers = {
            'authorization': '',
            'content-type': 'audio/wav; samplerate=16000',
            'accept': 'application/json',
            'date': date,
            'Content-Length': str(len(audio))
        }

        self.body = ''
        if 'body' in options:
            self.body = options['body']

        bodymd5 = ''
        if not self.body == '':
            bodymd5 = self.to_md5_base64(self.body)

        stringToSign = options['method'] + '\n' + \
            headers['accept'] + '\n' + bodymd5 + '\n' + \
            headers['content-type'] + '\n' + headers['date']
        signature = self.to_sha1_base64(stringToSign, self.ak_secret)

        authHeader = 'Dataplus ' + self.ak_id + ':' + signature
        headers['authorization'] = authHeader
        url = options['url']
        r = requests.post(url, data=self.body, headers=headers, verify=False)
        try:
            r.raise_for_status()
            text = ''
            if 'result' in r.json():
                text = r.json()['result'].encode('utf-8')
        except requests.exceptions.HTTPError:
            self._logger.critical('Request failed with response: %r',
                                  r.text,
                                  exc_info=True)
            return []
        except requests.exceptions.RequestException:
            self._logger.critical('Request failed.', exc_info=True)
            return []
        except ValueError as e:
            self._logger.critical('Cannot parse response: %s',
                                  e.args[0])
            return []
        except KeyError:
            self._logger.critical('Cannot parse response.',
                                  exc_info=True)
            return []
        else:
            transcribed = []
            if text:
                transcribed.append(text.upper())
            self._logger.info(u'阿里云语音识别到了: %s' % text)
            return transcribed

    @classmethod
    def is_available(cls):
        return diagnose.check_network_connection()


class SnowboySTT(AbstractSTTEngine):
"""
Snowboy STT 离线识别引擎(只适用于离线唤醒)
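A minimal usage sketch for the two recognizers added above (assumptions: a Python 2 environment, the client package importable from the project root, real credentials in place of the placeholder values, and a 16 kHz mono WAV recording named record.wav):

from client.stt import IFlyTekSTT, ALiBaBaSTT

# iFlyTek: api_id / api_key come from the xfyun application console
iflytek = IFlyTekSTT(api_id='YOUR_APP_ID', api_key='YOUR_API_KEY')
print iflytek.transcribe('record.wav')  # returns a list of transcriptions

# Aliyun: ak_id / ak_secret are the account's AccessKey pair
ali = ALiBaBaSTT(ak_id='YOUR_AK_ID', ak_secret='YOUR_AK_SECRET')
print ali.transcribe('record.wav')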
67 changes: 64 additions & 3 deletions client/tts.py
@@ -416,12 +416,10 @@ def say(self, phrase):
class BaiduTTS(AbstractMp3TTSEngine):
"""
使用百度语音合成技术
要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
之后创建一个新应用, 然后在应用管理的"查看key"中获得 API Key 和 Secret Key
填入 profile.xml 中.
...
...
baidu_yuyin: 'AIzaSyDoHmTEToZUQrltmORWS4Ott0OHVA62tw8'
api_key: 'LMFYhLdXSSthxCNLR7uxFszQ'
secret_key: '14dbd10057xu7b256e537455698c0e4e'
@@ -519,6 +517,69 @@ def say(self, phrase):
os.remove(tmpfile)


class IFlyTekTTS(AbstractMp3TTSEngine):
    """
    Uses iFlyTek (讯飞) speech synthesis.
    To use this module, enable it in profile.xml first and choose a
    suitable voice id (vid).
    """

    SLUG = "iflytek-tts"

    def __init__(self, vid='60170'):
        self._logger = logging.getLogger(__name__)
        self.vid = vid

    @classmethod
    def get_config(cls):
        # FIXME: Replace this as soon as we have a config module
        config = {}
        # Try to get iflytek_yuyin config from config
        profile_path = dingdangpath.config('profile.yml')
        if os.path.exists(profile_path):
            with open(profile_path, 'r') as f:
                profile = yaml.safe_load(f)
                if 'iflytek_yuyin' in profile:
                    if 'vid' in profile['iflytek_yuyin']:
                        config['vid'] = \
                            profile['iflytek_yuyin']['vid']
        return config

    @classmethod
    def is_available(cls):
        return diagnose.check_network_connection()

    def split_sentences(self, text):
        punctuations = ['.', '。', ';', ';', '\n']
        for i in punctuations:
            text = text.replace(i, '@@@')
        return text.split('@@@')

    def get_speech(self, phrase):
        getinfo_url = 'http://www.peiyinge.com/make/getSynthSign'
        voice_baseurl = 'http://proxy.peiyinge.com:17063/synth?ts='
        data = {
            'content': phrase.encode('utf8')
        }
        result_info = requests.post(getinfo_url, data=data).json()
        content = urllib.quote(phrase.encode('utf8'))
        ts = result_info['ts']
        sign = result_info['sign']
        voice_url = voice_baseurl + ts + '&sign=' + sign + \
            '&vid=' + self.vid + '&volume=&speed=0&content=' + content
        r = requests.get(voice_url)
        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
            f.write(r.content)
            tmpfile = f.name
        return tmpfile

    def say(self, phrase):
        self._logger.debug(u"Saying '%s' with '%s'", phrase, self.SLUG)
        tmpfile = self.get_speech(phrase)
        if tmpfile is not None:
            self.play_mp3(tmpfile)
            os.remove(tmpfile)


def get_default_engine_slug():
return 'osx-tts' if platform.system().lower() == 'darwin' else 'espeak-tts'

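And a similarly hedged sketch for the new synthesizer (same assumptions as the STT sketch above; '60170' is simply the default voice id used in the code, and playback relies on the existing play_mp3 helper):

# -*- coding: utf-8 -*-
from client.tts import IFlyTekTTS

tts = IFlyTekTTS(vid='60170')
tts.say(u'你好')  # fetches an mp3 through the peiyinge proxy and plays it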
