Skip to content
This repository has been archived by the owner on Apr 13, 2018. It is now read-only.

Commit

Permalink
增加 wechat_echo 选项,支持将微信语音解析成指令(只支持百度stt)
Browse files Browse the repository at this point in the history
  • Loading branch information
wzpan committed Oct 25, 2017
1 parent 77a1f12 commit 115e3fd
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 24 deletions.
14 changes: 14 additions & 0 deletions client/audio_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env python
# coding: utf-8
import os
from pydub import AudioSegment


def mp3_to_wav(mp3_file):
    """Convert an MP3 file into a WAV file stored next to it.

    :param mp3_file: path of the MP3 file to convert
    :returns: path of the generated WAV file, or None (after printing an
              error message) when ``mp3_file`` does not exist
    """
    if not os.path.exists(mp3_file):
        print(u"文件错误")
        return None
    # Swap only the file extension.  A plain str.replace(".mp3", ".wav")
    # would also rewrite ".mp3" inside directory names, and would leave the
    # path untouched (so the export overwrites the source) when the name
    # does not contain ".mp3" at all.
    target = os.path.splitext(mp3_file)[0] + ".wav"
    voice = AudioSegment.from_mp3(mp3_file)
    voice.export(target, format="wav")
    return target
4 changes: 2 additions & 2 deletions client/mic.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True,
"""

RATE = 16000
CHUNK = 1024
CHUNK = 8192
LISTEN_TIME = 12

# check if no threshold provided
Expand All @@ -292,7 +292,7 @@ def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True,

for i in range(0, RATE / CHUNK * LISTEN_TIME):
try:
data = stream.read(CHUNK, exception_on_overflow=False)
data = stream.read(CHUNK)
frames.append(data)
score = self.getScore(data)

Expand Down
16 changes: 12 additions & 4 deletions client/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,15 +350,22 @@ def transcribe(self, fp):
fp,
exc_info=True)
return []
Param = '{"auf":"16k","aue":"raw","scene":"main"}'
XParam = base64.b64encode(Param)
n_frames = wav_file.getnframes()
frame_rate = wav_file.getframerate()
Param = str({
"auf": "16k",
"aue": "raw",
"scene": "main",
"sample_rate": "%s" % str(frame_rate)
})
XParam = base64.b64encode(Param)
audio = wav_file.readframes(n_frames)
base_data = base64.b64encode(audio)
data = {
'voice_data': base_data,
'api_id': self.api_id,
'api_key': self.api_key,
'sample_rate': frame_rate,
'XParam': XParam
}
r = requests.post(self.url, data=data)
Expand Down Expand Up @@ -415,7 +422,7 @@ def __init__(self, ak_id, ak_secret):
def get_config(cls):
# FIXME: Replace this as soon as we have a config module
config = {}
# Try to get iflytek_yuyin config from config
# Try to get ali_yuyin config from config
profile_path = dingdangpath.config('profile.yml')
if os.path.exists(profile_path):
with open(profile_path, 'r') as f:
Expand Down Expand Up @@ -450,6 +457,7 @@ def transcribe(self, fp):
exc_info=True)
return []
n_frames = wav_file.getnframes()
frame_rate = wav_file.getframerate()
audio = wav_file.readframes(n_frames)
date = datetime.datetime.strftime(datetime.datetime.utcnow(),
"%a, %d %b %Y %H:%M:%S GMT")
Expand All @@ -460,7 +468,7 @@ def transcribe(self, fp):
}
headers = {
'authorization': '',
'content-type': 'audio/wav; samplerate=16000',
'content-type': 'audio/wav; samplerate=%s' % str(frame_rate),
'accept': 'application/json',
'date': date,
'Content-Length': str(len(audio))
Expand Down
58 changes: 40 additions & 18 deletions dingdang.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from client.wxbot import WXBot
from client.conversation import Conversation
from client.tts import SimpleMp3Player

from client.audio_utils import mp3_to_wav

# Add dingdangpath.LIB_PATH to sys.path
sys.path.append(dingdangpath.LIB_PATH)

Expand Down Expand Up @@ -42,31 +45,50 @@ def __init__(self, brain):
self.music_mode = None
self.last = time.time()

def handle_music_mode(self, msg_data):
    """Forward a WeChat command to the active music mode.

    Commands arriving within 0.5 seconds of the previously accepted one are
    dropped, as are commands received while the music mode is still busy
    delegating an earlier one.

    :param msg_data: the command passed on to ``music_mode.delegateInput``
    """
    # avoid repeating command
    now = time.time()
    if (now - self.last) <= 0.5:
        return
    # stop passive listening
    self.brain.mic.stopPassiveListen()
    self.last = now
    if self.music_mode.delegating:
        return
    self.music_mode.delegating = True
    try:
        self.music_mode.delegateInput(msg_data, True)
    finally:
        # Always release the flag, even when delegateInput raises;
        # otherwise every later command would be ignored.  music_mode is
        # re-read because it may have been reset to None in the meantime.
        if self.music_mode is not None:
            self.music_mode.delegating = False

def handle_msg_all(self, msg):
    """Handle a WeChat message the account sent to itself.

    Only messages with ``msg_type_id`` 1 whose ``to_user_id`` is our own
    ``UserName`` are processed.  Content type 0 is run as a command (or
    forwarded to the music mode when one is active).  Content type 4 is a
    voice message saved as ``voice_<msg_id>.mp3`` under
    ``dingdangpath.TEMP_PATH``: it is played back unless the profile sets
    ``wechat_echo`` to a false value, in which case it is transcribed and
    run as a command.

    :param msg: message dict as delivered by the WeChat bot
    """
    if msg['msg_type_id'] != 1 or \
            msg['to_user_id'] != self.my_account['UserName']:
        return

    content_type = msg['content']['type']
    if content_type == 0:
        msg_data = msg['content']['data']
        if self.music_mode is not None:
            return self.handle_music_mode(msg_data)
        self.brain.query([msg_data], self, True)
    elif content_type == 4:
        mp3_file = os.path.join(dingdangpath.TEMP_PATH,
                                'voice_%s.mp3' % msg['msg_id'])
        profile = self.brain.profile
        # echo or command?
        if 'wechat_echo' in profile and not profile['wechat_echo']:
            # run the voice message as a command
            mic = self.brain.mic
            wav_file = mp3_to_wav(mp3_file)
            command = []
            # mp3_to_wav returns None when the mp3 file is missing
            if wav_file is not None:
                # WAV data is binary, so open the file in binary mode
                with open(wav_file, 'rb') as f:
                    command = mic.active_stt_engine.transcribe(f)
            if not command:
                mic.say("什么?")
                return
            if self.music_mode is not None:
                # The music mode takes a single command, as in the text
                # branch; the transcription is a list, so pass its first
                # entry (msg_data is not defined for voice messages).
                return self.handle_music_mode(command[0])
            self.brain.query(command, self, True)
        else:
            # play the voice message back
            player = SimpleMp3Player()
            player.play_mp3(mp3_file)


class Dingdang(object):
Expand Down

0 comments on commit 115e3fd

Please sign in to comment.