增加 wechat_echo 选项，支持将微信语音解析成指令（只支持百度stt）

wzpan · Oct 25, 2017 · 115e3fd · 115e3fd
1 parent 77a1f12
commit 115e3fd
Show file tree

Hide file tree

Showing 4 changed files with 68 additions and 24 deletions.
diff --git a/client/audio_utils.py b/client/audio_utils.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python
+# coding: utf-8
+import os
+from pydub import AudioSegment
+
+
+def mp3_to_wav(mp3_file):
+    """Convert an MP3 file to WAV, writing the result next to the source.
+
+    :param mp3_file: path of the MP3 file to convert
+    :returns: path of the exported WAV file, or None (after printing an
+              error message) when mp3_file does not exist
+    """
+    if not os.path.exists(mp3_file):
+        print u"文件错误"
+        return None
+    # Swap only the extension: str.replace() would also rewrite any other
+    # ".mp3" occurring in the path (e.g. inside a directory name), and would
+    # leave the path unchanged -- so the export would overwrite the source --
+    # when the file name does not contain ".mp3" at all.
+    target = os.path.splitext(mp3_file)[0] + ".wav"
+    voice = AudioSegment.from_mp3(mp3_file)
+    voice.export(target, format="wav")
+    return target
diff --git a/client/mic.py b/client/mic.py
@@ -269,7 +269,7 @@ def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True,
         """
 
         RATE = 16000
-        CHUNK = 1024
+        CHUNK = 8192
         LISTEN_TIME = 12
 
         # check if no threshold provided
@@ -292,7 +292,7 @@ def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True,
 
         for i in range(0, RATE / CHUNK * LISTEN_TIME):
             try:
-                data = stream.read(CHUNK, exception_on_overflow=False)
+                data = stream.read(CHUNK)
                 frames.append(data)
                 score = self.getScore(data)
 

diff --git a/client/stt.py b/client/stt.py
@@ -350,15 +350,22 @@ def transcribe(self, fp):
                                   fp,
                                   exc_info=True)
             return []
-        Param = '{"auf":"16k","aue":"raw","scene":"main"}'
-        XParam = base64.b64encode(Param)
         n_frames = wav_file.getnframes()
+        frame_rate = wav_file.getframerate()
+        Param = str({
+            "auf": "16k",
+            "aue": "raw",
+            "scene": "main",
+            "sample_rate": "%s" % str(frame_rate)
+        })
+        XParam = base64.b64encode(Param)
         audio = wav_file.readframes(n_frames)
         base_data = base64.b64encode(audio)
         data = {
             'voice_data': base_data,
             'api_id': self.api_id,
             'api_key': self.api_key,
+            'sample_rate': frame_rate,
             'XParam': XParam
         }
         r = requests.post(self.url, data=data)
@@ -415,7 +422,7 @@ def __init__(self, ak_id, ak_secret):
     def get_config(cls):
         # FIXME: Replace this as soon as we have a config module
         config = {}
-        # Try to get iflytek_yuyin config from config
+        # Try to get ali_yuyin config from config
         profile_path = dingdangpath.config('profile.yml')
         if os.path.exists(profile_path):
             with open(profile_path, 'r') as f:
@@ -450,6 +457,7 @@ def transcribe(self, fp):
                                   exc_info=True)
             return []
         n_frames = wav_file.getnframes()
+        frame_rate = wav_file.getframerate()
         audio = wav_file.readframes(n_frames)
         date = datetime.datetime.strftime(datetime.datetime.utcnow(),
                                           "%a, %d %b %Y %H:%M:%S GMT")
@@ -460,7 +468,7 @@ def transcribe(self, fp):
         }
         headers = {
             'authorization': '',
-            'content-type': 'audio/wav; samplerate=16000',
+            'content-type': 'audio/wav; samplerate=%s' % str(frame_rate),
             'accept': 'application/json',
             'date': date,
             'Content-Length': str(len(audio))

diff --git a/dingdang.py b/dingdang.py
@@ -15,6 +15,9 @@
 from client.wxbot import WXBot
 from client.conversation import Conversation
 from client.tts import SimpleMp3Player
+
+from client.audio_utils import mp3_to_wav
+
 # Add dingdangpath.LIB_PATH to sys.path
 sys.path.append(dingdangpath.LIB_PATH)
 
@@ -42,31 +45,50 @@ def __init__(self, brain):
         self.music_mode = None
         self.last = time.time()
 
+    def handle_music_mode(self, msg_data):
+        """Forward a WeChat command to the active music mode.
+
+        A command arriving within 0.5 seconds of the previously accepted
+        one is dropped, as is a command that arrives while an earlier one
+        is still being delegated.
+
+        :param msg_data: the command handed to music_mode.delegateInput
+        """
+        # avoid repeating command
+        now = time.time()
+        if (now - self.last) <= 0.5:
+            return
+        # stop passive listening
+        self.brain.mic.stopPassiveListen()
+        self.last = now
+        if self.music_mode.delegating:
+            return
+        self.music_mode.delegating = True
+        try:
+            self.music_mode.delegateInput(msg_data, True)
+        finally:
+            # Always clear the flag: if it stayed set after an exception,
+            # every later command would be dropped by the check above.
+            # music_mode is re-checked because it is presumably reset to
+            # None when the music mode ends during delegation.
+            if self.music_mode is not None:
+                self.music_mode.delegating = False
+
     def handle_msg_all(self, msg):
         # ignore the msg when handling plugins
         if msg['msg_type_id'] == 1 and \
-           msg['to_user_id'] == self.my_account['UserName']:  # reply to self
-
+           msg['to_user_id'] == self.my_account['UserName']:
+            # reply to self
             if msg['content']['type'] == 0:
                 msg_data = msg['content']['data']
                 if self.music_mode is not None:
-                    # avoid repeating command
-                    now = time.time()
-                    if (now - self.last) > 0.5:
-                        # stop passive listening
-                        self.brain.mic.stopPassiveListen()
-                        self.last = now
-                        if not self.music_mode.delegating:
-                            self.music_mode.delegating = True
-                            self.music_mode.delegateInput(msg_data, True)
-                            if self.music_mode is not None:
-                                self.music_mode.delegating = False
-                    return
+                    return self.handle_music_mode(msg_data)
                 self.brain.query([msg_data], self, True)
-            elif msg['content']['type'] == 4:  # echo voice
-                player = SimpleMp3Player()
-                player.play_mp3(os.path.join(dingdangpath.TEMP_PATH,
-                                             'voice_%s.mp3' % msg['msg_id']))
+            elif msg['content']['type'] == 4:
+                mp3_file = os.path.join(dingdangpath.TEMP_PATH,
+                                        'voice_%s.mp3' % msg['msg_id'])
+                profile = self.brain.profile
+                # echo or command?
+                if 'wechat_echo' in profile and not profile['wechat_echo']:
+                    # 执行命令
+                    mic = self.brain.mic
+                    wav_file = mp3_to_wav(mp3_file)
+                    with open(wav_file) as f:
+                        command = mic.active_stt_engine.transcribe(f)
+                        if command:
+                            if self.music_mode is not None:
+                                return self.handle_music_mode(msg_data)
+                            self.brain.query(command, self, True)
+                        else:
+                            mic.say("什么？")
+                else:
+                    # 播放语音
+                    player = SimpleMp3Player()
+                    player.play_mp3(mp3_file)
 
 
 class Dingdang(object):