-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtabletalk.py
executable file
·189 lines (150 loc) · 5.31 KB
/
tabletalk.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#!/usr/bin/python
###############################################
# Tabletalk
# An experiment by Rory Petty
# 7/19/2011
###############################################
# Dependencies
# python: simplejson,pycurl
# pygame - http://f0o.com/~rene/stuff/pygame-1.9.2pre-py2.6-macosx10.6.mpkg.zip
# binaries: sox
# Known Issues
# 1. sox/rec would like to record at 44.1 kHz and other standard rates, but the Google Speech API maxes out at 44000 Hz. We can
# decrease the sampling rate, but that might come at a cost of audio quality. Also, we're uploading 44.1 kHz audio labelled as
# 44.0 kHz, which could be affecting our transcription quality
# 2. probably need a better solution than just polling the recordings dir for new files
# 3. switch to pexpect to better kill subprocesses spawned by sox/rec
# 4. better packaging of dependent python modules
# 5. use queue module?
import sys
#sys.path.append('/Library/Python/2.6/site-packages')
sys.path.append("/Users/rory/Dropbox/dev/TableTalk/PyTagCloud/src/")
import os
import time
import subprocess
import logging
import simplejson
import urllib
import urllib2
import pycurl
import signal
from StringIO import StringIO
#import pexpect
# Directory of the bundled sox build; main() launches SOXDIR + "/rec".
SOXDIR = "sox-14.3.2"
# Directory the recorder writes FLAC clips into and that main() polls.
REC = "recordings"
# Sample rate passed to rec's -r option (a string because it is spliced
# into the shell command line).
BR = "44100"
# Directory a clip is moved to once its phrases have been stored.
UPLOADED = "uploaded"
GOOG_SPEECH_URL='https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=en-US'
YAHOO_APP_ID = 'YahooDemo' # Change this to your API key
YAHOO_TE_URL_BASE = 'http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction'
# Text file the extracted phrases are appended to, one per line; main()
# truncates it at start-up.
TE_PHRASES_FILE = "tabletalk_te_phrases.txt"
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
class YahooSearchError(Exception):
    """Raised when a Yahoo API reply contains an 'Error' entry."""
def _transcribe(flac_path):
    """POST one FLAC recording to the Google Speech API.

    Args:
        flac_path: path of the recording to upload.

    Returns:
        The value stored under 'hypotheses' in the JSON reply, or None when
        the HTTP status reported for the transfer is not 200.
    """
    body = StringIO()
    c = pycurl.Curl()
    try:
        c.setopt(c.POST, 1)
        c.setopt(c.URL, GOOG_SPEECH_URL)
        c.setopt(c.HTTPHEADER, ["Content-Type: audio/x-flac; rate=44000"])
        c.setopt(c.HTTPPOST, [("myfile", (c.FORM_FILE, flac_path))])
        c.setopt(c.WRITEFUNCTION, body.write)
        try:
            c.perform()
        except pycurl.error as e:
            # Let logging format the exception: concatenating a str with the
            # exception object raises TypeError inside the handler.
            logging.info("Error: %s", e)
        http_code = c.getinfo(c.HTTP_CODE)
    finally:
        # Release the curl handle even if a setopt/getinfo call raises.
        c.close()
    if http_code != 200:
        logging.info("ERROR: http request failed")
        return None
    return simplejson.loads(body.getvalue())['hypotheses']


def _process_recording(filename):
    """Transcribe REC/<filename>, store its key phrases and archive the clip.

    The clip is moved to UPLOADED only after its phrases were appended to
    TE_PHRASES_FILE.
    NOTE(review): in every other case (failed upload, no hypotheses, Yahoo
    error) the clip stays in REC and is picked up again on the next pass;
    confirm that an unbounded retry is intended.
    """
    source = os.path.join(REC, filename)
    hypotheses = _transcribe(source)
    if hypotheses is None:
        return
    print("+++++++++++++++++")
    if len(hypotheses) == 0:
        return
    transcription = hypotheses[0]['utterance']
    confidence = hypotheses[0]['confidence']
    print("Google Speech API:")
    print("Transcription: " + transcription)
    print("Confidence: " + str(confidence))
    # send to yahoo term extraction api
    try:
        yte_ret = yahoo_term_extraction(transcription)
        print("Results from Yahoo Term Extraction API:")
        print(yte_ret)
        # The with-block closes the file; no explicit close() is needed.
        with open(TE_PHRASES_FILE, "a") as f:
            for s in yte_ret['Result']:
                f.write(s + '\n')
        os.rename(source, os.path.join(UPLOADED, filename))
    except YahooSearchError:
        print("An API error occurred.")
    except IOError:
        print("A network IO error occured during the call to the Yahoo Term Extraction API.")


def main():
    """Record audio with sox and turn each recorded clip into key phrases.

    Creates and empties the REC and UPLOADED directories, starts the sox
    `rec` command in the background, truncates TE_PHRASES_FILE, then polls
    REC forever, handing the first file (in sorted order) to
    _process_recording().  The loop only ends through an exception such as
    the SystemExit raised by the SIGINT handler.
    """
    rec = None  # bound up front so the cleanup below never sees an unset name
    try:
        signal.signal(signal.SIGINT, signal_handler) #register signal handler to catch ctrl-c program exit
        for directory in (REC, UPLOADED):
            if not os.path.exists(directory):
                os.makedirs(directory)
        # clear out previous recordings
        deleteDirContents(REC)
        deleteDirContents(UPLOADED)
        # start sox
        rec = subprocess.Popen(SOXDIR+"/rec -r "+BR+" "+REC+"/tabletalk.flac trim 0 00:00:10 : newfile : restart &> recording.log", shell=True)
        # blank the phrases file each time
        open(TE_PHRASES_FILE, "w").close()
        while True:
            files = sorted([f for f in os.listdir(REC)
                            if os.path.isfile(os.path.join(REC, f))])
            if not files:
                time.sleep(1) # keeps cpu from going to 100% when no files to process (tight while loop)
                continue
            _process_recording(files[0])
            time.sleep(0.02)
            print("")
            print("")
    except KeyboardInterrupt:
        pass  # the recorder is stopped in the finally clause
    finally:
        # The SIGINT handler leaves via sys.exit(), i.e. SystemExit, which an
        # `except KeyboardInterrupt` clause never sees; stopping the recorder
        # here covers every exit path.
        if rec is not None and rec.poll() is None:
            os.kill(rec.pid, signal.SIGTERM)
def yahoo_term_extraction(context, query="", **kwargs):
    """Query the Yahoo Term Extraction API and return its result set.

    Args:
        context: the text to extract terms from (required by the API).
        query: optional terms passed along as the 'query' parameter.
        **kwargs: extra request parameters; 'appid', 'context', 'query' and
            'output' are always overwritten by this function.

    Returns:
        The value stored under 'ResultSet' in the decoded JSON reply.

    Raises:
        YahooSearchError: the reply contains an 'Error' entry.
        Errors raised by urllib.urlopen or the JSON decoder propagate
        unchanged.
    """
    kwargs.update({
        'appid': YAHOO_APP_ID,
        'context': context, #for the Yahoo TE API, context is the required string to search. query allows you to provide optional terms
        'query': query,
        'output': 'json'
    })
    url = YAHOO_TE_URL_BASE + '?' + urllib.urlencode(kwargs)
    response = urllib.urlopen(url)
    try:
        result = simplejson.load(response)
    finally:
        # Always release the connection, including when decoding fails.
        response.close()
    if 'Error' in result:
        # An error occurred; raise an exception
        raise YahooSearchError(result['Error'])
    return result['ResultSet']
def call_command(command):
    """Run *command* without a shell and wait for it to finish.

    Args:
        command: command line as a single string.  It is tokenised with
            shell-like rules, so repeated whitespace is collapsed and quoted
            arguments stay together.

    Returns:
        The (stdout, stderr) pair returned by Popen.communicate().
    """
    import shlex  # local import: only this function needs it

    # A plain split(' ') yields empty arguments for consecutive spaces and
    # tears quoted arguments apart.
    process = subprocess.Popen(shlex.split(command),
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    return process.communicate()
def deleteDirContents(path):
    """Delete every regular file directly inside *path*.

    Entries that are not regular files are left alone.  An exception raised
    while checking or removing an entry is printed and the loop moves on to
    the next entry.
    """
    candidates = (os.path.join(path, entry) for entry in os.listdir(path))
    for candidate in candidates:
        try:
            if not os.path.isfile(candidate):
                continue
            os.unlink(candidate)
        except Exception as err:
            print(err)
def signal_handler(signal, frame):
    """Signal handler: announce Ctrl+C, run cleanup() and exit with status 0.

    Both arguments are the ones the signal module passes to a handler;
    neither is used here.
    """
    print('You pressed Ctrl+C')
    cleanup()
    raise SystemExit(0)
def cleanup():
    """Print the shutdown notice; no other teardown is performed here."""
    print("Exiting program")
# Start the recorder/transcriber only when executed as a script.
if __name__ == "__main__":
    main()