autoclapper.py
#! /usr/bin/env python
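"""Align a final-quality audio track to a video's scratch audio track by
matching a shared, loud "clapper" event in both, then write the shifted
final track to a file that can be used directly as the video's sound-track."""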
## Support for byteswapping audio streams (needed for AIFF format).
_typecode = {2: 'h'}  # maps sample width in bytes -> array-module typecode

def _init_typecode():
    """Find an array typecode for 4-byte ints and register it in _typecode."""
    import array
    for t in ('i', 'l'):
        a = array.array(t)
        if a.itemsize == 4:
            _typecode[4] = t
            return
    import sys
    print "Can't find array typecode for 4 byte ints."
    sys.exit(1)

_init_typecode()

def _byteswap(s, n):
    """Byteswap stream s, which is of width n bytes. Does nothing if n is 1.
    Only supports widths listed in _typecode (2 & 4)."""
    if n == 1:
        return s
    import array
    a = array.array(_typecode[n], s)
    a.byteswap()
    return a.tostring()

def _null(s, n):
    """Do nothing to stream s, which is of width n. See also: _byteswap(s,n)"""
    return s
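
# Example: for 16-bit samples, byteswapping reverses each pair of bytes;
# _byteswap('\x00\x01', 2) returns '\x01\x00', and _byteswap(s, 1) returns
# s unchanged.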

class SoundFile(object):
    '''Wrapper for a PCM sound stream, which can be AIFF (aifc module)
    or WAV (wave module).'''
    def __init__(self, fname, template_obj=None):
        if fname[-5:].lower() == '.aiff':
            self._mod = __import__('aifc')
            self._conv = _byteswap  # AIFF is big-endian.
        elif fname[-4:].lower() == '.wav':
            self._mod = __import__('wave')
            self._conv = _null  # WAV is little-endian.
        else:
            print 'Unknown extension:', fname
            import sys
            sys.exit(1)
        if template_obj:
            # We will create & write to this file.
            self.init_from_template(fname, template_obj)
        else:
            # We load from this file.
            self.load(fname)

    def bytes_per_frame(self):
        return self.stream.getsampwidth() * self.stream.getnchannels()

    def bytes_per_second(self):
        return self.stream.getframerate() * self.bytes_per_frame()

    def load(self, in_fname):
        print 'load', self._mod.__name__, in_fname
        self.stream = self._mod.open(in_fname, 'rb')

    def read_lin(self):
        '''Return the whole stream as native-endian linear PCM data.'''
        fragment = self.stream.readframes(self.stream.getnframes())
        return self._conv(fragment, self.stream.getsampwidth())

    def init_from_template(self, out_fname, template_obj):
        print 'create', self._mod.__name__, out_fname
        self.stream = self._mod.open(out_fname, 'wb')
        self.stream.setnchannels(template_obj.stream.getnchannels())
        self.stream.setsampwidth(template_obj.stream.getsampwidth())
        self.stream.setframerate(template_obj.stream.getframerate())

    def write_lin(self, fragment):
        '''Write native-endian linear PCM data to the stream.'''
        self.stream.writeframes(self._conv(fragment, self.stream.getsampwidth()))

    def close(self):
        self.stream.close()
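
# Example usage (illustrative filenames):
#   snd = SoundFile('scratch.wav')      # open an existing WAV for reading
#   print snd.stream.getparams()        # parameters of the underlying stream
#   pcm = snd.read_lin()                # whole file as native-endian PCM
#   out = SoundFile('copy.wav', snd)    # new file with the same parameters
#   out.write_lin(pcm); out.close()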

def coerce_lin(source_aiff, template_obj):
    '''Read data from source, and convert it to match template's params.'''
    import audioop
    frag = source_aiff.read_lin()
    Ss = source_aiff.stream
    St = template_obj.stream
    # Sample width
    if Ss.getsampwidth() != St.getsampwidth():
        print 'coerce sampwidth %i -> %i' % (Ss.getsampwidth(), St.getsampwidth())
        frag = audioop.lin2lin(frag, Ss.getsampwidth(), St.getsampwidth())
    width = St.getsampwidth()
    # Channels
    if Ss.getnchannels() != St.getnchannels():
        print 'coerce nchannels %i -> %i' % (Ss.getnchannels(), St.getnchannels())
        if Ss.getnchannels() == 2 and St.getnchannels() == 1:
            frag = audioop.tomono(frag, width, 0.5, 0.5)
        elif Ss.getnchannels() == 1 and St.getnchannels() == 2:
            frag = audioop.tostereo(frag, width, 1.0, 1.0)
        else:
            print "Err: can't match channels"
    # Frame rate
    if Ss.getframerate() != St.getframerate():
        print 'coerce framerate %i -> %i' % (Ss.getframerate(), St.getframerate())
        frag, state = audioop.ratecv(
            frag, width,
            St.getnchannels(),
            Ss.getframerate(),  # in rate
            St.getframerate(),  # out rate
            None, 2, 1)
    return frag
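
# Example (illustrative parameters): if the scratch track is 44.1 kHz mono and
# the final track is 48 kHz 16-bit stereo, coerce_lin(scratch, final) returns
# the scratch audio converted to 16-bit samples, duplicated to stereo and
# resampled to 48 kHz, so both fragments share one set of parameters.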

def findfit(scratch_frag, final_frag, sound_file):
    '''Calculate the offset (in seconds) between scratch_frag & final_frag.
    Both fragments are assumed to contain the same loud "clapper" event.
    The SoundFile object supplies the common stream parameters.'''
    import audioop
    nchannels = sound_file.stream.getnchannels()
    framerate = sound_file.stream.getframerate()
    width = sound_file.stream.getsampwidth()
    assert width == 2  # audioop.findmax/findfit only handle 16-bit samples.
    # Simplify the sound streams to make it quicker to find a match.
    # Left channel only.
    if nchannels > 1:
        scratch_frag_ = audioop.tomono(scratch_frag, width, 1, 0)
        final_frag_ = audioop.tomono(final_frag, width, 1, 0)
    else:
        scratch_frag_ = scratch_frag
        final_frag_ = final_frag
    nchannels_ = 1
    # Downsample to 8000 frames/sec.
    framerate_ = 8000
    scratch_frag_, state = audioop.ratecv(
        scratch_frag_, width, nchannels_, framerate, framerate_, None)
    final_frag_, state = audioop.ratecv(
        final_frag_, width, nchannels_, framerate, framerate_, None)
    bytes_per_second_ = nchannels_ * framerate_ * width
    # Find the clapper in final: the loudest window of length_samples samples.
    length_samples = int(0.001 * framerate * nchannels_)
    final_off_samples = audioop.findmax(final_frag_, length_samples)
    # Search for a 2 second 'needle' centred on where we found the 'clapper'.
    needle_bytes = 2 * bytes_per_second_
    b0 = max(0, final_off_samples * width - int(needle_bytes / 2))
    print '"clapper" at final:', 1.0 * b0 / bytes_per_second_, 'sec'
    b1 = b0 + needle_bytes
    final_clapper_frag = final_frag_[b0:b1]
    scratch_off_samples, factor = audioop.findfit(scratch_frag_, final_clapper_frag)
    scratch_off_bytes = scratch_off_samples * width
    print 'match at scratch:', 1.0 * scratch_off_bytes / bytes_per_second_, 'sec', ' factor =', factor
    # Calculate the offset (shift) between the two fragments.
    shift_sec = (scratch_off_bytes - b0) * 1.0 / bytes_per_second_
    print 'shift =', shift_sec, 'seconds'
    return shift_sec
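
# Worked example (illustrative numbers): if the needle window starts 3.0 s
# into the downsampled final track (b0) and audioop.findfit places the best
# match 5.5 s into the scratch track, then shift_sec = 5.5 - 3.0 = 2.5;
# events in the final track occur 2.5 s earlier than in the scratch track,
# so the final track must be delayed by 2.5 s.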

def autoclapper(in_scratch_fname, in_final_fname, out_fname):
    """Read WAV- or AIFF-format files in_scratch_fname (a scratch audio track,
    taken from a video) & in_final_fname (a final-quality audio track of
    the same scene). Shift the 'final' stream to match the 'scratch' track,
    and write it out to out_fname. The result is a file that can be used
    directly as the video's sound-track."""
    # Read in the input streams.
    scratch = SoundFile(in_scratch_fname)
    final = SoundFile(in_final_fname)
    print 'scratch', scratch.stream.getparams()
    print 'final ', final.stream.getparams()
    scratch_frag = coerce_lin(scratch, final)
    final_frag = final.read_lin()
    ## Shift final_frag to match scratch_frag.
    shift_sec = findfit(scratch_frag, final_frag, final)
    shift_frames = int(shift_sec * final.stream.getframerate())
    shift_bytes = shift_frames * final.bytes_per_frame()
    print 'shift', shift_bytes, 'bytes'
    if shift_bytes > 0:
        # Delay the final track by prepending silence.
        final_frag = '\0' * shift_bytes + final_frag
    elif shift_bytes < 0:
        # Advance the final track by dropping frames from its start.
        final_frag = final_frag[-shift_bytes:]
    ## Set final_frag length to match scratch_frag.
    if len(final_frag) > len(scratch_frag):
        final_frag = final_frag[:len(scratch_frag)]
    elif len(final_frag) < len(scratch_frag):
        final_frag += '\0' * (len(scratch_frag) - len(final_frag))
    # Write out the result.
    sink = SoundFile(out_fname, final)
    sink.write_lin(final_frag)
    sink.close()
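
# Programmatic usage mirrors the command-line example below, e.g.
#   autoclapper('scratch.wav', 'raw_final.wav', 'synced_final.wav')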

if __name__ == '__main__':
    import sys
    # Print the help text when asked for, or when the argument count is wrong.
    if len(sys.argv) != 4 or sys.argv[1] in ('-h', '--help', '-?'):
        print 'syntax: python autoclapper.py IN_SCRATCH_FNAME IN_FINAL_FNAME OUT_FNAME'
        print
        print autoclapper.__doc__
        print """
You can use "avconv" (or "ffmpeg") to extract audio tracks from video.
Example:
$ avconv -i raw_video.avi scratch.wav
$ python autoclapper.py scratch.wav raw_final.wav synced_final.wav
$ avconv -i raw_video.avi -i synced_final.wav -map 0:0 -map 1:0 -codec copy video.avi
"""
        sys.exit(0)
    in_scratch_fname = sys.argv[1]
    in_final_fname = sys.argv[2]
    out_fname = sys.argv[3]
    autoclapper(in_scratch_fname, in_final_fname, out_fname)