-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathtranscribe.py
45 lines (35 loc) · 1.38 KB
/
transcribe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os
import warnings
import sherpa_onnx
from .interface import TranscribeConfig, TranscribeResult, Subword
from .audio import audio_to_file, pad_audio, norm_audio
# Amount of padding (in seconds) handed to pad_audio() before decoding.
PAD_SECONDS: float = 0.9

# Inputs whose duration (in seconds) exceeds this threshold trigger a
# warning in transcribe().
TOO_LONG_SECONDS: float = 30.0
def transcribe(model, audio, config=None):
    """Transcribe audio data using a K2 model.

    The audio is passed through ``norm_audio`` and ``pad_audio`` (with
    ``PAD_SECONDS``) and is then decoded as a single offline stream.

    Args:
        model (sherpa_onnx.OfflineRecognizer): ReazonSpeech model
        audio (AudioData): Audio data to transcribe
        config (TranscribeConfig): Additional settings. A default
            ``TranscribeConfig`` is created when omitted.

    Returns:
        TranscribeResult: The recognized text plus a list of ``Subword``
        entries pairing each decoded token with its timestamp.

    Warns:
        UserWarning: If the prepared (padded) audio is longer than
            ``TOO_LONG_SECONDS``.
    """
    if config is None:
        # NOTE: the settings object is not consulted any further in this
        # function; it is only instantiated when the caller omits it.
        config = TranscribeConfig()

    audio = pad_audio(norm_audio(audio), PAD_SECONDS)

    # Show warning if a long audio input is detected. The duration is
    # measured after padding, i.e. on what is actually fed to the model.
    duration = audio.waveform.shape[0] / audio.samplerate
    if duration > TOO_LONG_SECONDS:
        warnings.warn(
            f"Passing a long audio input ({duration:.1f}s) is not recommended, "
            "because K2 will require a large amount of memory. "
            "Read the upstream discussion for more details: "
            "https://github.com/k2-fsa/icefall/issues/1680",
            # Attribute the warning to the caller that supplied the audio
            # rather than to this line inside the library.
            stacklevel=2,
        )

    stream = model.create_stream()
    stream.accept_waveform(audio.samplerate, audio.waveform)
    model.decode_stream(stream)

    # Read the decoding result once and reuse it below.
    result = stream.result
    subwords = [
        Subword(token=token, seconds=seconds)
        for token, seconds in zip(result.tokens, result.timestamps)
    ]
    return TranscribeResult(result.text, subwords)