Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Add Swift API for MatchaTTS models. #1684

Merged
merged 2 commits into from
Jan 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions .github/scripts/test-swift.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@ echo "pwd: $PWD"
cd swift-api-examples
ls -lh

./run-tts-vits.sh
ls -lh
rm -rf vits-piper-*

./run-tts-matcha-zh.sh
ls -lh
rm -rf matcha-icefall-*

./run-tts-matcha-en.sh
ls -lh
rm -rf matcha-icefall-*

./run-speaker-diarization.sh
rm -rf *.onnx
rm -rf sherpa-onnx-pyannote-segmentation-3-0
Expand Down Expand Up @@ -38,8 +50,9 @@ popd
ls -lh /Users/fangjun/Desktop
cat /Users/fangjun/Desktop/Obama.srt

./run-tts.sh
ls -lh
rm -rf sherpa-onnx-whisper*
rm -f *.onnx
rm /Users/fangjun/Desktop/Obama.wav

./run-decode-file.sh
rm decode-file
Expand All @@ -48,5 +61,4 @@ sed -i.bak '20d' ./decode-file.swift

./run-decode-file-non-streaming.sh


ls -lh
2 changes: 1 addition & 1 deletion java-api-examples/run-non-streaming-tts-matcha-en.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ fi
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

Expand Down
2 changes: 1 addition & 1 deletion nodejs-addon-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ node ./test_vad_asr_non_streaming_sense_voice_microphone.js
### Text-to-speech with MatchaTTS models (English TTS)
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
Expand Down
2 changes: 1 addition & 1 deletion nodejs-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ You can use the following command to run it:

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
Expand Down
4 changes: 3 additions & 1 deletion swift-api-examples/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ decode-file
decode-file-non-streaming
generate-subtitles
spoken-language-identification
tts
tts-vits
vits-vctk
sherpa-onnx-paraformer-zh-2023-09-14
!*.sh
*.bak
streaming-hlg-decode-file
keyword-spotting-from-file
add-punctuations
tts-matcha-zh
tts-matcha-en
33 changes: 28 additions & 5 deletions swift-api-examples/SherpaOnnx.swift
Original file line number Diff line number Diff line change
Expand Up @@ -719,9 +719,9 @@ class SherpaOnnxVoiceActivityDetectorWrapper {

// offline tts
func sherpaOnnxOfflineTtsVitsModelConfig(
model: String,
lexicon: String,
tokens: String,
model: String = "",
lexicon: String = "",
tokens: String = "",
dataDir: String = "",
noiseScale: Float = 0.667,
noiseScaleW: Float = 0.8,
Expand All @@ -739,8 +739,30 @@ func sherpaOnnxOfflineTtsVitsModelConfig(
dict_dir: toCPointer(dictDir))
}

/// Creates a `SherpaOnnxOfflineTtsMatchaModelConfig` from Swift values.
///
/// Every `String` argument is passed through `toCPointer` before being stored
/// in the returned struct; the two `Float` arguments are stored unchanged.
/// All parameters have defaults, so callers only supply the fields they need.
///
/// - Parameters:
///   - acousticModel: Value stored in `acoustic_model`.
///   - vocoder: Value stored in `vocoder`.
///   - lexicon: Value stored in `lexicon`.
///   - tokens: Value stored in `tokens`.
///   - dataDir: Value stored in `data_dir`.
///   - noiseScale: Value stored in `noise_scale`.
///   - lengthScale: Value stored in `length_scale`.
///   - dictDir: Value stored in `dict_dir`.
/// - Returns: The populated configuration struct.
func sherpaOnnxOfflineTtsMatchaModelConfig(
  acousticModel: String = "",
  vocoder: String = "",
  lexicon: String = "",
  tokens: String = "",
  dataDir: String = "",
  noiseScale: Float = 0.667,
  lengthScale: Float = 1.0,
  dictDir: String = ""
) -> SherpaOnnxOfflineTtsMatchaModelConfig {
  let config = SherpaOnnxOfflineTtsMatchaModelConfig(
    acoustic_model: toCPointer(acousticModel),
    vocoder: toCPointer(vocoder),
    lexicon: toCPointer(lexicon),
    tokens: toCPointer(tokens),
    data_dir: toCPointer(dataDir),
    noise_scale: noiseScale,
    length_scale: lengthScale,
    dict_dir: toCPointer(dictDir)
  )
  return config
}

func sherpaOnnxOfflineTtsModelConfig(
vits: SherpaOnnxOfflineTtsVitsModelConfig,
vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(),
matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(),
numThreads: Int = 1,
debug: Int = 0,
provider: String = "cpu"
Expand All @@ -749,7 +771,8 @@ func sherpaOnnxOfflineTtsModelConfig(
vits: vits,
num_threads: Int32(numThreads),
debug: Int32(debug),
provider: toCPointer(provider)
provider: toCPointer(provider),
matcha: matcha
)
}

Expand Down
42 changes: 42 additions & 0 deletions swift-api-examples/run-tts-matcha-en.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env bash
#
# Builds (when needed) and runs the Swift MatchaTTS English example.
# The acoustic model and the vocoder are downloaded if they are not
# already present in the current directory.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# Check for the binary this script actually produces (tts-matcha-en), so the
# build is skipped exactly when that executable already exists.
if [ ! -e ./tts-matcha-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-matcha-en.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-matcha-en

  strip tts-matcha-en
else
  echo "./tts-matcha-en exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-matcha-en
41 changes: 41 additions & 0 deletions swift-api-examples/run-tts-matcha-zh.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env bash
#
# Builds (when needed) and runs the Swift MatchaTTS Chinese example.
# The acoustic model and the vocoder are downloaded if they are not
# already present in the current directory.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  # Extract quietly, consistent with the English MatchaTTS script.
  tar xf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# Check for the binary this script actually produces (tts-matcha-zh), so the
# build is skipped exactly when that executable already exists.
if [ ! -e ./tts-matcha-zh ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-matcha-zh.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-matcha-zh

  strip tts-matcha-zh
else
  echo "./tts-matcha-zh exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-matcha-zh
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ if [ ! -e ./tts ]; then
-lc++ \
-I ../build-swift-macos/install/include \
-import-objc-header ./SherpaOnnx-Bridging-Header.h \
./tts.swift ./SherpaOnnx.swift \
./tts-vits.swift ./SherpaOnnx.swift \
-L ../build-swift-macos/install/lib/ \
-l sherpa-onnx \
-l onnxruntime \
-o tts
-o tts-vits

strip tts
strip tts-vits
else
echo "./tts exists - skip building"
echo "./tts-vits exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts
./tts-vits
65 changes: 65 additions & 0 deletions swift-api-examples/tts-matcha-en.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/// Receiver object handed to the TTS callback through an opaque pointer.
class MyClass {
  /// Prints how many samples were received; the samples themselves are not used.
  func playSamples(samples: [Float]) {
    let count = samples.count
    print("Play \(count) samples")
  }
}

/// Generates speech for a fixed English sentence with a MatchaTTS model and
/// saves the result to `test-matcha-en.wav`.
///
/// A C callback is passed to the generator; it forwards the samples it is
/// given to a `MyClass` instance and returns 1 so that generation continues.
func run() {
  let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"
  let vocoder = "./hifigan_v2.onnx"
  let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"
  let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data"
  let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    tokens: tokens,
    dataDir: dataDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  let myClass = MyClass()

  // We use Unretained here so myClass must be kept alive as the callback is invoked
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsCallbackWithArg = { samples, n, arg in
    // `arg` is the pointer created above; nil here would be a programmer error.
    let o = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()

    // Copy the `n` samples into a Swift array in one step. With n == 0 the
    // result is empty and `samples` is never dereferenced.
    let savedSamples = Array(UnsafeBufferPointer(start: samples, count: Int(n)))

    o.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text =
    "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  let sid = 0
  let speed: Float = 1.0

  // Swift may release an object after its last use rather than at the end of
  // the scope. Because the callback only holds an unretained pointer, keep
  // `myClass` alive explicitly for the whole generate call.
  let audio = withExtendedLifetime(myClass) {
    tts.generateWithCallbackWithArg(
      text: text, callback: callback, arg: arg, sid: sid, speed: speed)
  }
  let filename = "test-matcha-en.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

/// Program entry point: delegates straight to `run()`.
@main
struct App {
  static func main() { run() }
}
68 changes: 68 additions & 0 deletions swift-api-examples/tts-matcha-zh.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/// Receiver object handed to the TTS callback through an opaque pointer.
class MyClass {
  /// Prints how many samples were received; the samples themselves are not used.
  func playSamples(samples: [Float]) {
    let count = samples.count
    print("Play \(count) samples")
  }
}

/// Generates speech for a fixed Chinese sentence with a MatchaTTS model and
/// saves the result to `test-matcha-zh.wav`.
///
/// The TTS config is built with a lexicon, a dict directory and a
/// comma-separated list of rule FSTs. A C callback is passed to the generator;
/// it forwards the samples it is given to a `MyClass` instance and returns 1
/// so that generation continues.
func run() {
  let acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx"
  let vocoder = "./hifigan_v2.onnx"
  let lexicon = "./matcha-icefall-zh-baker/lexicon.txt"
  let tokens = "./matcha-icefall-zh-baker/tokens.txt"
  let dictDir = "./matcha-icefall-zh-baker/dict"
  let ruleFsts =
    "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst"
  let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    lexicon: lexicon,
    tokens: tokens,
    dictDir: dictDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig, ruleFsts: ruleFsts)

  let myClass = MyClass()

  // We use Unretained here so myClass must be kept alive as the callback is invoked
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsCallbackWithArg = { samples, n, arg in
    // `arg` is the pointer created above; nil here would be a programmer error.
    let o = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()

    // Copy the `n` samples into a Swift array in one step. With n == 0 the
    // result is empty and `samples` is never dereferenced.
    let savedSamples = Array(UnsafeBufferPointer(start: samples, count: Int(n)))

    o.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text = "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
  let sid = 0
  let speed: Float = 1.0

  // Swift may release an object after its last use rather than at the end of
  // the scope. Because the callback only holds an unretained pointer, keep
  // `myClass` alive explicitly for the whole generate call.
  let audio = withExtendedLifetime(myClass) {
    tts.generateWithCallbackWithArg(
      text: text, callback: callback, arg: arg, sid: sid, speed: speed)
  }
  let filename = "test-matcha-zh.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}

/// Program entry point: delegates straight to `run()`.
@main
struct App {
  static func main() { run() }
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func run() {

let audio = tts.generateWithCallbackWithArg(
text: text, callback: callback, arg: arg, sid: sid, speed: speed)
let filename = "test.wav"
let filename = "test-vits-en.wav"
let ok = audio.save(filename: filename)
if ok == 1 {
print("\nSaved to:\(filename)")
Expand Down
Loading