Add FireRedAsr support to the C API.

- c-api.h: new SherpaOnnxOfflineFireRedAsrModelConfig {encoder, decoder},
  added as the `fire_red_asr` member of SherpaOnnxOfflineModelConfig.
- c-api.cc: GetOfflineRecognizerConfig copies fire_red_asr.encoder/decoder,
  falling back to "" via SHERPA_ONNX_OR.
- c-api-examples: new fire-red-asr-c-api.c example plus its CMake target.
- CI: adds ubuntu-22.04-arm to the matrix, runs ldd/readelf on it as on
  ubuntu-latest, and adds a "Test FireRedAsr" step.

NOTE(review): the three system #include targets in fire-red-asr-c-api.c were
missing from the flattened copy of this patch. <stdio.h> (fprintf) and
<string.h> (memset) are required by the code; <stdlib.h> is presumed to be
the third header -- confirm against the committed file.

diff --git a/.github/workflows/c-api.yaml b/.github/workflows/c-api.yaml
index 4a4108c989..3f1c9f8483 100644
--- a/.github/workflows/c-api.yaml
+++ b/.github/workflows/c-api.yaml
@@ -35,7 +35,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-latest]
+        os: [ubuntu-latest, macos-latest, ubuntu-22.04-arm]
 
     steps:
       - uses: actions/checkout@v4
@@ -69,7 +69,7 @@ jobs:
           ls -lh install/lib
           ls -lh install/include
 
-          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
             ldd ./install/lib/libsherpa-onnx-c-api.so
             echo "---"
             readelf -d ./install/lib/libsherpa-onnx-c-api.so
@@ -79,6 +79,38 @@ jobs:
             otool -L ./install/lib/libsherpa-onnx-c-api.dylib
           fi
 
+      - name: Test FireRedAsr
+        shell: bash
+        run: |
+          gcc -o fire-red-asr-c-api ./c-api-examples/fire-red-asr-c-api.c \
+            -I ./build/install/include \
+            -L ./build/install/lib/ \
+            -l sherpa-onnx-c-api \
+            -l onnxruntime
+
+          ls -lh fire-red-asr-c-api
+
+          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
+            ldd ./fire-red-asr-c-api
+            echo "----"
+            readelf -d ./fire-red-asr-c-api
+          fi
+
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+          tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+          rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+
+          ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
+          echo "---"
+          ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs
+
+          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+          ./fire-red-asr-c-api
+
+          rm -rf sherpa-onnx-fire-red-asr-*
+
       - name: Test kws (zh)
         shell: bash
         run: |
@@ -301,7 +333,7 @@ jobs:
 
           ls -lh vad-sense-voice-c-api
 
-          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
             ldd ./vad-sense-voice-c-api
             echo "----"
             readelf -d ./vad-sense-voice-c-api
@@ -340,7 +372,7 @@ jobs:
 
           ls -lh sense-voice-c-api
 
-          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
             ldd ./sense-voice-c-api
             echo "----"
             readelf -d ./sense-voice-c-api
@@ -373,7 +405,7 @@ jobs:
 
           ls -lh whisper-c-api
 
-          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
             ldd ./whisper-c-api
             echo "----"
             readelf -d ./whisper-c-api
@@ -405,7 +437,7 @@ jobs:
 
           ls -lh zipformer-c-api
 
-          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
             ldd ./zipformer-c-api
             echo "----"
             readelf -d ./zipformer-c-api
@@ -437,7 +469,7 @@ jobs:
 
           ls -lh streaming-zipformer-c-api
 
-          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
             ldd ./streaming-zipformer-c-api
             echo "----"
             readelf -d ./streaming-zipformer-c-api
@@ -469,7 +501,7 @@ jobs:
 
           ls -lh paraformer-c-api
 
-          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
             ldd ./paraformer-c-api
             echo "----"
             readelf -d ./paraformer-c-api
@@ -501,7 +533,7 @@ jobs:
 
           ls -lh streaming-paraformer-c-api
 
-          if [[ ${{ matrix.os }} == ubuntu-latest ]]; then
+          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
             ldd ./streaming-paraformer-c-api
             echo "----"
             readelf -d ./streaming-paraformer-c-api
diff --git a/c-api-examples/CMakeLists.txt b/c-api-examples/CMakeLists.txt
index 44c5814a53..724977a8b6 100644
--- a/c-api-examples/CMakeLists.txt
+++ b/c-api-examples/CMakeLists.txt
@@ -47,6 +47,9 @@ target_link_libraries(add-punctuation-c-api sherpa-onnx-c-api)
 add_executable(whisper-c-api whisper-c-api.c)
 target_link_libraries(whisper-c-api sherpa-onnx-c-api)
 
+add_executable(fire-red-asr-c-api fire-red-asr-c-api.c)
+target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api)
+
 add_executable(sense-voice-c-api sense-voice-c-api.c)
 target_link_libraries(sense-voice-c-api sherpa-onnx-c-api)
 
diff --git a/c-api-examples/fire-red-asr-c-api.c b/c-api-examples/fire-red-asr-c-api.c
new file mode 100644
index 0000000000..1e70622979
--- /dev/null
+++ b/c-api-examples/fire-red-asr-c-api.c
@@ -0,0 +1,84 @@
+// c-api-examples/fire-red-asr-c-api.c
+//
+// Copyright (c) 2025 Xiaomi Corporation
+
+// We assume you have pre-downloaded the FireRedAsr model
+// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+// An example is given below:
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+// tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+// rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+//
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+int32_t main() {
+  const char *wav_filename =
+      "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav";
+  const char *encoder_filename =
+      "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx";
+  const char *decoder_filename =
+      "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx";
+  const char *tokens_filename =
+      "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt";
+  const char *provider = "cpu";
+
+  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+  if (wave == NULL) {
+    fprintf(stderr, "Failed to read %s\n", wav_filename);
+    return -1;
+  }
+
+  // Offline model config
+  SherpaOnnxOfflineModelConfig offline_model_config;
+  memset(&offline_model_config, 0, sizeof(offline_model_config));
+  offline_model_config.debug = 1;
+  offline_model_config.num_threads = 1;
+  offline_model_config.provider = provider;
+  offline_model_config.tokens = tokens_filename;
+  offline_model_config.fire_red_asr.encoder = encoder_filename;
+  offline_model_config.fire_red_asr.decoder = decoder_filename;
+
+  // Recognizer config
+  SherpaOnnxOfflineRecognizerConfig recognizer_config;
+  memset(&recognizer_config, 0, sizeof(recognizer_config));
+  recognizer_config.decoding_method = "greedy_search";
+  recognizer_config.model_config = offline_model_config;
+
+  const SherpaOnnxOfflineRecognizer *recognizer =
+      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
+
+  if (recognizer == NULL) {
+    fprintf(stderr, "Please check your config!\n");
+
+    SherpaOnnxFreeWave(wave);
+
+    return -1;
+  }
+
+  const SherpaOnnxOfflineStream *stream =
+      SherpaOnnxCreateOfflineStream(recognizer);
+
+  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
+                                  wave->num_samples);
+  SherpaOnnxDecodeOfflineStream(recognizer, stream);
+  const SherpaOnnxOfflineRecognizerResult *result =
+      SherpaOnnxGetOfflineStreamResult(stream);
+
+  fprintf(stderr, "Decoded text: %s\n", result->text);
+
+  SherpaOnnxDestroyOfflineRecognizerResult(result);
+  SherpaOnnxDestroyOfflineStream(stream);
+  SherpaOnnxDestroyOfflineRecognizer(recognizer);
+  SherpaOnnxFreeWave(wave);
+
+  return 0;
+}
diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc
index 081c983dbb..eab473fb2f 100644
--- a/sherpa-onnx/c-api/c-api.cc
+++ b/sherpa-onnx/c-api/c-api.cc
@@ -460,6 +460,12 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
   recognizer_config.model_config.moonshine.cached_decoder =
       SHERPA_ONNX_OR(config->model_config.moonshine.cached_decoder, "");
 
+  recognizer_config.model_config.fire_red_asr.encoder =
+      SHERPA_ONNX_OR(config->model_config.fire_red_asr.encoder, "");
+
+  recognizer_config.model_config.fire_red_asr.decoder =
+      SHERPA_ONNX_OR(config->model_config.fire_red_asr.decoder, "");
+
   recognizer_config.lm_config.model =
       SHERPA_ONNX_OR(config->lm_config.model, "");
   recognizer_config.lm_config.scale =
diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h
index 39198ff287..43567406e0 100644
--- a/sherpa-onnx/c-api/c-api.h
+++ b/sherpa-onnx/c-api/c-api.h
@@ -389,6 +389,11 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig {
   int32_t tail_paddings;
 } SherpaOnnxOfflineWhisperModelConfig;
 
+SHERPA_ONNX_API typedef struct SherpaOnnxOfflineFireRedAsrModelConfig {
+  const char *encoder;
+  const char *decoder;
+} SherpaOnnxOfflineFireRedAsrModelConfig;
+
 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineMoonshineModelConfig {
   const char *preprocessor;
   const char *encoder;
@@ -432,6 +437,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
   const char *telespeech_ctc;
   SherpaOnnxOfflineSenseVoiceModelConfig sense_voice;
   SherpaOnnxOfflineMoonshineModelConfig moonshine;
+  SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr;
 } SherpaOnnxOfflineModelConfig;
 
 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {