Add TTS play example for .NET #676

Merged 3 commits on Mar 19, 2024.

2 changes: 1 addition & 1 deletion .github/workflows/build-wheels-aarch64.yaml
@@ -24,7 +24,7 @@ jobs:
matrix:
os: [ubuntu-latest]
python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
-manylinux: [manylinux2014, manylinux_2_28]
+manylinux: [manylinux2014] #, manylinux_2_28]

steps:
- uses: actions/checkout@v4
2 changes: 1 addition & 1 deletion .github/workflows/build-wheels-linux.yaml
@@ -24,7 +24,7 @@ jobs:
matrix:
os: [ubuntu-latest]
python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
-manylinux: [manylinux2014, manylinux_2_28]
+manylinux: [manylinux2014] #, manylinux_2_28]


steps:
2 changes: 0 additions & 2 deletions .github/workflows/build-wheels-win32.yaml
@@ -43,8 +43,6 @@ jobs:
run: |
ls -lh ./wheelhouse/

-ls -lh ./wheelhouse/*.whl

- uses: actions/upload-artifact@v4
with:
name: wheel-${{ matrix.python-version }}
10 changes: 10 additions & 0 deletions dotnet-examples/README.md
@@ -6,4 +6,14 @@ Please refer to the documentation
https://k2-fsa.github.io/sherpa/onnx/csharp-api/index.html
for details.

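To create the example project and add it to the solution: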
```bash
dotnet new console -n offline-tts-play
dotnet sln ./sherpa-onnx.sln add ./offline-tts-play
```

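To list or clear the local NuGet caches (useful if restore keeps picking up a stale locally built package):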
```bash
dotnet nuget locals all --list
dotnet nuget locals all --clear
```
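
As a minimal end-to-end sketch, assuming the vits-piper-en_US-amy-low model from the sherpa-onnx tts-models release (the flags mirror the usage text in Program.cs):

```bash
cd dotnet-examples/offline-tts-play

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2

dotnet run \
  --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
  --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
  --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
  --output-filename=./amy.wav \
  --text='This is a text to speech example in dotnet'
```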

[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
1 change: 1 addition & 0 deletions dotnet-examples/offline-tts-play/.gitignore
@@ -0,0 +1 @@
run-piper.sh
285 changes: 285 additions & 0 deletions dotnet-examples/offline-tts-play/Program.cs
@@ -0,0 +1,285 @@
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to use a non-streaming TTS model for text-to-speech
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
//
// Note that you need a speaker (audio output device) to run this example,
// since it plays the generated audio while it is being generated.

using CommandLine.Text;
using CommandLine;
using PortAudioSharp;
using SherpaOnnx;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Threading;
using System;

class OfflineTtsPlayDemo
{
class Options
{

[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
public string RuleFsts { get; set; }

[Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
public string DataDir { get; set; }

[Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
public float LengthScale { get; set; }

[Option("vits-noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS models")]
public float NoiseScale { get; set; }

[Option("vits-noise-scale-w", Required = false, Default = 0.8f, HelpText = "noise_scale_w for VITS models")]
public float NoiseScaleW { get; set; }

[Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
public string Lexicon { get; set; }

[Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
public string Tokens { get; set; }

[Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
public int MaxNumSentences { get; set; }

[Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")]
public int Debug { get; set; }

[Option("vits-model", Required = true, HelpText = "Path to VITS model")]
public string Model { get; set; }

[Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
public int SpeakerId { get; set; }

[Option("text", Required = true, HelpText = "Text to synthesize")]
public string Text { get; set; }

[Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
public string OutputFilename { get; set; }
}

static void Main(string[] args)
{
var parser = new CommandLine.Parser(with => with.HelpWriter = null);
var parserResult = parser.ParseArguments<Options>(args);

parserResult
.WithParsed<Options>(options => Run(options))
.WithNotParsed(errs => DisplayHelp(parserResult, errs));
}

private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
{
string usage = @"
# vits-aishell3

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xf vits-zh-aishell3.tar.bz2

dotnet run \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
--vits-tokens=./vits-zh-aishell3/tokens.txt \
--vits-lexicon=./vits-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-zh-aishell3/rule.fst \
--sid=66 \
--debug=1 \
--output-filename=./aishell3-66.wav \
--text=这是一个语音合成测试

# Piper models

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2

dotnet run \
--vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
--vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
--vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
--debug=1 \
--output-filename=./amy.wav \
--text='This is a text to speech application in dotnet with Next Generation Kaldi'

Please refer to
https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
to download more models.
";

var helpText = HelpText.AutoBuild(result, h =>
{
h.AdditionalNewLineAfterOption = false;
h.Heading = usage;
h.Copyright = "Copyright (c) 2024 Xiaomi Corporation";
return HelpText.DefaultParsingErrorsHandler(result, h);
}, e => e);
Console.WriteLine(helpText);
}


private static void Run(Options options)
{
OfflineTtsConfig config = new OfflineTtsConfig();
config.Model.Vits.Model = options.Model;
config.Model.Vits.Lexicon = options.Lexicon;
config.Model.Vits.Tokens = options.Tokens;
config.Model.Vits.DataDir = options.DataDir;
config.Model.Vits.NoiseScale = options.NoiseScale;
config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
config.Model.Vits.LengthScale = options.LengthScale;
config.Model.NumThreads = 1;
config.Model.Debug = options.Debug;
config.Model.Provider = "cpu";
config.RuleFsts = options.RuleFsts;
config.MaxNumSentences = options.MaxNumSentences;

OfflineTts tts = new OfflineTts(config);
float speed = 1.0f / options.LengthScale;
int sid = options.SpeakerId;


Console.WriteLine(PortAudio.VersionInfo.versionText);
PortAudio.Initialize();
Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");

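// Enumerate all audio devices so the user can see what is available; playback uses the default output device below.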
for (int i = 0; i != PortAudio.DeviceCount; ++i)
{
Console.WriteLine($" Device {i}");
DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
Console.WriteLine($" Name: {deviceInfo.name}");
Console.WriteLine($" Max output channels: {deviceInfo.maxOutputChannels}");
Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
}
int deviceIndex = PortAudio.DefaultOutputDevice;
if (deviceIndex == PortAudio.NoDevice)
{
Console.WriteLine("No default output device found. Please use ../offline-tts instead");
Environment.Exit(1);
}

DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
Console.WriteLine();
Console.WriteLine($"Using default output device {deviceIndex} ({info.name})");

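// Configure a mono, 32-bit float output stream on the default device with its default low-latency setting.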
StreamParameters param = new StreamParameters();
param.device = deviceIndex;
param.channelCount = 1;
param.sampleFormat = SampleFormat.Float32;
param.suggestedLatency = info.defaultLowOutputLatency;
param.hostApiSpecificStreamInfo = IntPtr.Zero;

// https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
BlockingCollection<float[]> dataItems = new BlockingCollection<float[]>();

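// Invoked by tts.GenerateWithCallback() each time a chunk of samples is ready:
// copy the chunk out of unmanaged memory and queue it so playback can overlap with synthesis.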
var MyCallback = (IntPtr samples, int n) =>
{
float[] data = new float[n];

Marshal.Copy(samples, data, 0, n);

dataItems.Add(data);
};

bool playFinished = false;

float[] lastSampleArray = null;
int lastIndex = 0; // not played

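// Called by PortAudio whenever it needs frameCount more frames. It drains queued chunks,
// remembering any partially consumed chunk in lastSampleArray/lastIndex, and pads with
// silence when the queue is momentarily empty.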
PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output,
UInt32 frameCount,
ref StreamCallbackTimeInfo timeInfo,
StreamCallbackFlags statusFlags,
IntPtr userData
) =>
{
if (dataItems.IsCompleted && lastSampleArray == null && lastIndex == 0)
{
Console.WriteLine($"Finished playing");
playFinished = true;
return StreamCallbackResult.Complete;
}

int expected = Convert.ToInt32(frameCount);
int i = 0;

while ((lastSampleArray != null || dataItems.Count != 0) && (i < expected))
{
int needed = expected - i;

if (lastSampleArray != null)
{
int remaining = lastSampleArray.Length - lastIndex;
if (remaining >= needed)
{
float[] this_block = lastSampleArray.Skip(lastIndex).Take(needed).ToArray();
lastIndex += needed;
if (lastIndex == lastSampleArray.Length)
{
lastSampleArray = null;
lastIndex = 0;
}

Marshal.Copy(this_block, 0, IntPtr.Add(output, i * sizeof(float)), needed);
return StreamCallbackResult.Continue;
}

float[] this_block2 = lastSampleArray.Skip(lastIndex).Take(remaining).ToArray();
lastIndex = 0;
lastSampleArray = null;

Marshal.Copy(this_block2, 0, IntPtr.Add(output, i * sizeof(float)), remaining);
i += remaining;
continue;
}

if (dataItems.Count != 0)
{
lastSampleArray = dataItems.Take();
lastIndex = 0;
}
}

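// Not enough queued audio to fill the buffer: pad the remainder with silence.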
if (i < expected)
{
int sizeInBytes = (expected - i) * 4;
Marshal.Copy(new byte[sizeInBytes], 0, IntPtr.Add(output, i * sizeof(float)), sizeInBytes);
}

return StreamCallbackResult.Continue;
};

PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: null, outParams: param, sampleRate: tts.SampleRate,
framesPerBuffer: 0,
streamFlags: StreamFlags.ClipOff,
callback: playCallback,
userData: IntPtr.Zero
);

stream.Start();

OfflineTtsCallback callback = new OfflineTtsCallback(MyCallback);

OfflineTtsGeneratedAudio audio = tts.GenerateWithCallback(options.Text, speed, sid, callback);
bool ok = audio.SaveToWaveFile(options.OutputFilename);

if (ok)
{
Console.WriteLine($"Writing to {options.OutputFilename} succeeded!");
}
else
{
Console.WriteLine($"Failed to write {options.OutputFilename}");
}
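// Generation has finished; tell the playback callback that no more chunks are coming
// and wait for it to drain the queue.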
dataItems.CompleteAdding();

while (!playFinished)
{
Thread.Sleep(100); // 100ms
}
}
}
21 changes: 21 additions & 0 deletions dotnet-examples/offline-tts-play/offline-tts-play.csproj
@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>offline_tts_play</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<PropertyGroup>
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="CommandLineParser" Version="2.9.1" />
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
<PackageReference Include="PortAudioSharp2" Version="*" />
</ItemGroup>

</Project>
6 changes: 6 additions & 0 deletions dotnet-examples/sherpa-onnx.sln
@@ -11,6 +11,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-recognition-from-mic
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts", "offline-tts\offline-tts.csproj", "{72196886-7143-4043-96E2-BCACEC6C79EB}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline-tts-play\offline-tts-play.csproj", "{40781464-5948-462B-BA4B-98932711513F}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -36,5 +38,9 @@ Global
{72196886-7143-4043-96E2-BCACEC6C79EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{72196886-7143-4043-96E2-BCACEC6C79EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{72196886-7143-4043-96E2-BCACEC6C79EB}.Release|Any CPU.Build.0 = Release|Any CPU
+{40781464-5948-462B-BA4B-98932711513F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+{40781464-5948-462B-BA4B-98932711513F}.Debug|Any CPU.Build.0 = Debug|Any CPU
+{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.ActiveCfg = Release|Any CPU
+{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
21 changes: 21 additions & 0 deletions scripts/dotnet/examples/offline-tts-play.csproj
@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>offline_tts_play</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<PropertyGroup>
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="CommandLineParser" Version="2.9.1" />
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
<PackageReference Include="PortAudioSharp2" Version="*" />
</ItemGroup>

</Project>