diff --git a/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.csproj b/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.csproj
index d2184abc98..db4d56afb0 100644
--- a/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.csproj
+++ b/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.csproj
@@ -14,26 +14,8 @@
4
true
true
-
-
- AnyCPU
- true
- full
- false
- bin\Debug\
- DEBUG;TRACE
- prompt
- 4
-
-
- AnyCPU
- pdbonly
- true
- bin\Release\
- TRACE
- prompt
- 4
- true
+
+
true
@@ -59,13 +41,14 @@
- ..\packages\CSCore.1.2.1.2\lib\net35-client\CSCore.dll
+ packages\CSCore.1.2.1.2\lib\net35-client\CSCore.dll
-
- ..\packages\NAudio.1.8.5\lib\net35\NAudio.dll
+
+ packages\NAudio.1.9.0\lib\net35\NAudio.dll
+
@@ -125,7 +108,7 @@
-
+
{56de4091-bbbe-47e4-852d-7268b33b971f}
DeepSpeechClient
diff --git a/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.sln b/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.sln
new file mode 100644
index 0000000000..96b4e6bc0e
--- /dev/null
+++ b/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.sln
@@ -0,0 +1,31 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+VisualStudioVersion = 15.0.28307.421
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeech.WPF", "DeepSpeech.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "..\..\..\native_client\dotnet\DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|x64.ActiveCfg = Debug|x64
+ {54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|x64.Build.0 = Debug|x64
+ {54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|x64.ActiveCfg = Release|x64
+ {54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|x64.Build.0 = Release|x64
+ {56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.ActiveCfg = Debug|x64
+ {56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.Build.0 = Debug|x64
+ {56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|x64.ActiveCfg = Release|x64
+ {56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {19C58802-CCEC-4FD1-8D17-A6EB766116F7}
+ EndGlobalSection
+EndGlobal
diff --git a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
index aeb4b31582..e855f181c6 100644
--- a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
+++ b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
@@ -79,14 +79,8 @@ private void Window_Loaded(object sender, RoutedEventArgs e)
{
try
{
- if (_sttClient.CreateModel("output_graph.pbmm", N_CEP, N_CONTEXT, "alphabet.txt", BEAM_WIDTH) == 0)
- {
- Dispatcher.Invoke(() => { EnableControls(); });
- }
- else
- {
- MessageBox.Show("Model load failed.");
- }
+ _sttClient.CreateModel("output_graph.pbmm", N_CEP, N_CONTEXT, "alphabet.txt", BEAM_WIDTH);
+ Dispatcher.Invoke(() => { EnableControls(); });
}
catch (Exception ex)
{
@@ -161,18 +155,12 @@ await Task.Run(() =>
{
try
{
- if (_sttClient.EnableDecoderWithLM("alphabet.txt", "lm.binary", "trie", LM_ALPHA, LM_BETA) != 0)
- {
- MessageBox.Show("Error loading LM.");
- Dispatcher.Invoke(() => btnEnableLM.IsEnabled = true);
- }
- else
- {
- Dispatcher.Invoke(() => lblStatus.Content = "LM loaded.");
- }
+ _sttClient.EnableDecoderWithLM("alphabet.txt", "lm.binary", "trie", LM_ALPHA, LM_BETA);
+ Dispatcher.Invoke(() => lblStatus.Content = "LM loaded.");
}
catch (Exception ex)
{
+ Dispatcher.Invoke(() => btnEnableLM.IsEnabled = true);
MessageBox.Show(ex.Message);
}
});
diff --git a/examples/net_framework/DeepSpeechWPF/packages.config b/examples/net_framework/DeepSpeechWPF/packages.config
index 6cab284370..4b03fe6aa5 100644
--- a/examples/net_framework/DeepSpeechWPF/packages.config
+++ b/examples/net_framework/DeepSpeechWPF/packages.config
@@ -1,5 +1,5 @@
-
+
\ No newline at end of file
diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
index 255d26f834..5271e2e70c 100644
--- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
+++ b/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs
@@ -5,6 +5,7 @@
using System;
using System.IO;
using System.Runtime.InteropServices;
+using DeepSpeechClient.Enums;
namespace DeepSpeechClient
{
@@ -35,8 +36,8 @@ public DeepSpeech()
/// The context window the model was trained with.
/// The path to the configuration file specifying the alphabet used by the network.
/// The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.
- /// Zero on success, non-zero on failure.
- public unsafe int CreateModel(string aModelPath, uint aNCep,
+ /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
+ public unsafe void CreateModel(string aModelPath, uint aNCep,
uint aNContext, string aAlphabetConfigPath, uint aBeamWidth)
{
string exceptionMessage = null;
@@ -61,16 +62,53 @@ public unsafe int CreateModel(string aModelPath, uint aNCep,
{
throw new FileNotFoundException(exceptionMessage);
}
- int result = NativeImp.DS_CreateModel(aModelPath,
+ var resultCode = NativeImp.DS_CreateModel(aModelPath,
aNCep,
aNContext,
aAlphabetConfigPath,
aBeamWidth,
ref _modelStatePP);
+ EvaluateResultCode(resultCode);
_modelStateP = *_modelStatePP;
- return result;
-
+ }
+ /// <summary>
+ /// Evaluates the native result code and throws an <see cref="ArgumentException"/> for every value other than <see cref="ErrorCodes.DS_ERR_OK"/>.
+ /// </summary>
+ /// <param name="resultCode">Native result code.</param>
+ private void EvaluateResultCode(ErrorCodes resultCode)
+ {
+ switch (resultCode)
+ {
+ case ErrorCodes.DS_ERR_OK:
+ break;
+ case ErrorCodes.DS_ERR_NO_MODEL:
+ throw new ArgumentException("Missing model information.");
+ case ErrorCodes.DS_ERR_INVALID_ALPHABET:
+ throw new ArgumentException("Invalid alphabet file or invalid alphabet size.");
+ case ErrorCodes.DS_ERR_INVALID_SHAPE:
+ throw new ArgumentException("Invalid model shape.");
+ case ErrorCodes.DS_ERR_INVALID_LM:
+ throw new ArgumentException("Invalid language model file.");
+ case ErrorCodes.DS_ERR_FAIL_INIT_MMAP:
+ throw new ArgumentException("Failed to initialize memory mapped model.");
+ case ErrorCodes.DS_ERR_FAIL_INIT_SESS:
+ throw new ArgumentException("Failed to initialize the session.");
+ case ErrorCodes.DS_ERR_FAIL_INTERPRETER:
+ throw new ArgumentException("Interpreter failed.");
+ case ErrorCodes.DS_ERR_FAIL_RUN_SESS:
+ throw new ArgumentException("Failed to run the session.");
+ case ErrorCodes.DS_ERR_FAIL_CREATE_STREAM:
+ throw new ArgumentException("Error creating the stream.");
+ case ErrorCodes.DS_ERR_FAIL_READ_PROTOBUF:
+ throw new ArgumentException("Error reading the proto buffer model file.");
+ case ErrorCodes.DS_ERR_FAIL_CREATE_SESS:
+ throw new ArgumentException("Error failed to create session.");
+ case ErrorCodes.DS_ERR_MODEL_INCOMPATIBLE:
+ throw new ArgumentException("Error incompatible model.");
+ default:
+ throw new ArgumentException("Unknown error, please make sure you are using the correct native binary.");
+ }
}
///
@@ -89,8 +127,8 @@ public unsafe void Dispose()
/// The path to the trie file build from the same vocabulary as the language model binary.
/// The alpha hyperparameter of the CTC decoder. Language Model weight.
/// The beta hyperparameter of the CTC decoder. Word insertion weight.
- /// Zero on success, non-zero on failure (invalid arguments).
- public unsafe int EnableDecoderWithLM(string aAlphabetConfigPath,
+ /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
+ public unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
string aLMPath, string aTriePath,
float aLMAlpha, float aLMBeta)
{
@@ -109,12 +147,13 @@ public unsafe int EnableDecoderWithLM(string aAlphabetConfigPath,
throw new FileNotFoundException(exceptionMessage);
}
- return NativeImp.DS_EnableDecoderWithLM(_modelStatePP,
+ var resultCode = NativeImp.DS_EnableDecoderWithLM(_modelStatePP,
aAlphabetConfigPath,
aLMPath,
aTriePath,
aLMAlpha,
aLMBeta);
+ EvaluateResultCode(resultCode);
}
///
@@ -169,10 +208,11 @@ public unsafe void PrintVersions()
/// One timestep is equivalent to two window lengths(20ms).
/// If set to 0 we reserve enough frames for 3 seconds of audio(150).
/// The sample-rate of the audio signal
- /// Zero for success, non-zero on failure
- public unsafe int SetupStream(uint aPreAllocFrames, uint aSampleRate)
+ /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
+ public unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate)
{
- return NativeImp.DS_SetupStream(_modelStatePP, aPreAllocFrames, aSampleRate, ref _streamingStatePP);
+ var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aPreAllocFrames, aSampleRate, ref _streamingStatePP);
+ EvaluateResultCode(resultCode);
}
///
diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj b/native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj
index ded5102860..bd5a5a13de 100644
--- a/native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj
+++ b/native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj
@@ -45,6 +45,7 @@
+
diff --git a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs b/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs
new file mode 100644
index 0000000000..019564c279
--- /dev/null
+++ b/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs
@@ -0,0 +1,29 @@
+namespace DeepSpeechClient.Enums
+{
+ /// <summary>
+ /// Error codes returned by the native DeepSpeech binary.
+ /// </summary>
+ internal enum ErrorCodes
+ {
+ // Success
+ DS_ERR_OK = 0x0000,
+
+ // Missing information
+ DS_ERR_NO_MODEL = 0x1000,
+
+ // Invalid parameters
+ DS_ERR_INVALID_ALPHABET = 0x2000,
+ DS_ERR_INVALID_SHAPE = 0x2001,
+ DS_ERR_INVALID_LM = 0x2002,
+ DS_ERR_MODEL_INCOMPATIBLE = 0x2003,
+
+ // Runtime failures
+ DS_ERR_FAIL_INIT_MMAP = 0x3000,
+ DS_ERR_FAIL_INIT_SESS = 0x3001,
+ DS_ERR_FAIL_INTERPRETER = 0x3002,
+ DS_ERR_FAIL_RUN_SESS = 0x3003,
+ DS_ERR_FAIL_CREATE_STREAM = 0x3004,
+ DS_ERR_FAIL_READ_PROTOBUF = 0x3005,
+ DS_ERR_FAIL_CREATE_SESS = 0x3006,
+ }
+}
diff --git a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs
index 5139cdfc1d..3d27a56cbc 100644
--- a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs
+++ b/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs
@@ -21,8 +21,8 @@ public interface IDeepSpeech : IDisposable
/// The context window the model was trained with.
/// The path to the configuration file specifying the alphabet used by the network.
/// The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.
- /// Zero on success, non-zero on failure.
- unsafe int CreateModel(string aModelPath, uint aNCep,
+ /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
+ unsafe void CreateModel(string aModelPath, uint aNCep,
uint aNContext,
string aAlphabetConfigPath,
uint aBeamWidth);
@@ -35,8 +35,8 @@ unsafe int CreateModel(string aModelPath, uint aNCep,
/// The path to the trie file build from the same vocabulary as the language model binary.
/// The alpha hyperparameter of the CTC decoder. Language Model weight.
/// The beta hyperparameter of the CTC decoder. Word insertion weight.
- /// Zero on success, non-zero on failure (invalid arguments).
- unsafe int EnableDecoderWithLM(string aAlphabetConfigPath,
+ /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
+ unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
string aLMPath,
string aTriePath,
float aLMAlpha,
@@ -88,8 +88,8 @@ unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer,
/// One timestep is equivalent to two window lengths(20ms).
/// If set to 0 we reserve enough frames for 3 seconds of audio(150).
/// The sample-rate of the audio signal
- /// Zero for success, non-zero on failure
- unsafe int SetupStream(uint aPreAllocFrames, uint aSampleRate);
+ /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
+ unsafe void SetupStream(uint aPreAllocFrames, uint aSampleRate);
///
/// Feeds audio samples to an ongoing streaming inference.
diff --git a/native_client/dotnet/DeepSpeechClient/NativeImp.cs b/native_client/dotnet/DeepSpeechClient/NativeImp.cs
index 52d9044c76..ec7d527b29 100644
--- a/native_client/dotnet/DeepSpeechClient/NativeImp.cs
+++ b/native_client/dotnet/DeepSpeechClient/NativeImp.cs
@@ -1,4 +1,5 @@
-using DeepSpeechClient.Structs;
+using DeepSpeechClient.Enums;
+using DeepSpeechClient.Structs;
using System;
using System.Runtime.InteropServices;
@@ -15,7 +16,7 @@ internal static class NativeImp
internal static extern void DS_PrintVersions();
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
- internal unsafe static extern int DS_CreateModel(string aModelPath,
+ internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
uint aNCep,
uint aNContext,
string aAlphabetConfigPath,
@@ -23,7 +24,7 @@ internal unsafe static extern int DS_CreateModel(string aModelPath,
ref ModelState** pint);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
- internal static unsafe extern int DS_EnableDecoderWithLM(ModelState** aCtx,
+ internal static unsafe extern ErrorCodes DS_EnableDecoderWithLM(ModelState** aCtx,
string aAlphabetConfigPath,
string aLMPath,
string aTriePath,
@@ -47,7 +48,7 @@ internal static unsafe extern IntPtr DS_SpeechToTextWithMetadata(ModelState** aC
internal static unsafe extern void DS_DestroyModel(ModelState** aCtx);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
- internal static unsafe extern int DS_SetupStream(ModelState** aCtx,
+ internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx,
uint aPreAllocFrames,
uint aSampleRate, ref StreamingState** retval);
diff --git a/native_client/dotnet/DeepSpeechConsole/Program.cs b/native_client/dotnet/DeepSpeechConsole/Program.cs
index 8f76457182..315a1a4084 100644
--- a/native_client/dotnet/DeepSpeechConsole/Program.cs
+++ b/native_client/dotnet/DeepSpeechConsole/Program.cs
@@ -25,7 +25,7 @@ static string MetadataToString(Metadata meta)
{
var nl = Environment.NewLine;
string retval =
- Environment.NewLine +$"Recognized text: {string.Join("", meta?.Items?.Select(x=>x.Character))} {nl}"
+ Environment.NewLine + $"Recognized text: {string.Join("", meta?.Items?.Select(x => x.Character))} {nl}"
+ $"Prob: {meta?.Probability} {nl}"
+ $"Item count: {meta?.Items?.Length} {nl}"
+ string.Join(nl, meta?.Items?.Select(x => $"Timestep : {x.Timestep} TimeOffset: {x.StartTime} Char: {x.Character}"));
@@ -60,43 +60,27 @@ static void Main(string[] args)
using (IDeepSpeech sttClient = new DeepSpeech())
{
- var result = 1;
- Console.WriteLine("Loading model...");
- stopwatch.Start();
try
{
- result = sttClient.CreateModel(
+ Console.WriteLine("Loading model...");
+ stopwatch.Start();
+ sttClient.CreateModel(
model ?? "output_graph.pbmm",
N_CEP, N_CONTEXT,
alphabet ?? "alphabet.txt",
BEAM_WIDTH);
- }
- catch (IOException ex)
- {
- Console.WriteLine("Error loading lm.");
- Console.WriteLine(ex.Message);
- }
- stopwatch.Stop();
- if (result == 0)
- {
+ stopwatch.Stop();
+
Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
stopwatch.Reset();
if (lm != null)
{
Console.WriteLine("Loadin LM...");
- try
- {
- result = sttClient.EnableDecoderWithLM(
- alphabet ?? "alphabet.txt",
- lm ?? "lm.binary",
- trie ?? "trie",
- LM_ALPHA, LM_BETA);
- }
- catch (IOException ex)
- {
- Console.WriteLine("Error loading lm.");
- Console.WriteLine(ex.Message);
- }
+ sttClient.EnableDecoderWithLM(
+ alphabet ?? "alphabet.txt",
+ lm ?? "lm.binary",
+ trie ?? "trie",
+ LM_ALPHA, LM_BETA);
}
@@ -123,15 +107,15 @@ static void Main(string[] args)
Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
- Console.WriteLine((extended ? $"Extended result: ": "Recognized text: ") + speechResult);
+ Console.WriteLine((extended ? $"Extended result: " : "Recognized text: ") + speechResult);
}
waveBuffer.Clear();
}
- else
+ catch (Exception ex)
{
- Console.WriteLine("Error loding the model.");
+ Console.WriteLine(ex.Message);
}
}
}
}
-}
+}
\ No newline at end of file