diff --git a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs index 047e3664..1a5377be 100644 --- a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs +++ b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs @@ -34,6 +34,9 @@ protected ScriptureRefUsfmParserHandlerBase() protected ScriptureTextType CurrentTextType => _curTextType.Count == 0 ? ScriptureTextType.None : _curTextType.Peek(); + private static readonly string[] EmbedStyles = new[] { "f", "fe", "fig", "fm", "x" }; + private static readonly char[] EmbedPartStartCharStyles = new[] { 'f', 'x', 'z' }; + public override void EndUsfm(UsfmParserState state) { EndVerseText(state); @@ -213,14 +216,14 @@ public override void StartChar( IReadOnlyList attributes ) { - if (IsEmbedPart(markerWithoutPlus) & InNoteText) + if (IsEmbedPartStyle(markerWithoutPlus) & InNoteText) _inNestedEmbed = true; // if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse // segment CheckConvertVerseParaToNonVerse(state); - if (IsEmbedCharacter(markerWithoutPlus)) + if (IsEmbedStyle(markerWithoutPlus)) { _inEmbed = true; StartEmbed(state, markerWithoutPlus); @@ -239,7 +242,7 @@ public override void EndChar( bool closed ) { - if (IsEmbedPart(marker)) + if (IsEmbedPartStyle(marker)) { if (_inNestedEmbed) { @@ -250,7 +253,7 @@ bool closed EndNoteText(state); } } - if (IsEmbedCharacter(marker)) + if (IsEmbedStyle(marker)) { EndEmbed(state, marker, attributes, closed); _inEmbed = false; @@ -343,7 +346,7 @@ private void EndParentElement() private void EndEmbedElements() { - if (_curElements.Count > 0 && IsEmbedCharacter(_curElements.Peek().Name)) + if (_curElements.Count > 0 && IsEmbedStyle(_curElements.Peek().Name)) _curElements.Pop(); } @@ -380,14 +383,17 @@ private void CheckConvertVerseParaToNonVerse(UsfmParserState state) public bool InEmbed(string marker) { - return _inEmbed || IsEmbedCharacter(marker); + return _inEmbed || IsEmbedStyle(marker); } public bool IsInNestedEmbed(string marker) { return _inNestedEmbed || ( - !(marker is null) && marker.StartsWith("+") && marker.Length > 1 && IsEmbedPart(marker.Substring(1)) + !(marker is null) + && marker.StartsWith("+") + && marker.Length > 1 + && IsEmbedPartStyle(marker.Substring(1)) ); } @@ -396,14 +402,14 @@ private static bool IsNoteText(string marker) return marker == "ft"; } - public static bool IsEmbedPart(string marker) + public static bool IsEmbedPartStyle(string marker) { - return !(marker is null) && marker.Length > 0 && marker[0].IsOneOf('f', 'x', 'z'); + return !(marker is null) && marker.Length > 0 && marker[0].IsOneOf(EmbedPartStartCharStyles); } - private static bool IsEmbedCharacter(string marker) + private static bool IsEmbedStyle(string marker) { - return !(marker is null) && marker.IsOneOf("f", "fe", "fig", "fm", "x"); + return !(marker is null) && marker.IsOneOf(EmbedStyles); } } } diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index a9327306..c5317d6c 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -385,7 +385,7 @@ private bool ReplaceWithNewTokens(UsfmParserState state, bool closed = true) string marker = state?.Token?.Marker; bool inEmbed = InEmbed(marker); bool inNestedEmbed = IsInNestedEmbed(marker); - bool isStyleTag = marker != null && !IsEmbedPart(marker); + bool isStyleTag = marker != null && !IsEmbedPartStyle(marker); bool existingText = state .Tokens.Skip(_tokenIndex) diff --git a/src/SIL.Machine/Corpora/UsfmTextBase.cs b/src/SIL.Machine/Corpora/UsfmTextBase.cs index 65d5a10d..cd6c6d4a 100644 --- a/src/SIL.Machine/Corpora/UsfmTextBase.cs +++ b/src/SIL.Machine/Corpora/UsfmTextBase.cs @@ -258,7 +258,9 @@ public override void Text(UsfmParserState state, string text) } else if (text.Length > 0 && (CurrentTextType != ScriptureTextType.Verse || state.IsVerseText)) { - if (InEmbed(state.Token.Marker) && (!InNoteText || IsInNestedEmbed(state.Token.Marker))) + bool isEmbedOrNestedDontUpdate = + InEmbed(state.Token.Marker) && (!InNoteText || IsInNestedEmbed(state.Token.Marker)); + if (isEmbedOrNestedDontUpdate) return; if (