Skip to content

Commit

Permalink
Fixed a possible endless loop in unbalanced translation splitting, fixed double translation merging.
Browse files Browse the repository at this point in the history
Added alternative text insertion command to chrome extension for cases in which the paste command does not work (as it does not on some sites).
  • Loading branch information
TommiNieminen committed Jul 29, 2022
1 parent 7097249 commit ee7a200
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 3 deletions.
2 changes: 2 additions & 0 deletions OpusCatChromeExtension/extractsource.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ chrome.runtime.onMessage.addListener(
var blob = new Blob([request.opusCatTranslationToPaste], {type: 'text/plain'});
var item = new ClipboardItem({'text/plain': blob});
navigator.clipboard.write([item]).then(() => document.execCommand('paste'));
//If clipboard paste does not work, use insertText
//document.execCommand('insertText',false,request.opusCatTranslationToPaste);
}
}
);
Expand Down
5 changes: 5 additions & 0 deletions OpusCatMTEngine/Marian/MarianProcess.cs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,11 @@ private List<string> SplitOnMiddle(string preprocessedLine, string splitPattern)
{
var lineLength = preprocessedLine.Length;
var splitterMatches = Regex.Matches(preprocessedLine, Regex.Escape(splitPattern)).Cast<Match>().ToList();

//Filter out very uneven splits (where one split is less than third of line length), since those won't solve the problem
var midpoint = lineLength / 2;
splitterMatches = splitterMatches.Where(x => Math.Abs(midpoint - x.Index) < (lineLength / 3)).ToList();

if (splitterMatches.Any())
{
var splitPoint =
Expand Down
4 changes: 1 addition & 3 deletions OpusCatMTEngine/Marian/TranslationPair.cs
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,7 @@ private Dictionary<int, List<int>> GenerateDesegmentedAlignment(string sourceSen
//This joins up two translation pairs, used to produce single output when input
// has been segmented into separate sentences
internal void AppendTranslationPair(TranslationPair translationPart)
{
this.translation = $"{this.translation} {translationPart.translation}";

{
this.SegmentedAlignmentSourceToTarget = this.JoinAlignments(
this.SegmentedAlignmentSourceToTarget, translationPart.SegmentedAlignmentSourceToTarget,
this.SegmentedSourceSentence.Length, this.SegmentedTranslation.Length);
Expand Down

0 comments on commit ee7a200

Please sign in to comment.