From 28363633c7c7f1d6fc3baf5535e68f7a619c943e Mon Sep 17 00:00:00 2001 From: Qijia Liu Date: Thu, 10 Aug 2023 00:16:10 -0400 Subject: [PATCH 1/3] follow opencc conversion chain --- src/rime/gear/simplifier.cc | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/rime/gear/simplifier.cc b/src/rime/gear/simplifier.cc index 4ff3c75147..d6c271aa70 100644 --- a/src/rime/gear/simplifier.cc +++ b/src/rime/gear/simplifier.cc @@ -46,19 +46,36 @@ class Opencc { } bool ConvertWord(const string& text, vector* forms) { - if (dict_ == nullptr) - return false; - opencc::Optional item = dict_->Match(text); - if (item.IsNull()) { - // Match not found + if (converter_ == nullptr) { return false; - } else { - const opencc::DictEntry* entry = item.Get(); - for (auto&& value : entry->Values()) { - forms->push_back(std::move(value)); + } + const list conversions = + converter_->GetConversionChain()->GetConversions(); + vector original_words{text}; + for (auto conversion : conversions) { + opencc::DictPtr dict = conversion->GetDict(); + if (dict == nullptr) { + return false; + } + set word_set; + vector converted_words; + for (const auto& original_word : original_words) { + opencc::Optional item = + dict->Match(original_word); + if (item.IsNull()) { + continue; + } + const opencc::DictEntry* entry = item.Get(); + for (const auto& converted_word : entry->Values()) { + if (word_set.insert(converted_word).second) { + converted_words.push_back(converted_word); + } + } } - return forms->size() > 0; + original_words.swap(converted_words); } + *forms = std::move(original_words); + return forms->size() > 0; } bool RandomConvertText(const string& text, string* simplified) { From bbf99f3a463209007f918080399c28382c37b36a Mon Sep 17 00:00:00 2001 From: Qijia Liu Date: Fri, 11 Aug 2023 20:39:14 -0400 Subject: [PATCH 2/3] when a dict doesn't contain a word, pass as-is --- src/rime/gear/simplifier.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/rime/gear/simplifier.cc b/src/rime/gear/simplifier.cc index d6c271aa70..16e1621851 100644 --- a/src/rime/gear/simplifier.cc +++ b/src/rime/gear/simplifier.cc @@ -52,6 +52,7 @@ class Opencc { const list conversions = converter_->GetConversionChain()->GetConversions(); vector original_words{text}; + bool matched = false; for (auto conversion : conversions) { opencc::DictPtr dict = conversion->GetDict(); if (dict == nullptr) { @@ -63,8 +64,13 @@ class Opencc { opencc::Optional item = dict->Match(original_word); if (item.IsNull()) { + // Current dictionary doesn't convert the word. We need to keep it for + // other dicts in the chain. e.g. s2t.json expands 里 to 里 and 裏, + // then t2tw.json passes 里 as-is and converts 裏 to 裡. + converted_words.push_back(original_word); continue; } + matched = true; const opencc::DictEntry* entry = item.Get(); for (const auto& converted_word : entry->Values()) { if (word_set.insert(converted_word).second) { @@ -74,6 +80,10 @@ class Opencc { } original_words.swap(converted_words); } + if (!matched) { + // No dictionary contains the word + return false; + } *forms = std::move(original_words); return forms->size() > 0; } From 58f6c0cc67e0db49fc8251393442e4f87a33f24f Mon Sep 17 00:00:00 2001 From: Qijia Liu Date: Sat, 12 Aug 2023 09:49:33 -0400 Subject: [PATCH 3/3] de-duplication --- src/rime/gear/simplifier.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rime/gear/simplifier.cc b/src/rime/gear/simplifier.cc index 16e1621851..4b89e80810 100644 --- a/src/rime/gear/simplifier.cc +++ b/src/rime/gear/simplifier.cc @@ -67,7 +67,9 @@ class Opencc { // Current dictionary doesn't convert the word. We need to keep it for // other dicts in the chain. e.g. s2t.json expands 里 to 里 and 裏, // then t2tw.json passes 里 as-is and converts 裏 to 裡. - converted_words.push_back(original_word); + if (word_set.insert(original_word).second) { + converted_words.push_back(original_word); + } continue; } matched = true;