diff --git a/src/Engine/McBopomofoLM.cpp b/src/Engine/McBopomofoLM.cpp index 4ade300..989d906 100644 --- a/src/Engine/McBopomofoLM.cpp +++ b/src/Engine/McBopomofoLM.cpp @@ -22,13 +22,15 @@ // OTHER DEALINGS IN THE SOFTWARE. #include "McBopomofoLM.h" + #include -#include #include #include #include #include +#include "gramambular2/reading_grid.h" + namespace McBopomofo { McBopomofoLM::McBopomofoLM() @@ -122,7 +124,30 @@ std::vector McBopomofoLM::getUnig allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues); } - allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end()); + // TODO(#118): Leaky abstraction. This relies on the impl. detail that we always use the default separator. + bool isKeyMultiSyllable = key.find(Formosa::Gramambular2::ReadingGrid::kDefaultSeparator) != std::string::npos; + if (isKeyMultiSyllable || allUnigrams.empty()) { + allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end()); + } else { + // Score rewrite. To ensure fairness, each user unigram is assigned a + // score that is slightly higher than its peer unigrams. + double topScore = std::numeric_limits::lowest(); + for (const auto& unigram : allUnigrams) { + if (unigram.score() > topScore) { + topScore = unigram.score(); + } + } + + constexpr double epsilon = 0.000000001; + topScore += epsilon; + + std::vector rewrittenUserUnigrams; + for (const auto& unigram : userUnigrams) { + rewrittenUserUnigrams.emplace_back(Formosa::Gramambular2::LanguageModel::Unigram(unigram.value(), topScore)); + } + allUnigrams.insert(allUnigrams.begin(), rewrittenUserUnigrams.begin(), rewrittenUserUnigrams.end()); + } + return allUnigrams; } @@ -191,7 +216,6 @@ std::string McBopomofoLM::convertMacro(const std::string& input) return input; } - std::vector McBopomofoLM::filterAndTransformUnigrams(const std::vector unigrams, const std::unordered_set& excludedValues, std::unordered_set& insertedValues) { std::vector results; @@ -237,5 +261,4 @@ bool McBopomofoLM::hasAssociatedPhrasesForKey(const std::string& key) return m_associatedPhrases.hasValuesForKey(key); } - } // namespace McBopomofo