From 5d19288f1860dbf2a519a1b954bf11a775f96a3a Mon Sep 17 00:00:00 2001 From: WhiredPlanck Date: Sat, 25 Nov 2023 21:16:43 +0800 Subject: [PATCH] fix: correct the split behavior during collecting dict entries (#762) --- src/rime/algo/strings.cc | 6 +++--- src/rime/algo/strings.h | 2 +- src/rime/dict/entry_collector.cc | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rime/algo/strings.cc b/src/rime/algo/strings.cc index a0ab82626a..d9cbad48b6 100644 --- a/src/rime/algo/strings.cc +++ b/src/rime/algo/strings.cc @@ -8,7 +8,7 @@ vector<string> split(const string& str, SplitBehavior behavior) { vector<string> strings; size_t lastPos, pos; - if (behavior == SplitBehavior::SkipEmpty) { + if (behavior == SplitBehavior::SkipToken) { lastPos = str.find_first_not_of(delim, 0); } else { lastPos = 0; @@ -17,7 +17,7 @@ vector<string> split(const string& str, while (std::string::npos != pos || std::string::npos != lastPos) { strings.emplace_back(str.substr(lastPos, pos - lastPos)); - if (behavior == SplitBehavior::SkipEmpty) { + if (behavior == SplitBehavior::SkipToken) { lastPos = str.find_first_not_of(delim, pos); } else { if (pos == std::string::npos) { @@ -31,7 +31,7 @@ vector<string> split(const string& str, }; vector<string> split(const string& str, const string& delim) { - return split(str, delim, SplitBehavior::SkipEmpty); + return split(str, delim, SplitBehavior::SkipToken); }; } // namespace strings diff --git a/src/rime/algo/strings.h b/src/rime/algo/strings.h index 2bd570f1c6..222476d2fa 100644 --- a/src/rime/algo/strings.h +++ b/src/rime/algo/strings.h @@ -7,7 +7,7 @@ namespace rime { namespace strings { -enum class SplitBehavior { KeepEmpty, SkipEmpty }; +enum class SplitBehavior { KeepToken, SkipToken }; vector<string> split(const string& str, const string& delim, diff --git a/src/rime/dict/entry_collector.cc b/src/rime/dict/entry_collector.cc index a91c60d38f..23c8496661 100644 --- a/src/rime/dict/entry_collector.cc +++ b/src/rime/dict/entry_collector.cc @@ -87,7 +87,7 @@ void 
EntryCollector::Collect(const string& dict_file) { continue; } // read a dict entry - auto row = strings::split(line, "\t"); + auto row = strings::split(line, "\t", strings::SplitBehavior::KeepToken); int num_columns = static_cast<int>(row.size()); if (num_columns <= text_column || row[text_column].empty()) { LOG(WARNING) << "Missing entry text at #" << num_entries << ".";