From 2c5a0a36a67927dcbde1630a657ab6251958701f Mon Sep 17 00:00:00 2001 From: nopdan Date: Sun, 3 Sep 2023 15:05:12 +0800 Subject: [PATCH 1/4] move TableQuery definition to header file --- src/rime/dict/table.cc | 30 ------------------------------ src/rime/dict/table.h | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/src/rime/dict/table.cc b/src/rime/dict/table.cc index a750d12fe0..76d59801eb 100644 --- a/src/rime/dict/table.cc +++ b/src/rime/dict/table.cc @@ -21,36 +21,6 @@ const int kTableFormatLowestCompatible = 4.0; const char kTableFormatPrefix[] = "Rime::Table/"; const size_t kTableFormatPrefixLen = sizeof(kTableFormatPrefix) - 1; -class TableQuery { - public: - TableQuery(table::Index* index) : lv1_index_(index) { Reset(); } - - TableAccessor Access(SyllableId syllable_id, double credibility = 0.0) const; - - // down to next level - bool Advance(SyllableId syllable_id, double credibility = 0.0); - - // up one level - bool Backdate(); - - // back to root - void Reset(); - - size_t level() const { return level_; } - - protected: - size_t level_ = 0; - Code index_code_; - vector credibility_; - - private: - bool Walk(SyllableId syllable_id); - - table::HeadIndex* lv1_index_ = nullptr; - table::TrunkIndex* lv2_index_ = nullptr; - table::TrunkIndex* lv3_index_ = nullptr; - table::TailIndex* lv4_index_ = nullptr; -}; TableAccessor::TableAccessor(const Code& index_code, const List* list, diff --git a/src/rime/dict/table.h b/src/rime/dict/table.h index e5186be144..232f538246 100644 --- a/src/rime/dict/table.h +++ b/src/rime/dict/table.h @@ -128,7 +128,37 @@ class TableAccessor { using TableQueryResult = map>; struct SyllableGraph; -class TableQuery; + +class TableQuery { + public: + TableQuery(table::Index* index) : lv1_index_(index) { Reset(); } + + TableAccessor Access(SyllableId syllable_id, double credibility = 0.0) const; + + // down to next level + bool Advance(SyllableId syllable_id, double credibility = 0.0); + + // up one level + bool Backdate(); + + // back to root + void Reset(); + + size_t level() const { return level_; } + + protected: + size_t level_ = 0; + Code index_code_; + vector credibility_; + + private: + bool Walk(SyllableId syllable_id); + + table::HeadIndex* lv1_index_ = nullptr; + table::TrunkIndex* lv2_index_ = nullptr; + table::TrunkIndex* lv3_index_ = nullptr; + table::TailIndex* lv4_index_ = nullptr; +}; class Table : public MappedFile { public: @@ -152,6 +182,7 @@ class Table : public MappedFile { RIME_API string GetEntryText(const table::Entry& entry); uint32_t dict_file_checksum() const; + table::Metadata* metadata() const { return metadata_; } private: table::Index* BuildIndex(const Vocabulary& vocabulary, size_t num_syllables); From 7cd8566c7c5ecc0f649b8f83332311d63a93f103 Mon Sep 17 00:00:00 2001 From: nopdan Date: Sun, 3 Sep 2023 15:18:01 +0800 Subject: [PATCH 2/4] add rime_table_decompiler.cc --- src/rime/dict/table.cc | 1 - tools/CMakeLists.txt | 13 ++++ tools/rime_table_decompiler.cc | 105 +++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 tools/rime_table_decompiler.cc diff --git a/src/rime/dict/table.cc b/src/rime/dict/table.cc index 76d59801eb..3f10d77e09 100644 --- a/src/rime/dict/table.cc +++ b/src/rime/dict/table.cc @@ -21,7 +21,6 @@ const int kTableFormatLowestCompatible = 4.0; const char kTableFormatPrefix[] = "Rime::Table/"; const size_t kTableFormatPrefixLen = sizeof(kTableFormatPrefix) - 1; - TableAccessor::TableAccessor(const Code& index_code, const List* list, double credibility) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index bb0361cc79..ab64c52a15 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -37,8 +37,21 @@ target_link_libraries(rime_deployer ${rime_dict_library} ${rime_levers_library}) +set(rime_table_decompiler_src + "rime_table_decompiler.cc" + ${CMAKE_SOURCE_DIR}/src/rime/dict/table.cc + ${CMAKE_SOURCE_DIR}/src/rime/dict/mapped_file.cc + ${CMAKE_SOURCE_DIR}/src/rime/dict/string_table.cc + ${CMAKE_SOURCE_DIR}/src/rime/dict/vocabulary.cc + ) +add_executable(rime_table_decompiler ${rime_table_decompiler_src}) +target_link_libraries(rime_table_decompiler + ${rime_library} + ${rime_dict_library}) + install(TARGETS rime_deployer DESTINATION ${BIN_INSTALL_DIR}) install(TARGETS rime_dict_manager DESTINATION ${BIN_INSTALL_DIR}) +install(TARGETS rime_table_decompiler DESTINATION ${BIN_INSTALL_DIR}) install(TARGETS rime_patch DESTINATION ${BIN_INSTALL_DIR}) diff --git a/tools/rime_table_decompiler.cc b/tools/rime_table_decompiler.cc new file mode 100644 index 0000000000..aa66557059 --- /dev/null +++ b/tools/rime_table_decompiler.cc @@ -0,0 +1,105 @@ +// rime_table_decompiler.cc +// nopdan +// +#include +#include +#include +#include +#include +#include + +using namespace std; +ofstream fout; + +void outCode(rime::Table* table, const rime::Code code, ofstream& fout) { + if (code.empty()) { + return; + } + auto item = code.begin(); + fout << table->GetSyllableById(*item); + item++; + for (; item != code.end(); ++item) { + fout << " "; + fout << table->GetSyllableById(*item); + } + return; +} + +void access(rime::Table* table, rime::TableAccessor accessor) { + while (!accessor.exhausted()) { + auto word = table->GetEntryText(*accessor.entry()); + fout << word << "\t"; + outCode(table, accessor.code(), fout); + + auto weight = accessor.entry()->weight; + if (weight >= 0) { + fout << "\t" << exp(weight); + } + fout << endl; + accessor.Next(); + } +} + +// 递归遍历 +void recursion(rime::Table* table, ofstream& fout, rime::TableQuery* query) { + for (int i = 0; i < table->metadata()->num_syllables; i++) { + auto accessor = query->Access(i); + access(table, accessor); + if (query->Advance(i)) { + if (query->level() < 3) { + recursion(table, fout, query); + } else { + auto accessor = query->Access(0); + access(table, accessor); + } + query->Backdate(); + } + } +} + +void traversal(rime::Table* table, ofstream& fout) { + auto metadata = table->metadata(); + cout << "num_syllables: " << metadata->num_syllables << endl; + cout << "num_entries: " << metadata->num_entries << endl; + + fout << fixed; + fout << setprecision(0); + rime::TableQuery query(table->metadata()->index.get()); + recursion(table, fout, &query); +} + +int main(int argc, char* argv[]) { + string fileName(argv[1]); + + cout << "Read File: " << fileName << endl; + rime::Table table(fileName); + table.Load(); + + // Remove directory if present. + // Do this before extension removal incase directory has a period character. + const size_t last_slash_idx = fileName.find_last_of("\\/"); + if (std::string::npos != last_slash_idx) { + fileName.erase(0, last_slash_idx + 1); + } + + // Remove extension if present. + const size_t period_idx = fileName.find('.'); + if (std::string::npos != period_idx) { + fileName.erase(period_idx); + } + + string outputName = fileName + ".txt"; + fout.open(outputName); + // clang-format off + fout << "# Rime dictionary\n\n"; + fout << "---\n" + "name: " << fileName << "\n" + "version: \"1.0\"\n" + "...\n\n"; + // clang-format on + traversal(&table, fout); + cout << "Save To: " << outputName << endl + << endl; + fout.close(); + return 0; +} From cbefe73a069186b1943c58030de9e3951c98a985 Mon Sep 17 00:00:00 2001 From: nopdan Date: Tue, 5 Sep 2023 00:00:39 +0800 Subject: [PATCH 3/4] support custom save path --- tools/rime_table_decompiler.cc | 85 +++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/tools/rime_table_decompiler.cc b/tools/rime_table_decompiler.cc index aa66557059..b5ca9ada1e 100644 --- a/tools/rime_table_decompiler.cc +++ b/tools/rime_table_decompiler.cc @@ -6,12 +6,15 @@ #include #include #include +#include #include -using namespace std; -ofstream fout; +// usage: +// rime_table_decompiler [save-path] +// example: +// rime_table_decompiler pinyin.table.bin pinyin.dict.yaml -void outCode(rime::Table* table, const rime::Code code, ofstream& fout) { +void outCode(rime::Table* table, const rime::Code code, std::ofstream& fout) { if (code.empty()) { return; } @@ -25,7 +28,9 @@ void outCode(rime::Table* table, const rime::Code code, ofstream& fout) { return; } -void access(rime::Table* table, rime::TableAccessor accessor) { +void access(rime::Table* table, + rime::TableAccessor accessor, + std::ofstream& fout) { while (!accessor.exhausted()) { auto word = table->GetEntryText(*accessor.entry()); fout << word << "\t"; @@ -35,61 +40,78 @@ void access(rime::Table* table, rime::TableAccessor accessor) { if (weight >= 0) { fout << "\t" << exp(weight); } - fout << endl; + fout << std::endl; accessor.Next(); } } -// 递归遍历 -void recursion(rime::Table* table, ofstream& fout, rime::TableQuery* query) { +// recursively traverse table +void recursion(rime::Table* table, + rime::TableQuery* query, + std::ofstream& fout) { for (int i = 0; i < table->metadata()->num_syllables; i++) { auto accessor = query->Access(i); - access(table, accessor); + access(table, accessor, fout); if (query->Advance(i)) { if (query->level() < 3) { - recursion(table, fout, query); + recursion(table, query, fout); } else { auto accessor = query->Access(0); - access(table, accessor); + access(table, accessor, fout); } query->Backdate(); } } } -void traversal(rime::Table* table, ofstream& fout) { +void traversal(rime::Table* table, std::ofstream& fout) { auto metadata = table->metadata(); - cout << "num_syllables: " << metadata->num_syllables << endl; - cout << "num_entries: " << metadata->num_entries << endl; + std::cout << "num_syllables: " << metadata->num_syllables << std::endl; + std::cout << "num_entries: " << metadata->num_entries << std::endl; - fout << fixed; - fout << setprecision(0); + fout << std::fixed; + fout << std::setprecision(0); rime::TableQuery query(table->metadata()->index.get()); - recursion(table, fout, &query); + recursion(table, &query, fout); } int main(int argc, char* argv[]) { - string fileName(argv[1]); + if (argc < 2) { + std::cout << "Usage: rime_table_decompiler [save-path]" + << std::endl; + std::cout << "Example: rime_table_decompiler pinyin.table.bin pinyin.dict.yaml" + << std::endl; + return 0; + } - cout << "Read File: " << fileName << endl; + std::string fileName(argv[1]); rime::Table table(fileName); - table.Load(); - - // Remove directory if present. - // Do this before extension removal incase directory has a period character. - const size_t last_slash_idx = fileName.find_last_of("\\/"); - if (std::string::npos != last_slash_idx) { - fileName.erase(0, last_slash_idx + 1); + bool success = table.Load(); + if (!success) { + std::cerr << "Failed to load table." << std::endl; + return 1; } - // Remove extension if present. - const size_t period_idx = fileName.find('.'); - if (std::string::npos != period_idx) { - fileName.erase(period_idx); + // Remove the extension ".table.bin" if present. + const size_t table_bin_idx = fileName.rfind(".table.bin"); + if (std::string::npos != table_bin_idx) { + fileName.erase(table_bin_idx); } + const std::string outputName = + (argc == 3) ? argv[2]: fileName + ".yaml"; - string outputName = fileName + ".txt"; + std::ofstream fout; fout.open(outputName); + if (!fout.is_open()) { + std::cerr << "Failed to open file " << outputName << std::endl; + return 1; + } + + // schema id + const size_t last_slash_idx = fileName.find_last_of("\\/"); + if (std::string::npos != last_slash_idx) { + fileName.erase(0, last_slash_idx + 1); + } // clang-format off fout << "# Rime dictionary\n\n"; fout << "---\n" @@ -98,8 +120,7 @@ int main(int argc, char* argv[]) { "...\n\n"; // clang-format on traversal(&table, fout); - cout << "Save To: " << outputName << endl - << endl; + std::cout << "Save to: " << outputName << std::endl; fout.close(); return 0; } From 9e0ebf28b8ec3c60d28989673655eb2cd434c105 Mon Sep 17 00:00:00 2001 From: Qijia Liu Date: Mon, 4 Sep 2023 12:47:06 -0400 Subject: [PATCH 4/4] minor tweak [skip ci] --- tools/rime_table_decompiler.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/rime_table_decompiler.cc b/tools/rime_table_decompiler.cc index b5ca9ada1e..76a1168323 100644 --- a/tools/rime_table_decompiler.cc +++ b/tools/rime_table_decompiler.cc @@ -76,7 +76,7 @@ void traversal(rime::Table* table, std::ofstream& fout) { } int main(int argc, char* argv[]) { - if (argc < 2) { + if (argc < 2 || argc > 3) { std::cout << "Usage: rime_table_decompiler [save-path]" << std::endl; std::cout << "Example: rime_table_decompiler pinyin.table.bin pinyin.dict.yaml" @@ -112,13 +112,11 @@ int main(int argc, char* argv[]) { if (std::string::npos != last_slash_idx) { fileName.erase(0, last_slash_idx + 1); } - // clang-format off fout << "# Rime dictionary\n\n"; fout << "---\n" "name: " << fileName << "\n" "version: \"1.0\"\n" "...\n\n"; - // clang-format on traversal(&table, fout); std::cout << "Save to: " << outputName << std::endl; fout.close();