From 81d956359e2f473c0de849d3f864538a42c1718e Mon Sep 17 00:00:00 2001 From: Daniel Deptford Date: Sun, 16 Mar 2025 08:31:19 -0700 Subject: [PATCH 1/6] Implementing peg2pegtl, based on abnf2pegtl. Added peg grammar header and peg.peg grammar. --- include/tao/pegtl/contrib/peg.hpp | 188 +++++++++++ src/example/pegtl/CMakeLists.txt | 1 + src/example/pegtl/peg.peg | 37 +++ src/example/pegtl/peg2pegtl.cpp | 524 ++++++++++++++++++++++++++++++ 4 files changed, 750 insertions(+) create mode 100644 include/tao/pegtl/contrib/peg.hpp create mode 100644 src/example/pegtl/peg.peg create mode 100644 src/example/pegtl/peg2pegtl.cpp diff --git a/include/tao/pegtl/contrib/peg.hpp b/include/tao/pegtl/contrib/peg.hpp new file mode 100644 index 000000000..89e48d582 --- /dev/null +++ b/include/tao/pegtl/contrib/peg.hpp @@ -0,0 +1,188 @@ +// Copyright (c) 2021 Daniel Deptford +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#ifndef TAO_PEGTL_CONTRIB_PEG_HPP +#define TAO_PEGTL_CONTRIB_PEG_HPP + +#include + +namespace TAO_PEGTL_NAMESPACE::peg +{ + // PEG grammar from https://pdos.csail.mit.edu/~baford/packrat/popl04/peg-popl04.pdf + + // clang-format off + struct AND; + struct Char; + struct Class; + struct CLOSE; + struct Comment; + struct Definition; + struct DOT; + struct EndOfFile; + struct EndOfLine; + struct Expression; + struct QUESTION; + struct IdentCont; + struct Identifier; + struct IdentStart; + struct LEFTARROW; + struct Literal; + struct NOT; + struct OPEN; + struct PLUS; + struct Prefix; + struct Primary; + struct Range; + struct Sequence; + struct SLASH; + struct Space; + struct Spacing; + struct STAR; + struct Suffix; + + struct Grammar : tao::pegtl::seq, EndOfFile> {}; + struct Definition : tao::pegtl::seq {}; + struct Expression : tao::pegtl::seq< + Sequence, + tao::pegtl::star< + tao::pegtl::seq< + SLASH, + Sequence + > + > + > {}; + struct Sequence : tao::pegtl::star {}; + struct Prefix : tao::pegtl::seq< + tao::pegtl::opt< + tao::pegtl::sor< + AND, + NOT + > + >, + Suffix + > {}; + + struct Suffix : tao::pegtl::seq< + Primary, + tao::pegtl::opt< + tao::pegtl::sor< + QUESTION, + STAR, + PLUS + > + > + > {}; + + struct Primary : tao::pegtl::sor< + tao::pegtl::seq >, + tao::pegtl::seq, + Literal, + Class, + DOT> {}; + + struct Identifier : tao::pegtl::seq, Spacing> {}; + + struct IdentStart : tao::pegtl::ranges< 'a', 'z', 'A', 'Z', '_' > {}; + + struct IdentCont : tao::pegtl::sor< + IdentStart, + tao::pegtl::range<'0','9'> + > {}; + + struct Literal : tao::pegtl::sor< + tao::pegtl::seq< + tao::pegtl::one<'\''>, + tao::pegtl::star< + tao::pegtl::seq< + tao::pegtl::not_at< + tao::pegtl::one<'\''> + >, + Char + > + >, + tao::pegtl::one<'\''>, + Spacing + >, + tao::pegtl::seq< + tao::pegtl::one<'\"'>, + tao::pegtl::star< + tao::pegtl::seq< + tao::pegtl::not_at >, + Char + > + >, + tao::pegtl::one<'\"'>, + Spacing + > + > {}; + + struct Class : tao::pegtl::seq< + tao::pegtl::one<'['>, + tao::pegtl::star< + tao::pegtl::seq< + tao::pegtl::not_at >, + Range + > + >, + tao::pegtl::one<']'>, + Spacing + > {}; + + struct Range : tao::pegtl::sor< + tao::pegtl::seq< + Char, + tao::pegtl::one<'-'>, + Char>, + Char + > {}; + + struct Char : tao::pegtl::sor< + tao::pegtl::seq< + tao::pegtl::one<'\\'>, + tao::pegtl::one<'n','r','t','\'','\"','[',']','\\'> >, + tao::pegtl::seq< + tao::pegtl::one<'\\'>, + tao::pegtl::range<'0','2'>, + tao::pegtl::range<'0','7'>, + tao::pegtl::range<'0','7'> >, + tao::pegtl::seq< + tao::pegtl::one<'\\'>, + tao::pegtl::range<'0','7'>, + tao::pegtl::opt > >, + tao::pegtl::seq< + tao::pegtl::not_at >, + tao::pegtl::any> + > {}; + + struct LEFTARROW : tao::pegtl::seq, Spacing> {}; + struct SLASH : tao::pegtl::seq, Spacing> {}; + struct AND : tao::pegtl::seq, Spacing> {}; + struct NOT : tao::pegtl::seq, Spacing> {}; + struct QUESTION : tao::pegtl::seq, Spacing> {}; + struct STAR : tao::pegtl::seq, Spacing> {}; + struct PLUS : tao::pegtl::seq, Spacing> {}; + struct OPEN : tao::pegtl::seq, Spacing> {}; + struct CLOSE : tao::pegtl::seq, Spacing> {}; + struct DOT : tao::pegtl::seq, Spacing> {}; + + struct Spacing : tao::pegtl::star > {}; + struct Comment : + tao::pegtl::seq< + tao::pegtl::one<'#'>, + tao::pegtl::star< + tao::pegtl::seq< + tao::pegtl::not_at, + tao::pegtl::any + > + >, + EndOfLine + > {}; + + struct Space : tao::pegtl::sor, tao::pegtl::one<'\t'>, EndOfLine> {}; + struct EndOfLine : tao::pegtl::sor, tao::pegtl::one<'\n'>, tao::pegtl::one<'\r'> > {}; + struct EndOfFile : tao::pegtl::eof {}; + // clang-format on + +} // namespace TAO_PEGTL_NAMESPACE::peg + +#endif diff --git a/src/example/pegtl/CMakeLists.txt b/src/example/pegtl/CMakeLists.txt index 9fcabc707..487f65ad1 100644 --- a/src/example/pegtl/CMakeLists.txt +++ b/src/example/pegtl/CMakeLists.txt @@ -26,6 +26,7 @@ set(example_sources modulus_match.cpp parse_tree.cpp parse_tree_user_state.cpp + peg2pegtl.cpp proto3.cpp random_order.cpp recover.cpp diff --git a/src/example/pegtl/peg.peg b/src/example/pegtl/peg.peg new file mode 100644 index 000000000..99a7c12a8 --- /dev/null +++ b/src/example/pegtl/peg.peg @@ -0,0 +1,37 @@ +# Parsing Expression Grammar (PEG) taken from +# https://pdos.csail.mit.edu/~baford/packrat/popl04/peg-popl04.pdf + +# Hierarchical syntax +Grammar <- Spacing Definition+ EndOfFile +Definition <- Identifier LEFTARROW Expression +Expression <- Sequence (SLASH Sequence)* +Sequence <- Prefix* +Prefix <- (AND / NOT)? Suffix +Suffix <- Primary (QUESTION / STAR / PLUS)? +Primary <- Identifier !LEFTARROW / OPEN Expression CLOSE / Literal / Class / DOT + +# Lexical syntax +Identifier <- IdentStart IdentCont* Spacing +IdentStart <- [a-zA-Z_] +IdentCont <- IdentStart / [0-9] +Literal <- ['] (!['] Char)* ['] Spacing / ["] (!["] Char)* ["] Spacing +Class <- '[' (!']' Range)* ']' Spacing +Range <- Char '-' Char / Char +Char <- '\\' [nrt'"\[\]\\] / '\\' [0-2][0-7][0-7] / '\\' [0-7][0-7]? / !'\\' . + +LEFTARROW <- '<-' Spacing +SLASH <- '/' Spacing +AND <- '&' Spacing +NOT <- '!' Spacing +QUESTION <- '?' Spacing +STAR <- '*' Spacing +PLUS <- '+' Spacing +OPEN <- '(' Spacing +CLOSE <- ')' Spacing +DOT <- '.' Spacing + +Spacing <- (Space / Comment)* +Comment <- '#' (!EndOfLine .)* EndOfLine +Space <- ' ' / '\t' / EndOfLine +EndOfLine <- '\r\n' / '\n' / '\r' +EndOfFile <- !. \ No newline at end of file diff --git a/src/example/pegtl/peg2pegtl.cpp b/src/example/pegtl/peg2pegtl.cpp new file mode 100644 index 000000000..3a1437150 --- /dev/null +++ b/src/example/pegtl/peg2pegtl.cpp @@ -0,0 +1,524 @@ +// Copyright (c) 2018-2023 Dr. Colin Hirsch and Daniel Frey +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#if defined( _MSC_VER ) +#include +#define TAO_PEGTL_STRCASECMP _stricmp +#else +#include +#define TAO_PEGTL_STRCASECMP strcasecmp +#endif + +#include +#include +#include + +namespace TAO_PEGTL_NAMESPACE +{ + namespace peg + { + using node_ptr = std::unique_ptr< parse_tree::node >; + + namespace + { + std::string prefix = "TAO_PEGTL_NAMESPACE::"; + + std::set< std::string > keywords = { + "alignas", + "alignof", + "and", + "and_eq", + "asm", + "auto", + "bitand", + "bitor", + "bool", + "break", + "case", + "catch", + "char", + "char8_t", + "char16_t", + "char32_t", + "class", + "compl", + "concept", + "const", + "consteval", + "constexpr", + "constinit", + "const_cast", + "continue", + "co_await", + "co_return", + "co_yield", + "decltype", + "default", + "delete", + "do", + "double", + "dynamic_cast", + "else", + "enum", + "explicit", + "export", + "extern", + "false", + "float", + "for", + "friend", + "goto", + "if", + "inline", + "int", + "long", + "mutable", + "namespace", + "new", + "noexcept", + "not", + "not_eq", + "nullptr", + "operator", + "or", + "or_eq", + "private", + "protected", + "public", + "register", + "reinterpret_cast", + "return", + "requires", + "short", + "signed", + "sizeof", + "static", + "static_assert", + "static_cast", + "struct", + "switch", + "template", + "this", + "thread_local", + "throw", + "true", + "try", + "typedef", + "typeid", + "typename", + "union", + "unsigned", + "using", + "virtual", + "void", + "volatile", + "wchar_t", + "while", + "xor", + "xor_eq" + }; + + using rules_t = std::vector< std::string >; + rules_t rules_defined; + rules_t rules; + + // clang-format off + struct one_tag {}; + struct string_tag {}; + struct istring_tag {}; + // clang-format on + + rules_t::reverse_iterator find_rule( rules_t& r, const std::string& v, const rules_t::reverse_iterator& rbegin ) + { + return std::find_if( rbegin, r.rend(), [ & ]( const rules_t::value_type& p ) { return TAO_PEGTL_STRCASECMP( p.c_str(), v.c_str() ) == 0; } ); + } + + rules_t::reverse_iterator find_rule( rules_t& r, const std::string& v ) + { + return find_rule( r, v, r.rbegin() ); + } + + bool append_char( std::string& s, const char c ) + { + if( !s.empty() ) { + s += ", "; + } + s += '\''; + if( c == '\'' || c == '\\' ) { + s += '\\'; + } + s += c; + s += '\''; + return std::isalpha( c ) != 0; + } + + } // namespace + +#if defined( __cpp_exceptions ) + // Using must_if<> we define a control class which is used for + // the parsing run instead of the default control class. + // + // This improves the errors reported to the user. + // + // The following turns local errors into global errors, i.e. + // if one of the rules for which a custom error message is + // defined fails, it throws a parse_error exception (aka global + // failure) instead of returning false (aka local failure). + + // clang-format off + template< typename > inline constexpr const char* error_message = nullptr; + + template<> inline constexpr auto error_message< Char > = "unterminated character literal"; + template<> inline constexpr auto error_message< Expression > = "unterminated expression"; + template<> inline constexpr auto error_message< Grammar > = "unterminated grammar"; + template<> inline constexpr auto error_message< Range > = "unterminated range"; + + // clang-format on + + struct error + { + template< typename Rule > + static constexpr auto message = error_message< Rule >; + }; + + template< typename Rule > + using control = must_if< error >::control< Rule >; +#else + template< typename Rule > + using control = normal< Rule >; +#endif + + // Since we are going to generate a parse tree, we define a + // selector that decides which rules will be included in our + // parse tree, which rules will be omitted from the parse tree, + // and which of the nodes will store the matched content. + // Additionally, some nodes will fold when they have exactly + // one child node. (see fold_one below) + + template< typename Rule > + struct selector + : pegtl::parse_tree::selector< + Rule, + pegtl::parse_tree::store_content::on< + Definition, + Prefix, + Suffix, + Sequence, + Expression, + Class, + Literal, + Identifier, + IdentStart, + Range, + Char, + AND, + NOT, + QUESTION, + STAR, + PLUS, + DOT >, + pegtl::parse_tree::fold_one::on< IdentCont > > + { + template< typename... States > + static void transform( node_ptr& n ) + { + // As we use the PEG grammar taken directly from the original PEG + // paper, some nodes may have excess content from nodes not included + // in the parse tree (e.g. Comment, Space, etc). + + if( !n->children.empty() ) { + n->m_end = n->children.back()->m_end; + } + } + }; + + std::string to_string( const node_ptr& n ); + std::string to_string( const std::vector< node_ptr >& v ); + + namespace + { + std::string get_rulename( const node_ptr& n ) + { + assert( n->is_type< Identifier >() ); + std::string v = n->string(); + std::replace( v.begin(), v.end(), '-', '_' ); + return v; + } + + std::string get_rulename( const node_ptr& n, const bool print_forward_declarations ) + { + std::string v = get_rulename( n ); + const auto it = find_rule( rules, v ); + if( it != rules.rend() ) { + return *it; + } + if( keywords.count( v ) != 0 || v.find( "__" ) != std::string::npos ) { +#if defined( __cpp_exceptions ) + throw parse_error( '\'' + n->string() + "' is a reserved rulename", n->begin() ); +#else + std::cerr << '\'' + n->string() + "' is a reserved rulename" << std::endl; + std::terminate(); +#endif + } + if( print_forward_declarations && find_rule( rules_defined, v ) != rules_defined.rend() ) { + std::cout << "struct " << v << ";\n"; + } + rules.push_back( v ); + return v; + } + + struct ccmp + { + bool operator()( const std::string& lhs, const std::string& rhs ) const noexcept + { + return TAO_PEGTL_STRCASECMP( lhs.c_str(), rhs.c_str() ) < 0; + } + }; + + std::map< std::string, parse_tree::node*, ccmp > previous_rules; + + } // namespace + + template<> + struct selector< Definition > + : std::true_type + { + template< typename... States > + static void transform( node_ptr& n ) + { + const auto idname = get_rulename( n->children.front() ); + assert( n->children.back()->is_type< Expression >() ); + if( !previous_rules.try_emplace( idname, n.get() ).second ) { +#if defined( __cpp_exceptions ) + throw parse_error( "identifier '" + idname + "' is already defined", n->begin() ); +#else + std::cerr << "identifier '" + idname + "' is already defined" << std::endl; + std::terminate(); +#endif + } + } + }; + + // Finally, the generated parse tree for each node is converted to + // a C++ source code string. + + struct stringifier + { + using function_t = std::string ( * )( const node_ptr& n ); + function_t default_ = nullptr; + + std::map< std::string_view, function_t > map_; + + template< typename T > + void add( const function_t& f ) + { + map_.try_emplace( demangle< T >(), f ); + } + + std::string operator()( const node_ptr& n ) const + { + const auto it = map_.find( n->type ); + if( it != map_.end() ) { + return it->second( n ); + } + return default_( n ); + } + }; + + stringifier make_stringifier() + { + stringifier nrv; + nrv.default_ = []( const node_ptr& n ) -> std::string { +#if defined( __cpp_exceptions ) + throw parse_error( "missing to_string() for " + std::string( n->type ), n->begin() ); +#else + std::cerr << "missing to_string() for " + std::string( n->type ) << std::endl; + std::terminate(); +#endif + }; + nrv.add< Identifier >( []( const node_ptr& n ) { return get_rulename( n, true ); } ); + + nrv.add< Definition >( []( const node_ptr& n ) { + return "struct " + get_rulename( n->children.front(), false ) + " : " + to_string( n->children.back() ) + " {};"; + } ); + + nrv.add< Char >( []( const node_ptr& n ) { + const auto content = n->string_view(); + std::string s; + for( const auto c : content ) { + append_char( s, c ); + } + return s; + } ); + + nrv.add< Sequence >( []( const node_ptr& n ) { + if( n->children.size() == 1 ) { + return to_string( n->children.front() ); + } + + return prefix + "seq< " + to_string( n->children ) + " >"; + } ); + + nrv.add< Expression >( []( const node_ptr& n ) { + if( n->children.size() == 1 ) { + return to_string( n->children.front() ); + } + + return prefix + "sor< " + to_string( n->children ) + " >"; + } ); + + nrv.add< Range >( []( const node_ptr& n ) { + if( n->children.size() == 1 ) { + return prefix + "one< " + to_string( n->children.front() ) + " >"; + } + + return prefix + "range< " + to_string( n->children.front() ) + ", " + to_string( n->children.back() ) + " >"; + } ); + + nrv.add< Class >( []( const node_ptr& n ) { + if( n->children.size() == 1 ) { + return to_string( n->children.front() ); + } + + return prefix + "sor < " + to_string( n->children ) + " >"; + } ); + + nrv.add< Literal >( []( const node_ptr& n ) { + if( n->children.size() == 1 ) { + return prefix + "one< " + to_string( n->children.front() ) + " >"; + } + + return prefix + "string< " + to_string( n->children ) + " >"; + } ); + + nrv.add< Prefix >( []( const node_ptr& n ) { + const auto sub = to_string( n->children.back() ); + + if( n->children.front()->is_type< AND >() ) { + return prefix + "at< " + sub + " >"; + } + + if( n->children.front()->is_type< NOT >() ) { + return prefix + "not_at< " + sub + " >"; + } + + assert( n->children.size() == 1 ); + return sub; + } ); + + nrv.add< Suffix >( []( const node_ptr& n ) { + const auto sub = to_string( n->children.front() ); + + if( n->children.back()->is_type< QUESTION >() ) { + return prefix + "opt< " + sub + " >"; + } + + if( n->children.back()->is_type< STAR >() ) { + return prefix + "star< " + sub + " >"; + } + + if( n->children.back()->is_type< PLUS >() ) { + return prefix + "plus< " + sub + " >"; + } + + assert( n->children.size() == 1 ); + return sub; + } ); + + nrv.add< DOT >( []( const node_ptr& ) { + return prefix + "any"; + } ); + + return nrv; + } + + std::string to_string( const node_ptr& n ) + { + static stringifier s = make_stringifier(); + return s( n ); + } + + std::string to_string( const std::vector< node_ptr >& v ) + { + std::string result; + for( const auto& c : v ) { + if( !result.empty() ) { + result += ", "; + } + result += to_string( c ); + } + return result; + } + + } // namespace peg + +} // namespace TAO_PEGTL_NAMESPACE + +int main( int argc, char** argv ) // NOLINT(bugprone-exception-escape) +{ + using namespace TAO_PEGTL_NAMESPACE; + + if( argc != 2 ) { + std::cerr << "Usage: " << argv[ 0 ] << " SOURCE\n"; + return 1; + } + + if( !std::filesystem::exists( argv[ 1 ] ) ) { + std::cerr << "Missing source grammar file " << argv[ 1 ] << '\n'; + return 1; + } + + file_input in( argv[ 1 ] ); +#if defined( __cpp_exceptions ) + try { + const auto root = parse_tree::parse< peg::Grammar, peg::selector, nothing, peg::control >( in ); + + for( const auto& rule : root->children ) { + peg::rules_defined.push_back( peg::get_rulename( rule->children.front() ) ); + } + + for( const auto& rule : root->children ) { + std::cout << peg::to_string( rule ) << '\n'; + } + } + catch( const parse_error& e ) { + const auto& p = e.position_object(); + std::cerr << e.what() << '\n' + << in.line_at( p ) << '\n' + << std::setw( int( p.column ) ) << '^' << '\n'; + } +#else + if( const auto root = parse_tree::parse< peg::Grammar, peg::selector, nothing, peg::control >( in ) ) { + for( const auto& rule : root->children ) { + peg::rules_defined.push_back( peg::get_rulename( rule->children.front() ) ); + } + + for( const auto& rule : root->children ) { + std::cout << peg::to_string( rule ) << '\n'; + } + } + else { + std::cerr << "error occurred" << std::endl; + return 1; + } +#endif + + return 0; +} From 2f66b729ac98038e35d6e2f8aa6fd1be13750e06 Mon Sep 17 00:00:00 2001 From: Daniel Deptford Date: Sun, 16 Mar 2025 15:15:32 -0700 Subject: [PATCH 2/6] WIP: Bazel --- .bazelrc | 8 ++ BUILD.bazel | 6 + MODULE.bazel | 0 src/example/pegtl/BUILD.bazel | 227 ++++++++++++++++++++++++++++++++++ 4 files changed, 241 insertions(+) create mode 100644 .bazelrc create mode 100644 BUILD.bazel create mode 100644 MODULE.bazel create mode 100644 src/example/pegtl/BUILD.bazel diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 000000000..f190f670a --- /dev/null +++ b/.bazelrc @@ -0,0 +1,8 @@ +common --enable_platform_specific_config + +build:linux --cxxopt=-std=c++17 +build:linux --host_cxxopt=-std=c++17 +build:macos --cxxopt=-std=c++17 +build:macos --host_cxxopt=-std=c++17 +build:windows --cxxopt=/std:c++17 +build:windows --host_cxxopt=/std:c++17 \ No newline at end of file diff --git a/BUILD.bazel b/BUILD.bazel new file mode 100644 index 000000000..5824914f1 --- /dev/null +++ b/BUILD.bazel @@ -0,0 +1,6 @@ +cc_library( + name = "pegtl", + hdrs = glob(["include/**/*.hpp"]), + strip_include_prefix = "include", + visibility = ["//visibility:public"], +) \ No newline at end of file diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 000000000..e69de29bb diff --git a/src/example/pegtl/BUILD.bazel b/src/example/pegtl/BUILD.bazel new file mode 100644 index 000000000..c7ef93694 --- /dev/null +++ b/src/example/pegtl/BUILD.bazel @@ -0,0 +1,227 @@ +cc_binary( + name = "abnf2pegtl", + srcs = ["abnf2pegtl.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "analyze", + srcs = ["analyze.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "calculator", + srcs = ["calculator.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "chomsky_hierarchy", + srcs = ["chomsky_hierarchy.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "csv1", + srcs = ["csv1.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "csv2", + srcs = ["csv2.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "dynamic_match", + srcs = ["dynamic_match.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "expression", + srcs = ["expression.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "hello_world", + srcs = ["hello_world.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "indent_aware", + srcs = ["indent_aware.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "iri", + srcs = ["iri.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "json_analyze", + srcs = ["json_analyze.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "json_ast", + srcs = ["json_ast.cpp", "json_errors.hpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "json_build", + srcs = ["json_build.cpp", "json_classes.hpp", "json_errors.hpp", "json_unescape.hpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "json_count", + srcs = ["json_count.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "json_coverage", + srcs = ["json_coverage.cpp", "json_errors.hpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "json_parse", + srcs = ["json_parse.cpp", "json_errors.hpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "json_print_debug", + srcs = ["json_print_debug.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "json_print_names", + srcs = ["json_print_names.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "json_trace", + srcs = ["json_trace.cpp", "json_errors.hpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "lua53_analyze", + srcs = ["lua53_analyze.cpp", "lua53.hpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "lua53_parse", + srcs = ["lua53_parse.cpp", "lua53.hpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "modulus_match", + srcs = ["modulus_match.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "parse_tree", + srcs = ["parse_tree.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "parse_tree_user_state", + srcs = ["parse_tree_user_state.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "peg2pegtl", + srcs = ["peg2pegtl.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "proto3", + srcs = ["proto3.cpp", "proto3.hpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "random_order", + srcs = ["random_order.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "recover", + srcs = ["recover.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "s_expression", + srcs = ["s_expression.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "sum", + srcs = ["sum.cpp", "double.hpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "symbol_table", + srcs = ["symbol_table.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "token_input", + srcs = ["token_input.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "unescape", + srcs = ["unescape.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "uri", + srcs = ["uri.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "uri_print_debug", + srcs = ["uri_print_debug.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "uri_print_names", + srcs = ["uri_print_names.cpp"], + deps = ["//:pegtl"], +) + +cc_binary( + name = "uri_trace", + srcs = ["uri_trace.cpp"], + deps = ["//:pegtl"], +) From 9c9fcd47df2f920cc14c85587d05a817e8eb780d Mon Sep 17 00:00:00 2001 From: Daniel Deptford Date: Mon, 17 Mar 2025 20:03:21 -0700 Subject: [PATCH 3/6] Fix character escaping --- src/example/pegtl/peg2pegtl.cpp | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/src/example/pegtl/peg2pegtl.cpp b/src/example/pegtl/peg2pegtl.cpp index 3a1437150..fcacc7ce1 100644 --- a/src/example/pegtl/peg2pegtl.cpp +++ b/src/example/pegtl/peg2pegtl.cpp @@ -155,20 +155,6 @@ namespace TAO_PEGTL_NAMESPACE return find_rule( r, v, r.rbegin() ); } - bool append_char( std::string& s, const char c ) - { - if( !s.empty() ) { - s += ", "; - } - s += '\''; - if( c == '\'' || c == '\\' ) { - s += '\\'; - } - s += c; - s += '\''; - return std::isalpha( c ) != 0; - } - } // namespace #if defined( __cpp_exceptions ) @@ -360,12 +346,17 @@ namespace TAO_PEGTL_NAMESPACE } ); nrv.add< Char >( []( const node_ptr& n ) { - const auto content = n->string_view(); - std::string s; - for( const auto c : content ) { - append_char( s, c ); + if (n->string_view() == "\\[") { + return std::string("'['"); + } else if (n->string_view() == "\\]") { + return std::string("']'"); + } else if (n->string_view() == "\\") { + return std::string("'\\'"); + } else if (n->string_view() == "'") { + return "\'\\" + n->string() + '\''; } - return s; + + return '\'' + n->string() + '\''; } ); nrv.add< Sequence >( []( const node_ptr& n ) { From 7ab8695c73d49e01d8dd23b3aa33e00d7d638ee3 Mon Sep 17 00:00:00 2001 From: Daniel Deptford Date: Mon, 17 Mar 2025 20:04:20 -0700 Subject: [PATCH 4/6] Remove bazel references, oops.. --- MODULE.bazel | 0 src/example/pegtl/BUILD.bazel | 227 ---------------------------------- 2 files changed, 227 deletions(-) delete mode 100644 MODULE.bazel delete mode 100644 src/example/pegtl/BUILD.bazel diff --git a/MODULE.bazel b/MODULE.bazel deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/example/pegtl/BUILD.bazel b/src/example/pegtl/BUILD.bazel deleted file mode 100644 index c7ef93694..000000000 --- a/src/example/pegtl/BUILD.bazel +++ /dev/null @@ -1,227 +0,0 @@ -cc_binary( - name = "abnf2pegtl", - srcs = ["abnf2pegtl.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "analyze", - srcs = ["analyze.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "calculator", - srcs = ["calculator.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "chomsky_hierarchy", - srcs = ["chomsky_hierarchy.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "csv1", - srcs = ["csv1.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "csv2", - srcs = ["csv2.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "dynamic_match", - srcs = ["dynamic_match.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "expression", - srcs = ["expression.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "hello_world", - srcs = ["hello_world.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "indent_aware", - srcs = ["indent_aware.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "iri", - srcs = ["iri.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "json_analyze", - srcs = ["json_analyze.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "json_ast", - srcs = ["json_ast.cpp", "json_errors.hpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "json_build", - srcs = ["json_build.cpp", "json_classes.hpp", "json_errors.hpp", "json_unescape.hpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "json_count", - srcs = ["json_count.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "json_coverage", - srcs = ["json_coverage.cpp", "json_errors.hpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "json_parse", - srcs = ["json_parse.cpp", "json_errors.hpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "json_print_debug", - srcs = ["json_print_debug.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "json_print_names", - srcs = ["json_print_names.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "json_trace", - srcs = ["json_trace.cpp", "json_errors.hpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "lua53_analyze", - srcs = ["lua53_analyze.cpp", "lua53.hpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "lua53_parse", - srcs = ["lua53_parse.cpp", "lua53.hpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "modulus_match", - srcs = ["modulus_match.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "parse_tree", - srcs = ["parse_tree.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "parse_tree_user_state", - srcs = ["parse_tree_user_state.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "peg2pegtl", - srcs = ["peg2pegtl.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "proto3", - srcs = ["proto3.cpp", "proto3.hpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "random_order", - srcs = ["random_order.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "recover", - srcs = ["recover.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "s_expression", - srcs = ["s_expression.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "sum", - srcs = ["sum.cpp", "double.hpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "symbol_table", - srcs = ["symbol_table.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "token_input", - srcs = ["token_input.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "unescape", - srcs = ["unescape.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "uri", - srcs = ["uri.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "uri_print_debug", - srcs = ["uri_print_debug.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "uri_print_names", - srcs = ["uri_print_names.cpp"], - deps = ["//:pegtl"], -) - -cc_binary( - name = "uri_trace", - srcs = ["uri_trace.cpp"], - deps = ["//:pegtl"], -) From 3420c9bf01149d05488916edd414bd2f5cbeaa09 Mon Sep 17 00:00:00 2001 From: Daniel Deptford Date: Mon, 17 Mar 2025 20:05:54 -0700 Subject: [PATCH 5/6] Fix formatting --- src/example/pegtl/peg2pegtl.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/example/pegtl/peg2pegtl.cpp b/src/example/pegtl/peg2pegtl.cpp index fcacc7ce1..3e725e4c5 100644 --- a/src/example/pegtl/peg2pegtl.cpp +++ b/src/example/pegtl/peg2pegtl.cpp @@ -346,13 +346,16 @@ namespace TAO_PEGTL_NAMESPACE } ); nrv.add< Char >( []( const node_ptr& n ) { - if (n->string_view() == "\\[") { - return std::string("'['"); - } else if (n->string_view() == "\\]") { - return std::string("']'"); - } else if (n->string_view() == "\\") { - return std::string("'\\'"); - } else if (n->string_view() == "'") { + if( n->string_view() == "\\[" ) { + return std::string( "'['" ); + } + else if( n->string_view() == "\\]" ) { + return std::string( "']'" ); + } + else if( n->string_view() == "\\" ) { + return std::string( "'\\'" ); + } + else if( n->string_view() == "'" ) { return "\'\\" + n->string() + '\''; } From 1277923c44ba25f3c877c6f28031e95191bc1e94 Mon Sep 17 00:00:00 2001 From: Daniel Deptford Date: Mon, 7 Apr 2025 19:02:25 -0700 Subject: [PATCH 6/6] Removing errant bazel files, mistake --- .bazelrc | 8 -------- BUILD.bazel | 6 ------ 2 files changed, 14 deletions(-) delete mode 100644 .bazelrc delete mode 100644 BUILD.bazel diff --git a/.bazelrc b/.bazelrc deleted file mode 100644 index f190f670a..000000000 --- a/.bazelrc +++ /dev/null @@ -1,8 +0,0 @@ -common --enable_platform_specific_config - -build:linux --cxxopt=-std=c++17 -build:linux --host_cxxopt=-std=c++17 -build:macos --cxxopt=-std=c++17 -build:macos --host_cxxopt=-std=c++17 -build:windows --cxxopt=/std:c++17 -build:windows --host_cxxopt=/std:c++17 \ No newline at end of file diff --git a/BUILD.bazel b/BUILD.bazel deleted file mode 100644 index 5824914f1..000000000 --- a/BUILD.bazel +++ /dev/null @@ -1,6 +0,0 @@ -cc_library( - name = "pegtl", - hdrs = glob(["include/**/*.hpp"]), - strip_include_prefix = "include", - visibility = ["//visibility:public"], -) \ No newline at end of file