Skip to content

Implementing peg2pegtl, based on abnf2pegtl. #377

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 188 additions & 0 deletions include/tao/pegtl/contrib/peg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
// Copyright (c) 2021 Daniel Deptford
// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/

#ifndef TAO_PEGTL_CONTRIB_PEG_HPP
#define TAO_PEGTL_CONTRIB_PEG_HPP

#include <tao/pegtl.hpp>

namespace TAO_PEGTL_NAMESPACE::peg
{
// PEG grammar from https://pdos.csail.mit.edu/~baford/packrat/popl04/peg-popl04.pdf

// clang-format off
// Forward declarations, listed in the order the rules are defined below.
// They are required because the rules refer to each other before (and, for
// Expression/Primary, recursively after) their definitions.

// Hierarchical syntax
struct Definition;
struct Expression;
struct Sequence;
struct Prefix;
struct Suffix;
struct Primary;

// Lexical syntax
struct Identifier;
struct IdentStart;
struct IdentCont;
struct Literal;
struct Class;
struct Range;
struct Char;

// Operator and punctuation tokens
struct LEFTARROW;
struct SLASH;
struct AND;
struct NOT;
struct QUESTION;
struct STAR;
struct PLUS;
struct OPEN;
struct CLOSE;
struct DOT;

// Whitespace, comments and end markers
struct Spacing;
struct Comment;
struct Space;
struct EndOfLine;
struct EndOfFile;

struct Grammar : tao::pegtl::seq<Spacing, tao::pegtl::plus<Definition>, EndOfFile> {};
struct Definition : tao::pegtl::seq<Identifier, LEFTARROW, Expression> {};
struct Expression : tao::pegtl::seq<
Sequence,
tao::pegtl::star<
tao::pegtl::seq<
SLASH,
Sequence
>
>
> {};
struct Sequence : tao::pegtl::star<Prefix> {};
struct Prefix : tao::pegtl::seq<
tao::pegtl::opt<
tao::pegtl::sor<
AND,
NOT
>
>,
Suffix
> {};

struct Suffix : tao::pegtl::seq<
Primary,
tao::pegtl::opt<
tao::pegtl::sor<
QUESTION,
STAR,
PLUS
>
>
> {};

struct Primary : tao::pegtl::sor<
tao::pegtl::seq<Identifier, tao::pegtl::not_at<LEFTARROW> >,
tao::pegtl::seq<OPEN, Expression, CLOSE>,
Literal,
Class,
DOT> {};

struct Identifier : tao::pegtl::seq<IdentStart, tao::pegtl::star<IdentCont>, Spacing> {};

struct IdentStart : tao::pegtl::ranges< 'a', 'z', 'A', 'Z', '_' > {};

struct IdentCont : tao::pegtl::sor<
IdentStart,
tao::pegtl::range<'0','9'>
> {};

struct Literal : tao::pegtl::sor<
tao::pegtl::seq<
tao::pegtl::one<'\''>,
tao::pegtl::star<
tao::pegtl::seq<
tao::pegtl::not_at<
tao::pegtl::one<'\''>
>,
Char
>
>,
tao::pegtl::one<'\''>,
Spacing
>,
tao::pegtl::seq<
tao::pegtl::one<'\"'>,
tao::pegtl::star<
tao::pegtl::seq<
tao::pegtl::not_at<tao::pegtl::one<'\"'> >,
Char
>
>,
tao::pegtl::one<'\"'>,
Spacing
>
> {};

struct Class : tao::pegtl::seq<
tao::pegtl::one<'['>,
tao::pegtl::star<
tao::pegtl::seq<
tao::pegtl::not_at<tao::pegtl::one<']'> >,
Range
>
>,
tao::pegtl::one<']'>,
Spacing
> {};

struct Range : tao::pegtl::sor<
tao::pegtl::seq<
Char,
tao::pegtl::one<'-'>,
Char>,
Char
> {};

struct Char : tao::pegtl::sor<
tao::pegtl::seq<
tao::pegtl::one<'\\'>,
tao::pegtl::one<'n','r','t','\'','\"','[',']','\\'> >,
tao::pegtl::seq<
tao::pegtl::one<'\\'>,
tao::pegtl::range<'0','2'>,
tao::pegtl::range<'0','7'>,
tao::pegtl::range<'0','7'> >,
tao::pegtl::seq<
tao::pegtl::one<'\\'>,
tao::pegtl::range<'0','7'>,
tao::pegtl::opt<tao::pegtl::range<'0','7'> > >,
tao::pegtl::seq<
tao::pegtl::not_at<tao::pegtl::one<'\\'> >,
tao::pegtl::any>
> {};

struct LEFTARROW : tao::pegtl::seq<tao::pegtl::string<'<','-'>, Spacing> {};
struct SLASH : tao::pegtl::seq<tao::pegtl::one<'/'>, Spacing> {};
struct AND : tao::pegtl::seq<tao::pegtl::one<'&'>, Spacing> {};
struct NOT : tao::pegtl::seq<tao::pegtl::one<'!'>, Spacing> {};
struct QUESTION : tao::pegtl::seq<tao::pegtl::one<'?'>, Spacing> {};
struct STAR : tao::pegtl::seq<tao::pegtl::one<'*'>, Spacing> {};
struct PLUS : tao::pegtl::seq<tao::pegtl::one<'+'>, Spacing> {};
struct OPEN : tao::pegtl::seq<tao::pegtl::one<'('>, Spacing> {};
struct CLOSE : tao::pegtl::seq<tao::pegtl::one<')'>, Spacing> {};
struct DOT : tao::pegtl::seq<tao::pegtl::one<'.'>, Spacing> {};

struct Spacing : tao::pegtl::star<tao::pegtl::sor<Space, Comment> > {};
struct Comment :
tao::pegtl::seq<
tao::pegtl::one<'#'>,
tao::pegtl::star<
tao::pegtl::seq<
tao::pegtl::not_at<EndOfLine>,
tao::pegtl::any
>
>,
EndOfLine
> {};

struct Space : tao::pegtl::sor<tao::pegtl::one<' '>, tao::pegtl::one<'\t'>, EndOfLine> {};
struct EndOfLine : tao::pegtl::sor<tao::pegtl::string<'\r','\n'>, tao::pegtl::one<'\n'>, tao::pegtl::one<'\r'> > {};
struct EndOfFile : tao::pegtl::eof {};
// clang-format on

} // namespace TAO_PEGTL_NAMESPACE::peg

#endif
1 change: 1 addition & 0 deletions src/example/pegtl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ set(example_sources
modulus_match.cpp
parse_tree.cpp
parse_tree_user_state.cpp
peg2pegtl.cpp
proto3.cpp
random_order.cpp
recover.cpp
Expand Down
37 changes: 37 additions & 0 deletions src/example/pegtl/peg.peg
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Parsing Expression Grammar (PEG) taken from
# https://pdos.csail.mit.edu/~baford/packrat/popl04/peg-popl04.pdf
#
# This file describes the syntax of PEG grammars, written in PEG itself.
# Choice ('/') is ordered: alternatives are tried from left to right.

# Hierarchical syntax
# A grammar is one or more definitions of the form "Name <- Expression".
Grammar <- Spacing Definition+ EndOfFile
Definition <- Identifier LEFTARROW Expression
Expression <- Sequence (SLASH Sequence)*
Sequence <- Prefix*
Prefix <- (AND / NOT)? Suffix
Suffix <- Primary (QUESTION / STAR / PLUS)?
# "Identifier !LEFTARROW" rejects an identifier that is followed by "<-",
# because that identifier begins the next Definition.
Primary <- Identifier !LEFTARROW / OPEN Expression CLOSE / Literal / Class / DOT

# Lexical syntax
Identifier <- IdentStart IdentCont* Spacing
IdentStart <- [a-zA-Z_]
IdentCont <- IdentStart / [0-9]
# A literal is closed by the same kind of quote that opened it.
Literal <- ['] (!['] Char)* ['] Spacing / ["] (!["] Char)* ["] Spacing
Class <- '[' (!']' Range)* ']' Spacing
Range <- Char '-' Char / Char
# Char alternatives, in order: a named escape, a three-digit octal escape,
# a one- or two-digit octal escape, or any character except a backslash.
Char <- '\\' [nrt'"\[\]\\] / '\\' [0-2][0-7][0-7] / '\\' [0-7][0-7]? / !'\\' .

# Operator and punctuation tokens; each one also consumes trailing spacing.
LEFTARROW <- '<-' Spacing
SLASH <- '/' Spacing
AND <- '&' Spacing
NOT <- '!' Spacing
QUESTION <- '?' Spacing
STAR <- '*' Spacing
PLUS <- '+' Spacing
OPEN <- '(' Spacing
CLOSE <- ')' Spacing
DOT <- '.' Spacing

# Whitespace and comments. A comment runs from '#' through the next
# EndOfLine, which is required.
Spacing <- (Space / Comment)*
Comment <- '#' (!EndOfLine .)* EndOfLine
Space <- ' ' / '\t' / EndOfLine
EndOfLine <- '\r\n' / '\n' / '\r'
# "!." succeeds only when no character is left.
EndOfFile <- !.
Loading
Loading