From f6f1bd69f640d8891daab94014f1983d4d8dac3c Mon Sep 17 00:00:00 2001 From: Will Chandler Date: Sun, 18 Jul 2021 23:49:14 -0400 Subject: [PATCH] [lang] Add escape syntax for field names Related to https://github.com/rcoh/angle-grinder/issues/99 Currently field names containing a space or period, e.g. `date received` or `grpc.method`, cannot be parsed. This could be worked around using `jq` or similar tools to rewrite the field name, but that's a pain. This commit adds an escaped field name syntax of `[""]` which is based on the Object Identifier-Index syntax[0] used by `jq`, so it should be somewhat familiar to many people who parse JSON on the command line. The more obvious option of delimiting with just quotes, e.g. "date received", creates an ambiguity between string literals and escaped field names. For example, does `where foo == "date received"` mean field `foo` matches field `date received`, or field `foo` matches the string "date received"? Example query: ``` * | json | where ["grpc.method"] == "Foo" | count by ["date received"] ``` [0] https://stedolan.github.io/jq/manual/#ObjectIdentifier-Index:.foo,.foo.bar --- README.md | 8 ++++++++ src/lang.rs | 25 ++++++++++++++++++++++- tests/structured_tests/escaped_ident.toml | 13 ++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 tests/structured_tests/escaped_ident.toml diff --git a/README.md b/README.md index 059b853..ed92ebb 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,14 @@ A simple query that operates on JSON logs and counts the number of logs per leve agrind '* | json | count by log_level' ``` +### Escaping Field Names + +Field names containing a space or period must be escaped using `[""]`: + +```bash +agrind '* | json | count by ["date received"], ["grpc.method"]' +``` + ### Filters There are three basic filters: diff --git a/src/lang.rs b/src/lang.rs index fb2baf8..78946bb 100644 --- a/src/lang.rs +++ b/src/lang.rs @@ -366,6 +366,14 @@ fn is_ident(c: char) -> bool { 
is_alphanumeric(c as u8) || c == '_' } +fn is_escaped_ident(c: char) -> bool { + match c { + space if is_space(space as u8) => true, + '.' => true, + _ => is_ident(c), + } +} + fn starts_ident(c: char) -> bool { is_alphabetic(c as u8) || c == '_' } @@ -431,12 +439,20 @@ named!(column_ref, do_parse!( (Expr::Column { head: DataAccessAtom::Key(head), rest: rest }) )); -named!(ident, do_parse!( +named!(ident, alt!(bare_ident | escaped_ident)); + +named!(bare_ident, do_parse!( start: take_while1!(starts_ident) >> rest: take_while!(is_ident) >> (start.fragment.0.to_owned() + rest.fragment.0) )); +named!(escaped_ident, do_parse!( + start: preceded!(tag!("[\""), take_while1!(starts_ident)) >> + rest: terminated!(take_while!(is_escaped_ident), tag!("\"]")) >> + (start.fragment.0.to_owned() + rest.fragment.0) +)); + named!(arguments>, add_return_error!(SyntaxErrors::StartOfError.into(), delimited!( tag!("("), separated_list!(tag!(","), expr), @@ -1166,6 +1182,13 @@ mod tests { expect_fail!(ident, "5x"); } + #[test] + fn parse_quoted_ident() { + expect!(ident, "[\"hello world\"]", "hello world".to_string()); + expect!(ident, "[\"hello.world\"]", "hello.world".to_string()); + expect_fail!(ident, "\"\""); + } + #[test] fn parse_var_list() { expect!( diff --git a/tests/structured_tests/escaped_ident.toml b/tests/structured_tests/escaped_ident.toml new file mode 100644 index 0000000..2af42a8 --- /dev/null +++ b/tests/structured_tests/escaped_ident.toml @@ -0,0 +1,13 @@ +query = """ +* | json | count by ["grpc.method"], ["start time"] +""" +input = """ +{"start time": "today", "grpc.method": "Foo"} +{"start time": "today", "grpc.method": "Bar"} +""" +output = """ +["grpc.method"] ["start time"] _count +----------------------------------------------------------- +Bar today 1 +Foo today 1 +"""