From de806555a6c2868b53ab3b7edcaa0f53b9b19d9d Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Wed, 6 Nov 2024 19:04:56 +0100 Subject: [PATCH] style: rename `HexTokens` to `HexSubPattern` for consistency with CST. --- lib/src/compiler/ir/hex2hir.rs | 129 +++++++++++++++------------------ parser/src/ast/ascii_tree.rs | 5 +- parser/src/ast/cst2ast.rs | 8 +- parser/src/ast/mod.rs | 38 +++++++--- 4 files changed, 90 insertions(+), 90 deletions(-) diff --git a/lib/src/compiler/ir/hex2hir.rs b/lib/src/compiler/ir/hex2hir.rs index 12ca10da..1c4f5978 100644 --- a/lib/src/compiler/ir/hex2hir.rs +++ b/lib/src/compiler/ir/hex2hir.rs @@ -12,16 +12,20 @@ pub(in crate::compiler) fn hex_pattern_hir_from_ast( ctx: &mut CompileContext, pattern: &ast::HexPattern, ) -> Result { - hex_tokens_hir_from_ast(ctx, &pattern.identifier, &pattern.tokens) + hex_sub_pattern_hir_from_ast( + ctx, + &pattern.identifier, + &pattern.sub_patterns, + ) } -fn hex_tokens_hir_from_ast( +fn hex_sub_pattern_hir_from_ast( ctx: &mut CompileContext, pattern_ident: &ast::Ident, - tokens: &ast::HexTokens, + sub_pattern: &ast::HexSubPattern, ) -> Result { - let mut hir_tokens = Vec::with_capacity(tokens.tokens.len()); - let mut ast_tokens = tokens.tokens.iter().peekable(); + let mut hir_tokens = Vec::with_capacity(sub_pattern.len()); + let mut ast_tokens = sub_pattern.iter().peekable(); while let Some(token) = ast_tokens.next() { match token { @@ -63,7 +67,7 @@ fn hex_tokens_hir_from_ast( Vec::with_capacity(alt.alternatives.len()); for alt in &alt.as_ref().alternatives { - alternatives.push(hex_tokens_hir_from_ast( + alternatives.push(hex_sub_pattern_hir_from_ast( ctx, pattern_ident, alt, @@ -182,7 +186,7 @@ mod tests { use yara_x_parser::ast; use yara_x_parser::ast::{ - HexAlternative, HexJump, HexPattern, HexToken, HexTokens, Ident, + HexAlternative, HexJump, HexPattern, HexSubPattern, HexToken, Ident, }; use super::hex_byte_to_class; @@ -232,13 +236,11 @@ mod tests { let mut pattern = HexPattern { identifier: Ident::new("test_ident"), - tokens: HexTokens { - tokens: vec![ - HexToken::Byte(ast::HexByte::new(b'a', 0xff)), - HexToken::Byte(ast::HexByte::new(b'b', 0xff)), - HexToken::Byte(ast::HexByte::new(b'c', 0xff)), - ], - }, + sub_patterns: HexSubPattern(vec![ + HexToken::Byte(ast::HexByte::new(b'a', 0xff)), + HexToken::Byte(ast::HexByte::new(b'b', 0xff)), + HexToken::Byte(ast::HexByte::new(b'c', 0xff)), + ]), ..Default::default() }; @@ -247,29 +249,25 @@ mod tests { Ok(Hir::literal("abc".as_bytes())) ); - pattern.tokens = HexTokens { - tokens: vec![ - HexToken::Byte(ast::HexByte::new(0x01, 0xff)), - HexToken::Byte(ast::HexByte::new(0x02, 0xff)), - HexToken::Byte(ast::HexByte::new(0x03, 0xff)), - ], - }; + pattern.sub_patterns = HexSubPattern(vec![ + HexToken::Byte(ast::HexByte::new(0x01, 0xff)), + HexToken::Byte(ast::HexByte::new(0x02, 0xff)), + HexToken::Byte(ast::HexByte::new(0x03, 0xff)), + ]); assert_eq!( super::hex_pattern_hir_from_ast(&mut ctx, &pattern), Ok(Hir::literal([0x01, 0x02, 0x03])) ); - pattern.tokens = HexTokens { - tokens: vec![ - HexToken::Byte(ast::HexByte::new(0x01, 0xff)), - HexToken::Byte(ast::HexByte::new(0x02, 0xff)), - HexToken::Byte(ast::HexByte::new(0x03, 0xff)), - HexToken::Byte(ast::HexByte::new(0x00, 0x00)), - HexToken::Byte(ast::HexByte::new(0x05, 0xff)), - HexToken::Byte(ast::HexByte::new(0x06, 0xff)), - ], - }; + pattern.sub_patterns = HexSubPattern(vec![ + HexToken::Byte(ast::HexByte::new(0x01, 0xff)), + HexToken::Byte(ast::HexByte::new(0x02, 0xff)), + HexToken::Byte(ast::HexByte::new(0x03, 0xff)), + HexToken::Byte(ast::HexByte::new(0x00, 0x00)), + HexToken::Byte(ast::HexByte::new(0x05, 0xff)), + HexToken::Byte(ast::HexByte::new(0x06, 0xff)), + ]); assert_eq!( super::hex_pattern_hir_from_ast(&mut ctx, &pattern), @@ -280,13 +278,11 @@ mod tests { ])) ); - pattern.tokens = HexTokens { - tokens: vec![ - HexToken::Byte(ast::HexByte::new(0x01, 0xff)), - HexToken::NotByte(ast::HexByte::new(0x02, 0xff)), - HexToken::Byte(ast::HexByte::new(0x03, 0xff)), - ], - }; + pattern.sub_patterns = HexSubPattern(vec![ + HexToken::Byte(ast::HexByte::new(0x01, 0xff)), + HexToken::NotByte(ast::HexByte::new(0x02, 0xff)), + HexToken::Byte(ast::HexByte::new(0x03, 0xff)), + ]); assert_eq!( super::hex_pattern_hir_from_ast(&mut ctx, &pattern), @@ -300,13 +296,11 @@ mod tests { ])) ); - pattern.tokens = HexTokens { - tokens: vec![ - HexToken::Byte(ast::HexByte::new(0x01, 0xff)), - HexToken::NotByte(ast::HexByte::new(0x40, 0xfe)), - HexToken::Byte(ast::HexByte::new(0x03, 0xff)), - ], - }; + pattern.sub_patterns = HexSubPattern(vec![ + HexToken::Byte(ast::HexByte::new(0x01, 0xff)), + HexToken::NotByte(ast::HexByte::new(0x40, 0xfe)), + HexToken::Byte(ast::HexByte::new(0x03, 0xff)), + ]); assert_eq!( super::hex_pattern_hir_from_ast(&mut ctx, &pattern), @@ -320,22 +314,17 @@ mod tests { ])) ); - pattern.tokens = HexTokens { - tokens: vec![HexToken::Alternative(Box::new( - HexAlternative::new(vec![ - HexTokens { - tokens: vec![HexToken::Byte(ast::HexByte::new( - 0x01, 0xff, - ))], - }, - HexTokens { - tokens: vec![HexToken::Byte(ast::HexByte::new( - 0x02, 0xff, - ))], - }, - ]), - ))], - }; + pattern.sub_patterns = HexSubPattern(vec![HexToken::Alternative( + Box::new(HexAlternative::new(vec![ + HexSubPattern(vec![HexToken::Byte(ast::HexByte::new( + 0x01, 0xff, + ))]), + HexSubPattern(vec![HexToken::Byte(ast::HexByte::new( + 0x02, 0xff, + ))]), + ])), + )]); + assert_eq!( super::hex_pattern_hir_from_ast(&mut ctx, &pattern), Ok(Hir::alternation(vec![ @@ -344,16 +333,14 @@ mod tests { ])) ); - pattern.tokens = HexTokens { - tokens: vec![ - HexToken::Byte(ast::HexByte::new(0x01, 0xff)), - HexToken::Byte(ast::HexByte::new(0x02, 0xff)), - HexToken::Byte(ast::HexByte::new(0x03, 0xff)), - HexToken::Jump(HexJump::new(None, None)), - HexToken::Byte(ast::HexByte::new(0x05, 0xff)), - HexToken::Byte(ast::HexByte::new(0x06, 0xff)), - ], - }; + pattern.sub_patterns = HexSubPattern(vec![ + HexToken::Byte(ast::HexByte::new(0x01, 0xff)), + HexToken::Byte(ast::HexByte::new(0x02, 0xff)), + HexToken::Byte(ast::HexByte::new(0x03, 0xff)), + HexToken::Jump(HexJump::new(None, None)), + HexToken::Byte(ast::HexByte::new(0x05, 0xff)), + HexToken::Byte(ast::HexByte::new(0x06, 0xff)), + ]); assert_eq!( super::hex_pattern_hir_from_ast(&mut ctx, &pattern), diff --git a/parser/src/ast/ascii_tree.rs b/parser/src/ast/ascii_tree.rs index 79d536f3..c1abae6e 100644 --- a/parser/src/ast/ascii_tree.rs +++ b/parser/src/ast/ascii_tree.rs @@ -485,7 +485,7 @@ pub(crate) fn pattern_ascii_tree(pattern: &Pattern) -> Tree { )]), Pattern::Hex(h) => Node( h.identifier.name.to_string(), - vec![hex_tokens_ascii_tree(&h.tokens)], + vec![hex_tokens_ascii_tree(&h.sub_patterns)], ), Pattern::Regexp(r) => Leaf(vec![format!( "{} = /{}/{}{} {}", @@ -498,9 +498,8 @@ pub(crate) fn pattern_ascii_tree(pattern: &Pattern) -> Tree { } } -pub(crate) fn hex_tokens_ascii_tree(tokens: &HexTokens) -> Tree { +pub(crate) fn hex_tokens_ascii_tree(tokens: &HexSubPattern) -> Tree { let nodes = tokens - .tokens .iter() .map(|t| match t { HexToken::Byte(b) => { diff --git a/parser/src/ast/cst2ast.rs b/parser/src/ast/cst2ast.rs index 5ff14e3c..11095876 100644 --- a/parser/src/ast/cst2ast.rs +++ b/parser/src/ast/cst2ast.rs @@ -643,7 +643,7 @@ impl<'src> Builder<'src> { Pattern::Hex(Box::new(HexPattern { identifier, - tokens, + sub_patterns: tokens, modifiers, })) } @@ -753,7 +753,7 @@ impl<'src> Builder<'src> { Ok(PatternModifiers::new(modifiers)) } - fn hex_pattern(&mut self) -> Result { + fn hex_pattern(&mut self) -> Result { self.begin(HEX_PATTERN)?; self.expect(L_BRACE)?; @@ -765,7 +765,7 @@ impl<'src> Builder<'src> { Ok(sub_pattern) } - fn hex_sub_pattern(&mut self) -> Result { + fn hex_sub_pattern(&mut self) -> Result { self.begin(HEX_SUB_PATTERN)?; let mut sub_patterns = Vec::new(); @@ -820,7 +820,7 @@ impl<'src> Builder<'src> { self.end(HEX_SUB_PATTERN)?; - Ok(HexTokens { tokens: sub_patterns }) + Ok(HexSubPattern(sub_patterns)) } fn hex_alternative(&mut self) -> Result { diff --git a/parser/src/ast/mod.rs b/parser/src/ast/mod.rs index 521a9123..5abc9e9d 100644 --- a/parser/src/ast/mod.rs +++ b/parser/src/ast/mod.rs @@ -238,15 +238,29 @@ pub struct RegexpPattern<'src> { #[derive(Debug, Default)] pub struct HexPattern<'src> { pub identifier: Ident<'src>, - pub tokens: HexTokens, + pub sub_patterns: HexSubPattern, pub modifiers: PatternModifiers<'src>, } /// A sequence of tokens that conform a hex pattern (a.k.a. hex string). #[derive(Debug, Default)] -pub struct HexTokens { - // TODO: rename to HexSubPattern - pub tokens: Vec, +pub struct HexSubPattern(pub Vec); + +impl HexSubPattern { + #[inline] + pub fn iter(&self) -> impl Iterator { + self.0.iter() + } + + #[inline] + pub fn len(&self) -> usize { + self.0.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } } /// Each of the types of tokens in a hex pattern (a.k.a. hex string). @@ -287,16 +301,16 @@ impl HexByte { /// An alternative in a hex pattern (a.k.a. hex string). /// -/// Alternatives are sequences of hex tokens separated by `|`. +/// Alternatives are sequences of hex sub-patterns separated by `|`. #[derive(Debug, Default)] pub struct HexAlternative { span: Span, - pub alternatives: Vec, + pub alternatives: Vec, } impl HexAlternative { #[doc(hidden)] - pub fn new(alternatives: Vec) -> Self { + pub fn new(alternatives: Vec) -> Self { Self { alternatives, span: Span::default() } } } @@ -1012,13 +1026,13 @@ impl WithSpan for HexToken { } } -impl WithSpan for HexTokens { +impl WithSpan for HexSubPattern { fn span(&self) -> Span { - let span = self.tokens.first().map(|t| t.span()).unwrap_or_default(); - if self.tokens.len() == 1 { + let span = self.0.first().map(|t| t.span()).unwrap_or_default(); + if self.0.len() == 1 { return span; } - span.combine(&self.tokens.last().map(|t| t.span()).unwrap_or_default()) + span.combine(&self.0.last().map(|t| t.span()).unwrap_or_default()) } } @@ -1124,7 +1138,7 @@ impl WithSpan for TextPattern<'_> { impl WithSpan for HexPattern<'_> { fn span(&self) -> Span { if self.modifiers.is_empty() { - self.identifier.span().combine(&self.tokens.span()) + self.identifier.span().combine(&self.sub_patterns.span()) } else { self.identifier.span().combine(&self.modifiers.span()) }