From 42e8f24455ba9fa40c78763223aa93c43890770d Mon Sep 17 00:00:00 2001 From: Wowkster <49880655+wowkster@users.noreply.github.com> Date: Sat, 5 Aug 2023 17:01:29 -0400 Subject: [PATCH 1/8] chore: :wrench: update root Cargo.toml Updated `workspace.resolver` to latest --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index 529c35d..003c782 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [workspace] +resolver = "2" members = [ "compiler", ] From b08ba4528415813f53455abc5f42c7ebd5e7298e Mon Sep 17 00:00:00 2001 From: Wowkster <49880655+wowkster@users.noreply.github.com> Date: Sat, 5 Aug 2023 17:05:26 -0400 Subject: [PATCH 2/8] fix: :rotating_light: silence linter warnings --- compiler/src/frontend/parser.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler/src/frontend/parser.rs b/compiler/src/frontend/parser.rs index 1cb2ccd..768960e 100644 --- a/compiler/src/frontend/parser.rs +++ b/compiler/src/frontend/parser.rs @@ -74,6 +74,7 @@ macro_rules! terminal { }; } +#[allow(unused_macros)] macro_rules! non_terminal { ($kind:expr, $children:expr) => { ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -211,6 +212,7 @@ impl LeekParser { } /// Peeks the nth token or returns an error if there are none left + #[allow(unused)] fn peek_nth_expect(&self, n: usize) -> Result<&LeekToken, LeekCompilerError> { self.lexer.peek_nth(n)?.ok_or_else(|| { ParserError { From 674e81d47357e03617b0a914f6c1361d3c4a96d2 Mon Sep 17 00:00:00 2001 From: Wowkster <49880655+wowkster@users.noreply.github.com> Date: Sat, 5 Aug 2023 17:05:41 -0400 Subject: [PATCH 3/8] fix: :bug: change def for AST StructMethodCall Change StructMethodCall to contain a "callee" of type Box instead of an "identifier" of type QualifiedIdentifier because only function calls are limited to named identifiers. Method call expressions should not be limited in what they can be called on. 
--- compiler/src/frontend/ast/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/src/frontend/ast/mod.rs b/compiler/src/frontend/ast/mod.rs index cd66cb0..40320ec 100644 --- a/compiler/src/frontend/ast/mod.rs +++ b/compiler/src/frontend/ast/mod.rs @@ -213,7 +213,7 @@ pub struct StructFieldAccess { #[derive(Debug, PartialEq)] pub struct StructMethodCall { - pub identifier: QualifiedIdentifier, + pub callee: Box, pub method: String, pub arguments: Vec, } From 157e98bc3938e0bad6cf2585ea4659d2937b2734 Mon Sep 17 00:00:00 2001 From: Wowkster <49880655+wowkster@users.noreply.github.com> Date: Sat, 5 Aug 2023 17:18:59 -0400 Subject: [PATCH 4/8] chore: :arrow_up: upgrade deps for nightly 1.73.0 --- Cargo.lock | 117 ++++++++++++++++++++------------------------ compiler/Cargo.toml | 4 +- 2 files changed, 55 insertions(+), 66 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aff5c8a..e7e5054 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6342bd4f5a1205d7f41e94a41a901f5647c938cdfa96036338e8533c9d6c2450" +checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" dependencies = [ "anstyle", "anstyle-parse", @@ -28,15 +28,15 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" +checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" [[package]] name = "anstyle-parse" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" +checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" dependencies = [ "utf8parse", ] 
@@ -62,15 +62,18 @@ dependencies = [ [[package]] name = "bitflags" -version = "1.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" [[package]] name = "cc" -version = "1.0.79" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "6c6b2562119bf28c3439f7f02db99faf0aa1a8cdfe5772a2ee155d32227239f0" +dependencies = [ + "libc", +] [[package]] name = "cfg-if" @@ -80,9 +83,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.2.4" +version = "4.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "956ac1f6381d8d82ab4684768f89c0ea3afe66925ceadb4eeb3fc452ffc55d62" +checksum = "5fd304a20bff958a57f04c4e96a2e7594cc4490a0e809cbd48bb6437edaa452d" dependencies = [ "clap_builder", "clap_derive", @@ -91,22 +94,21 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.2.4" +version = "4.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84080e799e54cff944f4b4a4b0e71630b0e0443b25b985175c7dddc1a859b749" +checksum = "01c6a3f08f1fe5662a35cfe393aec09c4df95f60ee93b7556505260f75eee9e1" dependencies = [ "anstream", "anstyle", - "bitflags", "clap_lex", "strsim", ] [[package]] name = "clap_derive" -version = "4.2.0" +version = "4.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" +checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" dependencies = [ "heck", "proc-macro2", @@ -116,9 +118,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.4.1" +version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" [[package]] name = "colorchoice" @@ -134,15 +136,15 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "either" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "errno" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" dependencies = [ "errno-dragonfly", "libc", @@ -161,9 +163,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", @@ -178,44 +180,32 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" [[package]] name = "indoc" -version = "2.0.1" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f2cb48b81b1dc9f39676bf99f5499babfec7cd8fe14307f7b3d747208fb5690" - -[[package]] -name = "io-lifetimes" -version = "1.0.10" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys", -] +checksum = "2c785eefb63ebd0e33416dfcb8d6da0bf27ce752843a45632a67bf10d4d4b5c4" [[package]] name = "is-terminal" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi", - "io-lifetimes", "rustix", "windows-sys", ] [[package]] name = "itertools" -version = "0.10.5" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" dependencies = [ "either", ] @@ -234,15 +224,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.142" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "linux-raw-sys" -version = "0.3.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36eb31c1778188ae1e64398743890d0877fef36d11521ac60406b42016e8c2cf" +checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" [[package]] name = "mktemp" @@ -255,37 +245,36 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = 
"proc-macro2" -version = "1.0.56" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.26" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" dependencies = [ "proc-macro2", ] [[package]] name = "rustix" -version = "0.37.14" +version = "0.38.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b864d3c18a5785a05953adeed93e2dca37ed30f18e69bba9f30079d51f363f" +checksum = "172891ebdceb05aa0005f533a6cbfca599ddd7d966f6f5d4d9b2e70478e70399" dependencies = [ "bitflags", "errno", - "io-lifetimes", "libc", "linux-raw-sys", "windows-sys", @@ -299,9 +288,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "2.0.15" +version = "2.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" dependencies = [ "proc-macro2", "quote", @@ -310,9 +299,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "utf8parse" @@ -368,9 +357,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.48.0" +version = "0.48.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index 7823689..346a43e 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -7,8 +7,8 @@ edition = "2021" [dependencies] ansi_term = "0.12.1" -clap = { version = "4.1.6", features = ["derive"] } +clap = { version = "4.3.19", features = ["derive"] } diff = "0.1.13" indoc = "2.0.1" -itertools = "0.10.5" +itertools = "0.11.0" mktemp = "0.5.0" From 8140bd9acf511c4158e2a9ad79f7dacab3c9e740 Mon Sep 17 00:00:00 2001 From: Wowkster <49880655+wowkster@users.noreply.github.com> Date: Sat, 5 Aug 2023 17:38:13 -0400 Subject: [PATCH 5/8] refactor: :recycle: redefine root AST structure Changed LeekAst to contain a list of `ProgramPart`s instead of a single struct that contains multiple buckets for program definitions. In hindsight that original definition was a terrible idea and would have caused problems later down the line. --- compiler/src/frontend/ast/builder.rs | 455 ++++++++++++--------------- compiler/src/frontend/ast/mod.rs | 16 +- 2 files changed, 210 insertions(+), 261 deletions(-) diff --git a/compiler/src/frontend/ast/builder.rs b/compiler/src/frontend/ast/builder.rs index e6669d3..cf99243 100644 --- a/compiler/src/frontend/ast/builder.rs +++ b/compiler/src/frontend/ast/builder.rs @@ -15,7 +15,7 @@ use crate::{ }; use super::{ - Expression, IntegerKind, LeekAst, Literal, LiteralKind, PrimitiveKind, Program, + Expression, IntegerKind, LeekAst, Literal, LiteralKind, PrimitiveKind, ProgramPart, QualifiedIdentifier, Type, VariableAssignment, }; @@ -25,17 +25,9 @@ impl LeekAst { /// This function is infallible. If there is an error, it is due to a bug in the parser or the builder. /// As such, this function will panic if there is an error. 
pub fn build_from(parse_tree: ParseTree) -> Self { - let root = Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![], - struct_definitions: vec![], - enum_definitions: vec![], - }; - let mut ast = Self { source_file: parse_tree.source_file.clone(), - root, + items: Vec::new(), }; ast.populate(parse_tree); @@ -49,26 +41,31 @@ impl LeekAst { for node in &program.children { let ParseTreeNode::NonTerminal(top_level_node) = node else { - panic!("Expected top level node to be non-terminal, found {:?}", node); + panic!( + "Expected top level node to be non-terminal, found {:?}", + node + ); }; match top_level_node.kind { - ParseTreeNonTerminalKind::ConstantVariableDeclaration => self - .root - .constant_variables - .push(VariableDeclaration::from_node(top_level_node)), - ParseTreeNonTerminalKind::StaticVariableDeclaration => self - .root - .static_variables - .push(VariableDeclaration::from_node(top_level_node)), - ParseTreeNonTerminalKind::FunctionDefinition => self - .root - .function_definitions - .push(FunctionDefinition::from_node(top_level_node)), - ParseTreeNonTerminalKind::StructDefinition => self - .root - .struct_definitions - .push(StructDefinition::from_node(top_level_node)), + ParseTreeNonTerminalKind::ConstantVariableDeclaration => self.items.push( + ProgramPart::ConstantVariable(VariableDeclaration::from_node(top_level_node)), + ), + ParseTreeNonTerminalKind::StaticVariableDeclaration => { + self.items + .push(ProgramPart::StaticVariable(VariableDeclaration::from_node( + top_level_node, + ))) + } + ParseTreeNonTerminalKind::FunctionDefinition => self.items.push( + ProgramPart::FunctionDefinition(FunctionDefinition::from_node(top_level_node)), + ), + ParseTreeNonTerminalKind::StructDefinition => { + self.items + .push(ProgramPart::StructDefinition(StructDefinition::from_node( + top_level_node, + ))) + } _ => panic!("Unexpected top level node: {:?}", top_level_node), } } @@ -288,8 +285,8 @@ impl FromNode for 
Expression { impl From for IntegerKind { fn from(value: LeekToken) -> Self { let LeekTokenKind::IntegerLiteral(integer) = value.kind else { - panic!("Expected integer literal, found {:?}", value.kind) - }; + panic!("Expected integer literal, found {:?}", value.kind) + }; // TODO: add support for type specifiers like `u32` and `i32` @@ -799,30 +796,24 @@ mod tests { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::FunctionCall(FunctionCallExpression { - identifier: QualifiedIdentifier::new(None, "println".to_owned()), - arguments: vec![Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::String("\"Hello, world!\"".to_owned()), - span: Span::new( - Position { row: 1, col: 12 }, - Position { row: 1, col: 27 }, - ), - }))], - })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::FunctionCall(FunctionCallExpression { + identifier: QualifiedIdentifier::new(None, "println".to_owned()), + arguments: vec![Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::String("\"Hello, world!\"".to_owned()), + span: Span::new( + Position { row: 1, col: 12 }, + Position { row: 1, col: 27 }, + ), + }))], + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -845,32 +836,26 @@ mod tests { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - 
parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::Block(Block { - statements: vec![Statement::FunctionCall(FunctionCallExpression { - identifier: QualifiedIdentifier::new(None, "println".to_owned()), - arguments: vec![Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::String("\"Hello, world!\"".to_owned()), - span: Span::new( - Position { row: 2, col: 16 }, - Position { row: 2, col: 31 }, - ), - }))], - })], + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::Block(Block { + statements: vec![Statement::FunctionCall(FunctionCallExpression { + identifier: QualifiedIdentifier::new(None, "println".to_owned()), + arguments: vec![Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::String("\"Hello, world!\"".to_owned()), + span: Span::new( + Position { row: 2, col: 16 }, + Position { row: 2, col: 31 }, + ), + }))], })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -891,32 +876,26 @@ mod tests { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::VariableDeclaration(VariableDeclaration { - kind: VariableDeclarationKind::Local, - identifier: "a".to_owned(), - ty: None, - value: Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Integer(IntegerKind::I32(100)), - span: Span::new( - Position { row: 1, col: 13 }, - Position { row: 1, col: 16 }, - ), - })), - })], - }, - }], - struct_definitions: vec![], - enum_definitions: 
vec![], - }, + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::VariableDeclaration(VariableDeclaration { + kind: VariableDeclarationKind::Local, + identifier: "a".to_owned(), + ty: None, + value: Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Integer(IntegerKind::I32(100)), + span: Span::new( + Position { row: 1, col: 13 }, + Position { row: 1, col: 16 }, + ), + })), + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -937,31 +916,25 @@ mod tests { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::VariableAssignment(VariableAssignment { - identifier: QualifiedIdentifier::new(None, "a".to_owned()), - operator: AssignmentOperator::PlusEquals, - value: Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Integer(IntegerKind::I32(420)), - span: Span::new( - Position { row: 1, col: 9 }, - Position { row: 1, col: 12 }, - ), - })), - })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::VariableAssignment(VariableAssignment { + identifier: QualifiedIdentifier::new(None, "a".to_owned()), + operator: AssignmentOperator::PlusEquals, + value: Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Integer(IntegerKind::I32(420)), + span: Span::new( + Position { row: 1, col: 9 }, + Position { row: 1, col: 
12 }, + ), + })), + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -982,40 +955,34 @@ mod tests { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "add".to_owned(), - struct_identifier: None, - parameters: vec![ - FunctionParameter { - identifier: "a".to_owned(), - ty: Type::Primitive(PrimitiveKind::I32), - }, - FunctionParameter { - identifier: "b".to_owned(), - ty: Type::Primitive(PrimitiveKind::I32), - }, - ], - return_type: Type::Primitive(PrimitiveKind::I32), - body: Block { - statements: vec![Statement::Yeet(Expression::BinaryExpression( - BinaryExpression { - binary_operator: BinaryOperator::Plus, - lhs: Box::new(Expression::Atom(Atom::QualifiedIdentifier( - QualifiedIdentifier::new(None, "a".to_owned()), - ))), - rhs: Box::new(Expression::Atom(Atom::QualifiedIdentifier( - QualifiedIdentifier::new(None, "b".to_owned()), - ))), - }, - ))], + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "add".to_owned(), + struct_identifier: None, + parameters: vec![ + FunctionParameter { + identifier: "a".to_owned(), + ty: Type::Primitive(PrimitiveKind::I32), }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + FunctionParameter { + identifier: "b".to_owned(), + ty: Type::Primitive(PrimitiveKind::I32), + }, + ], + return_type: Type::Primitive(PrimitiveKind::I32), + body: Block { + statements: vec![Statement::Yeet(Expression::BinaryExpression( + BinaryExpression { + binary_operator: BinaryOperator::Plus, + lhs: Box::new(Expression::Atom(Atom::QualifiedIdentifier( + QualifiedIdentifier::new(None, "a".to_owned()), + ))), + rhs: Box::new(Expression::Atom(Atom::QualifiedIdentifier( + QualifiedIdentifier::new(None, "b".to_owned()), + ))), + }, + ))], + }, + })], }; assert_ast_eq!(ast, expected); @@ -1036,32 +1003,26 @@ mod tests { path: None, content: INPUT.to_owned(), }, - root: Program { - 
constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::VariableDeclaration(VariableDeclaration { - kind: VariableDeclarationKind::Local, - identifier: "a".to_owned(), - ty: None, - value: Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Char('b'), - span: Span::new( - Position { row: 1, col: 13 }, - Position { row: 1, col: 16 }, - ), - })), - })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::VariableDeclaration(VariableDeclaration { + kind: VariableDeclarationKind::Local, + identifier: "a".to_owned(), + ty: None, + value: Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Char('b'), + span: Span::new( + Position { row: 1, col: 13 }, + Position { row: 1, col: 16 }, + ), + })), + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -1082,35 +1043,29 @@ mod tests { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::VariableDeclaration(VariableDeclaration { - kind: VariableDeclarationKind::Local, - identifier: "a".to_owned(), - ty: None, - value: Expression::UnaryExpression(UnaryExpression { - unary_operator: UnaryOperator::BitwiseNot, - expression: Box::new(Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Integer(IntegerKind::I32(69)), - span: Span::new( - 
Position { row: 1, col: 14 }, - Position { row: 1, col: 16 }, - ), - }))), - }), - })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::VariableDeclaration(VariableDeclaration { + kind: VariableDeclarationKind::Local, + identifier: "a".to_owned(), + ty: None, + value: Expression::UnaryExpression(UnaryExpression { + unary_operator: UnaryOperator::BitwiseNot, + expression: Box::new(Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Integer(IntegerKind::I32(69)), + span: Span::new( + Position { row: 1, col: 14 }, + Position { row: 1, col: 16 }, + ), + }))), + }), + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -1131,42 +1086,36 @@ mod tests { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::VariableDeclaration(VariableDeclaration { - kind: VariableDeclarationKind::Local, - identifier: "a".to_owned(), - ty: None, - value: Expression::BinaryExpression(BinaryExpression { - binary_operator: BinaryOperator::Minus, - lhs: Box::new(Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Integer(IntegerKind::I32(69)), - span: Span::new( - Position { row: 1, col: 13 }, - Position { row: 1, col: 15 }, - ), - }))), - rhs: Box::new(Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Integer(IntegerKind::I32(420)), - span: Span::new( - Position { row: 1, col: 18 }, - Position { row: 1, col: 21 }, - ), - }))), - }), - })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, 
+ items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::VariableDeclaration(VariableDeclaration { + kind: VariableDeclarationKind::Local, + identifier: "a".to_owned(), + ty: None, + value: Expression::BinaryExpression(BinaryExpression { + binary_operator: BinaryOperator::Minus, + lhs: Box::new(Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Integer(IntegerKind::I32(69)), + span: Span::new( + Position { row: 1, col: 13 }, + Position { row: 1, col: 15 }, + ), + }))), + rhs: Box::new(Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Integer(IntegerKind::I32(420)), + span: Span::new( + Position { row: 1, col: 18 }, + Position { row: 1, col: 21 }, + ), + }))), + }), + })], + }, + })], }; assert_ast_eq!(ast, expected); diff --git a/compiler/src/frontend/ast/mod.rs b/compiler/src/frontend/ast/mod.rs index 40320ec..3174a66 100644 --- a/compiler/src/frontend/ast/mod.rs +++ b/compiler/src/frontend/ast/mod.rs @@ -7,22 +7,22 @@ pub mod builder; #[derive(Debug)] pub struct LeekAst { pub source_file: SourceFile, - pub root: Program, + pub items: Vec, } impl PartialEq for LeekAst { fn eq(&self, other: &Self) -> bool { - self.root == other.root + self.items == other.items } } #[derive(Debug, PartialEq)] -pub struct Program { - pub constant_variables: Vec, - pub static_variables: Vec, - pub function_definitions: Vec, - pub struct_definitions: Vec, - pub enum_definitions: Vec, +pub enum ProgramPart { + ConstantVariable(VariableDeclaration), + StaticVariable(VariableDeclaration), + FunctionDefinition(FunctionDefinition), + StructDefinition(StructDefinition), + EnumDefinition(EnumDefinition), } #[derive(Debug, PartialEq)] From 1ddcbc82a3082b84d05a361c61ea50f4c1058f67 Mon Sep 17 00:00:00 2001 From: Wowkster <49880655+wowkster@users.noreply.github.com> Date: Sat, 5 Aug 2023 
19:25:44 -0400 Subject: [PATCH 6/8] refactor: :recycle: remove "Leek" in all naming Removed the "Leek" prefix on a lot of struct names within the project. Since everything is already namespaced, it doesn't make any sense to do that. Also removed the unnecessary traits for the Lexer and Parser since there is only ever one implementation for these things. --- compiler/src/backend/codegen/mod.rs | 6 +- .../src/backend/codegen/x86_linux_gnu/mod.rs | 4 +- compiler/src/backend/mod.rs | 12 +- compiler/src/common/config.rs | 2 +- compiler/src/common/error.rs | 30 +- compiler/src/frontend/ast/builder.rs | 162 +++-- compiler/src/frontend/ast/mod.rs | 4 +- compiler/src/frontend/lexer.rs | 165 ++--- compiler/src/frontend/mod.rs | 26 +- compiler/src/frontend/parser.rs | 619 ++++++++---------- compiler/src/main.rs | 14 +- 11 files changed, 479 insertions(+), 565 deletions(-) diff --git a/compiler/src/backend/codegen/mod.rs b/compiler/src/backend/codegen/mod.rs index 947e2ad..b6ee998 100644 --- a/compiler/src/backend/codegen/mod.rs +++ b/compiler/src/backend/codegen/mod.rs @@ -1,16 +1,16 @@ use std::{path::Path, process::Command, str::FromStr}; -use crate::frontend::ast::LeekAst; +use crate::frontend::ast::Ast; use self::x86_linux_gnu::CodeGeneratorX86LinuxGNU; -use super::LeekCompilerConfig; +use super::CompilerConfig; pub mod x86_64_linux_gnu; pub mod x86_linux_gnu; pub trait CodeGenerator { - fn generate_assembly(&self, ast: LeekAst, compiler_options: &LeekCompilerConfig) -> String; + fn generate_assembly(&self, ast: Ast, compiler_options: &CompilerConfig) -> String; fn create_assembler_command(&self, input_file: &Path, output_file: &Path) -> Command; fn create_linker_command(&self, input_file: &Path, output_file: &Path) -> Command; } diff --git a/compiler/src/backend/codegen/x86_linux_gnu/mod.rs b/compiler/src/backend/codegen/x86_linux_gnu/mod.rs index ca48d15..f98bff2 100644 --- a/compiler/src/backend/codegen/x86_linux_gnu/mod.rs +++ 
b/compiler/src/backend/codegen/x86_linux_gnu/mod.rs @@ -1,14 +1,14 @@ use indoc::indoc; use std::{path::Path, process::Command}; -use crate::{backend::LeekCompilerConfig, frontend::ast::LeekAst}; +use crate::{backend::CompilerConfig, frontend::ast::Ast}; use super::CodeGenerator; pub struct CodeGeneratorX86LinuxGNU; impl CodeGenerator for CodeGeneratorX86LinuxGNU { - fn generate_assembly(&self, _ast: LeekAst, _compiler_options: &LeekCompilerConfig) -> String { + fn generate_assembly(&self, _ast: Ast, _compiler_options: &CompilerConfig) -> String { String::from(indoc! {" global main diff --git a/compiler/src/backend/mod.rs b/compiler/src/backend/mod.rs index 842bcac..69d1039 100644 --- a/compiler/src/backend/mod.rs +++ b/compiler/src/backend/mod.rs @@ -4,10 +4,10 @@ use itertools::Itertools; use crate::{ common::{ - config::{EmitMode, LeekCompilerConfig}, - error::LeekCompilerError, + config::{CompilerConfig, EmitMode}, + error::CompilerError, }, - frontend::ast::LeekAst, + frontend::ast::Ast, }; use self::codegen::{CodeGenTarget, CodeGenerator}; @@ -33,10 +33,10 @@ macro_rules! display_buffer { } pub fn compile_ast( - ast: LeekAst, - compiler_options: &LeekCompilerConfig, + ast: Ast, + compiler_options: &CompilerConfig, target: CodeGenTarget, -) -> Result<(), LeekCompilerError> { +) -> Result<(), CompilerError> { let code_generator = target.get_code_generator(); // If the output name is specified, use that. 
diff --git a/compiler/src/common/config.rs b/compiler/src/common/config.rs index 0d6c2f8..326d449 100644 --- a/compiler/src/common/config.rs +++ b/compiler/src/common/config.rs @@ -41,7 +41,7 @@ pub enum EmitMode { AssemblyFile, } -pub struct LeekCompilerConfig { +pub struct CompilerConfig { pub opt_level: OptimizationLevel, pub build_mode: BuildMode, pub emit_mode: EmitMode, diff --git a/compiler/src/common/error.rs b/compiler/src/common/error.rs index 9a446fe..27bccc7 100644 --- a/compiler/src/common/error.rs +++ b/compiler/src/common/error.rs @@ -9,7 +9,7 @@ use crate::{ // TODO: Refactor with thiserror #[derive(Debug)] -pub enum LeekCompilerError { +pub enum CompilerError { FileReadError(FileReadError), // File -> Chars LexerError(LexerError), // Chars -> Tokens ParserError(ParserError), // Tokens -> Parse Tree @@ -19,7 +19,7 @@ pub enum LeekCompilerError { CodeGenError(CodeGenError), // LIR -> ASM } -impl LeekCompilerError { +impl CompilerError { /// Should print to the stderr and exit with a non-zero exit code pub fn report(&self) -> ! 
{ eprintln!("{self}"); @@ -28,18 +28,18 @@ impl LeekCompilerError { } } -impl Display for LeekCompilerError { +impl Display for CompilerError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - LeekCompilerError::FileReadError(e) => write!(f, "File Read Error: \n{e}"), - LeekCompilerError::LexerError(e) => write!(f, "Lexer Error: \n{e}"), - LeekCompilerError::ParserError(e) => { + CompilerError::FileReadError(e) => write!(f, "File Read Error: \n{e}"), + CompilerError::LexerError(e) => write!(f, "Lexer Error: \n{e}"), + CompilerError::ParserError(e) => { write!( f, "Parser Error: {e}\n=================================\n\n{e:#?}\n" ) } - LeekCompilerError::TypeCheckingError(e) => { + CompilerError::TypeCheckingError(e) => { write!( f, "Type Error: \n{e}\n=================================\n\n{e:#?}\n" @@ -51,26 +51,26 @@ impl Display for LeekCompilerError { } } -impl From for LeekCompilerError { +impl From for CompilerError { fn from(error: FileReadError) -> Self { - LeekCompilerError::FileReadError(error) + CompilerError::FileReadError(error) } } -impl From for LeekCompilerError { +impl From for CompilerError { fn from(error: LexerError) -> Self { - LeekCompilerError::LexerError(error) + CompilerError::LexerError(error) } } -impl From for LeekCompilerError { +impl From for CompilerError { fn from(error: ParserError) -> Self { - LeekCompilerError::ParserError(error) + CompilerError::ParserError(error) } } -impl From for LeekCompilerError { +impl From for CompilerError { fn from(error: CodeGenError) -> Self { - LeekCompilerError::CodeGenError(error) + CompilerError::CodeGenError(error) } } diff --git a/compiler/src/frontend/ast/builder.rs b/compiler/src/frontend/ast/builder.rs index cf99243..94b99d2 100644 --- a/compiler/src/frontend/ast/builder.rs +++ b/compiler/src/frontend/ast/builder.rs @@ -9,19 +9,19 @@ use crate::{ StructInitialization, StructMethodCall, UnaryExpression, VariableDeclaration, VariableDeclarationKind, }, - 
lexer::{IntegerLiteralKind, KeywordKind, LeekToken, LeekTokenKind}, + lexer::{IntegerLiteralKind, KeywordKind, Token, TokenKind}, parser::{ParseTree, ParseTreeNode, ParseTreeNodeNonTerminal, ParseTreeNonTerminalKind}, }, }; use super::{ - Expression, IntegerKind, LeekAst, Literal, LiteralKind, PrimitiveKind, ProgramPart, + Ast, Expression, IntegerKind, Literal, LiteralKind, PrimitiveKind, ProgramPart, QualifiedIdentifier, Type, VariableAssignment, }; // TODO: Add spans for ast nodes -impl LeekAst { +impl Ast { /// This function is infallible. If there is an error, it is due to a bug in the parser or the builder. /// As such, this function will panic if there is an error. pub fn build_from(parse_tree: ParseTree) -> Self { @@ -102,7 +102,7 @@ trait FromTerminal where Self: Sized, { - fn from_terminal(node: &LeekToken) -> Self; + fn from_terminal(node: &Token) -> Self; } impl FromNode for Type { @@ -195,23 +195,20 @@ impl FromNode for VariableDeclaration { }; assert!(&[ - LeekTokenKind::Keyword(KeywordKind::Leak), - LeekTokenKind::Keyword(KeywordKind::Hold), - LeekTokenKind::Keyword(KeywordKind::Perm) + TokenKind::Keyword(KeywordKind::Leak), + TokenKind::Keyword(KeywordKind::Hold), + TokenKind::Keyword(KeywordKind::Perm) ] .contains(&node.children[0].terminal_token().kind)); let identifier = &node.children[1].terminal_token(); - assert_eq!(identifier.kind, LeekTokenKind::Identifier); + assert_eq!(identifier.kind, TokenKind::Identifier); let identifier = identifier.text.clone(); - assert_eq!( - node.children[2].terminal_token().kind, - LeekTokenKind::Equals - ); + assert_eq!(node.children[2].terminal_token().kind, TokenKind::Equals); if let ParseTreeNode::Terminal(terminal) = &node.children[3] { - if terminal.kind == LeekTokenKind::Colon { + if terminal.kind == TokenKind::Colon { todo!("Parse leak with explicit type") } else { unreachable!("Terminal token in leak statement was not a colon") @@ -282,9 +279,9 @@ impl FromNode for Expression { } } -impl From for 
IntegerKind { - fn from(value: LeekToken) -> Self { - let LeekTokenKind::IntegerLiteral(integer) = value.kind else { +impl From for IntegerKind { + fn from(value: Token) -> Self { + let TokenKind::IntegerLiteral(integer) = value.kind else { panic!("Expected integer literal, found {:?}", value.kind) }; @@ -311,25 +308,25 @@ impl FromNode for Atom { let atom = match &node.children[0] { ParseTreeNode::Terminal(terminal) => match terminal.kind { - LeekTokenKind::StringLiteral => Atom::Literal(Literal { + TokenKind::StringLiteral => Atom::Literal(Literal { kind: LiteralKind::String(terminal.text.clone()), span: terminal.span.clone(), }), - LeekTokenKind::CharLiteral => Atom::Literal(Literal { + TokenKind::CharLiteral => Atom::Literal(Literal { kind: LiteralKind::Char(terminal.text.chars().collect::>()[1]), span: terminal.span.clone(), }), - LeekTokenKind::IntegerLiteral(_) => Atom::Literal(Literal { + TokenKind::IntegerLiteral(_) => Atom::Literal(Literal { kind: LiteralKind::Integer(IntegerKind::from(terminal.clone())), span: terminal.span.clone(), }), - LeekTokenKind::FloatLiteral => todo!(), - LeekTokenKind::OpenParen => { + TokenKind::FloatLiteral => todo!(), + TokenKind::OpenParen => { let expression = Expression::from_node(node.children[1].non_terminal()); assert_eq!( node.children[2].terminal_token().kind, - LeekTokenKind::CloseParen + TokenKind::CloseParen ); Atom::ParenthesizedExpression(Box::new(expression)) @@ -351,11 +348,11 @@ impl FromNode for Atom { } impl FromTerminal for UnaryOperator { - fn from_terminal(node: &LeekToken) -> Self { + fn from_terminal(node: &Token) -> Self { match node.kind { - LeekTokenKind::BitwiseNot => Self::BitwiseNot, - LeekTokenKind::LogicalNot => Self::LogicalNot, - LeekTokenKind::Asterisk => Self::Asterisk, + TokenKind::BitwiseNot => Self::BitwiseNot, + TokenKind::LogicalNot => Self::LogicalNot, + TokenKind::Asterisk => Self::Asterisk, _ => unreachable!("Invalid binary operator {:?}", node.kind), } } @@ -390,20 +387,17 @@ impl 
FromNode for FunctionCallExpression { let identifier = QualifiedIdentifier::from_node(node.children[0].non_terminal()); - assert_eq!( - node.children[1].terminal_token().kind, - LeekTokenKind::OpenParen - ); + assert_eq!(node.children[1].terminal_token().kind, TokenKind::OpenParen); let arguments = match &node.children[2] { ParseTreeNode::Terminal(terminal) => { - assert_eq!(terminal.kind, LeekTokenKind::CloseParen); + assert_eq!(terminal.kind, TokenKind::CloseParen); Vec::new() } ParseTreeNode::NonTerminal(non_terminal) => { assert_eq!( node.children[3].terminal_token().kind, - LeekTokenKind::CloseParen + TokenKind::CloseParen ); assert_nt_kind(non_terminal, ParseTreeNonTerminalKind::FunctionArguments); @@ -412,7 +406,7 @@ impl FromNode for FunctionCallExpression { for (index, argument) in non_terminal.children.iter().enumerate() { if index % 2 == 1 { - assert_eq!(argument.terminal_token().kind, LeekTokenKind::Comma); + assert_eq!(argument.terminal_token().kind, TokenKind::Comma); continue; } @@ -433,26 +427,26 @@ impl FromNode for FunctionCallExpression { } impl FromTerminal for BinaryOperator { - fn from_terminal(node: &LeekToken) -> Self { + fn from_terminal(node: &Token) -> Self { match node.kind { - LeekTokenKind::DoubleEquals => Self::DoubleEquals, - LeekTokenKind::LessThan => Self::LessThan, - LeekTokenKind::LessThanOrEqual => Self::LessThanOrEqual, - LeekTokenKind::GreaterThan => Self::GreaterThan, - LeekTokenKind::GreaterThanOrEqual => Self::GreaterThanOrEqual, - LeekTokenKind::Plus => Self::Plus, - LeekTokenKind::Minus => Self::Minus, - LeekTokenKind::Asterisk => Self::Asterisk, - LeekTokenKind::Divide => Self::Divide, - LeekTokenKind::Modulo => Self::Modulo, - LeekTokenKind::BitwiseXor => Self::BitwiseXor, - LeekTokenKind::BitwiseOr => Self::BitwiseOr, - LeekTokenKind::BitwiseAnd => Self::BitwiseAnd, - LeekTokenKind::Exponentiation => Self::Exponentiation, - LeekTokenKind::LeftShift => Self::LeftShift, - LeekTokenKind::RightShift => Self::RightShift, - 
LeekTokenKind::LogicalOr => Self::LogicalOr, - LeekTokenKind::LogicalAnd => Self::LogicalAnd, + TokenKind::DoubleEquals => Self::DoubleEquals, + TokenKind::LessThan => Self::LessThan, + TokenKind::LessThanOrEqual => Self::LessThanOrEqual, + TokenKind::GreaterThan => Self::GreaterThan, + TokenKind::GreaterThanOrEqual => Self::GreaterThanOrEqual, + TokenKind::Plus => Self::Plus, + TokenKind::Minus => Self::Minus, + TokenKind::Asterisk => Self::Asterisk, + TokenKind::Divide => Self::Divide, + TokenKind::Modulo => Self::Modulo, + TokenKind::BitwiseXor => Self::BitwiseXor, + TokenKind::BitwiseOr => Self::BitwiseOr, + TokenKind::BitwiseAnd => Self::BitwiseAnd, + TokenKind::Exponentiation => Self::Exponentiation, + TokenKind::LeftShift => Self::LeftShift, + TokenKind::RightShift => Self::RightShift, + TokenKind::LogicalOr => Self::LogicalOr, + TokenKind::LogicalAnd => Self::LogicalAnd, _ => unreachable!("Invalid binary operator {:?}", node.kind), } } @@ -559,13 +553,13 @@ impl FromNode for FunctionDefinition { // Make sure nodes are correct assert_eq!( parameter_nodes.first().unwrap().terminal_token().kind, - LeekTokenKind::OpenParen, + TokenKind::OpenParen, "Expected first token of params to be open paren" ); assert_eq!( parameter_nodes.last().unwrap().terminal_token().kind, - LeekTokenKind::CloseParen, + TokenKind::CloseParen, "Expected last token of params to be close paren" ); @@ -575,7 +569,7 @@ impl FromNode for FunctionDefinition { if i % 2 == 0 { assert_eq!( parameter_nodes.get(i).unwrap().terminal_token().kind, - LeekTokenKind::Comma, + TokenKind::Comma, "Expected token to be comma" ); continue; @@ -597,7 +591,7 @@ impl FromNode for FunctionDefinition { assert_eq!( function_return_type.children[0].terminal_token().kind, - LeekTokenKind::Arrow, + TokenKind::Arrow, "Expected first token of return type to be arrow" ); @@ -629,7 +623,7 @@ impl FromNode for FunctionParameter { let identifier = node.children[0].terminal_token().text.clone(); - 
assert!(node.children[1].terminal_token().kind == LeekTokenKind::Colon); + assert!(node.children[1].terminal_token().kind == TokenKind::Colon); let ty = Type::from_node(node.children[2].non_terminal()); @@ -645,12 +639,12 @@ impl FromNode for Block { assert_eq!( node.children.first().unwrap().terminal_token().kind, - LeekTokenKind::OpenCurlyBracket + TokenKind::OpenCurlyBracket ); assert_eq!( node.children.last().unwrap().terminal_token().kind, - LeekTokenKind::CloseCurlyBracket + TokenKind::CloseCurlyBracket ); let mut statements = Vec::new(); @@ -665,24 +659,24 @@ impl FromNode for Block { } impl FromTerminal for AssignmentOperator { - fn from_terminal(node: &LeekToken) -> Self { + fn from_terminal(node: &Token) -> Self { match node.kind { - LeekTokenKind::Equals => Self::Equals, - LeekTokenKind::PlusEquals => Self::PlusEquals, - LeekTokenKind::MinusEquals => Self::MinusEquals, - LeekTokenKind::MultiplyEquals => Self::MultiplyEquals, - LeekTokenKind::DivideEquals => Self::DivideEquals, - LeekTokenKind::ModuloEquals => Self::ModuloEquals, - LeekTokenKind::BitwiseNotEquals => Self::BitwiseNotEquals, - LeekTokenKind::BitwiseXorEquals => Self::BitwiseXorEquals, - LeekTokenKind::BitwiseOrEquals => Self::BitwiseOrEquals, - LeekTokenKind::BitwiseAndEquals => Self::BitwiseAndEquals, - LeekTokenKind::LogicalNotEquals => Self::LogicalNotEquals, - LeekTokenKind::ExponentiationEquals => Self::ExponentiationEquals, - LeekTokenKind::LeftShiftEquals => Self::LeftShiftEquals, - LeekTokenKind::RightShiftEquals => Self::RightShiftEquals, - LeekTokenKind::LogicalOrEquals => Self::LogicalOrEquals, - LeekTokenKind::LogicalAndEquals => Self::LogicalAndEquals, + TokenKind::Equals => Self::Equals, + TokenKind::PlusEquals => Self::PlusEquals, + TokenKind::MinusEquals => Self::MinusEquals, + TokenKind::MultiplyEquals => Self::MultiplyEquals, + TokenKind::DivideEquals => Self::DivideEquals, + TokenKind::ModuloEquals => Self::ModuloEquals, + TokenKind::BitwiseNotEquals => 
Self::BitwiseNotEquals, + TokenKind::BitwiseXorEquals => Self::BitwiseXorEquals, + TokenKind::BitwiseOrEquals => Self::BitwiseOrEquals, + TokenKind::BitwiseAndEquals => Self::BitwiseAndEquals, + TokenKind::LogicalNotEquals => Self::LogicalNotEquals, + TokenKind::ExponentiationEquals => Self::ExponentiationEquals, + TokenKind::LeftShiftEquals => Self::LeftShiftEquals, + TokenKind::RightShiftEquals => Self::RightShiftEquals, + TokenKind::LogicalOrEquals => Self::LogicalOrEquals, + TokenKind::LogicalAndEquals => Self::LogicalAndEquals, _ => { panic!("Invalid assignment operator {:?}", node.kind); } @@ -791,7 +785,7 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), @@ -831,7 +825,7 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), @@ -871,7 +865,7 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), @@ -911,7 +905,7 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), @@ -950,7 +944,7 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), @@ -998,7 +992,7 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), @@ -1038,7 +1032,7 @@ mod tests { let ast = 
parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), @@ -1081,7 +1075,7 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), diff --git a/compiler/src/frontend/ast/mod.rs b/compiler/src/frontend/ast/mod.rs index 3174a66..21d1bac 100644 --- a/compiler/src/frontend/ast/mod.rs +++ b/compiler/src/frontend/ast/mod.rs @@ -5,12 +5,12 @@ use super::position::{SourceFile, Span}; pub mod builder; #[derive(Debug)] -pub struct LeekAst { +pub struct Ast { pub source_file: SourceFile, pub items: Vec, } -impl PartialEq for LeekAst { +impl PartialEq for Ast { fn eq(&self, other: &Self) -> bool { self.items == other.items } diff --git a/compiler/src/frontend/lexer.rs b/compiler/src/frontend/lexer.rs index 5756972..1d4b349 100644 --- a/compiler/src/frontend/lexer.rs +++ b/compiler/src/frontend/lexer.rs @@ -12,17 +12,17 @@ use crate::{ #[allow(dead_code)] #[cfg_attr(not(test), derive(Debug))] #[derive(Clone)] -pub struct LeekToken { - pub kind: LeekTokenKind, +pub struct Token { + pub kind: TokenKind, pub text: String, pub span: Span, } -impl From<(LeekTokenKind, T)> for LeekToken +impl From<(TokenKind, T)> for Token where T: Into + Sized, { - fn from((kind, text): (LeekTokenKind, T)) -> Self { + fn from((kind, text): (TokenKind, T)) -> Self { Self { kind, text: text.into(), @@ -32,22 +32,22 @@ where } #[cfg(test)] -impl Debug for LeekToken { +impl Debug for Token { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("LeekToken") + f.debug_struct("Token") .field("kind", &self.kind) .field("text", &self.text) .finish() } } -impl Display for LeekToken { +impl Display for Token { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:?} => {:?}", 
self.kind, self.text) } } -impl PartialEq for LeekToken { +impl PartialEq for Token { fn eq(&self, other: &Self) -> bool { self.kind == other.kind && self.text == other.text } @@ -96,7 +96,7 @@ impl TryFrom<&String> for KeywordKind { } #[derive(Debug, Eq, PartialEq, Clone, Copy)] -pub enum LeekTokenKind { +pub enum TokenKind { // Significant Whitespace Newline, @@ -174,7 +174,7 @@ pub enum LeekTokenKind { DollarSign, // $ } -impl LeekTokenKind { +impl TokenKind { pub fn is_assignment_operator(&self) -> bool { matches!( self, @@ -232,7 +232,7 @@ impl LeekTokenKind { ) } - fn grouping_symbol_from(c: char) -> LeekTokenKind { + fn grouping_symbol_from(c: char) -> TokenKind { match c { '(' => Self::OpenParen, ')' => Self::CloseParen, @@ -244,7 +244,7 @@ impl LeekTokenKind { } } - fn single_operator_from(c: char) -> LeekTokenKind { + fn single_operator_from(c: char) -> TokenKind { match c { '=' => Self::Equals, '<' => Self::LessThan, @@ -263,7 +263,7 @@ impl LeekTokenKind { } } - fn double_operator_from(c: char) -> LeekTokenKind { + fn double_operator_from(c: char) -> TokenKind { match c { '*' => Self::Exponentiation, '<' => Self::LeftShift, @@ -274,7 +274,7 @@ impl LeekTokenKind { } } - fn single_equals_operator_from(c: char) -> LeekTokenKind { + fn single_equals_operator_from(c: char) -> TokenKind { match c { '=' => Self::DoubleEquals, '<' => Self::LessThanOrEqual, @@ -293,7 +293,7 @@ impl LeekTokenKind { } } - fn double_equals_operator_from(c: char) -> LeekTokenKind { + fn double_equals_operator_from(c: char) -> TokenKind { match c { '*' => Self::ExponentiationEquals, '<' => Self::LeftShiftEquals, @@ -304,7 +304,7 @@ impl LeekTokenKind { } } - fn other_symbol_from(c: impl Into) -> LeekTokenKind { + fn other_symbol_from(c: impl Into) -> TokenKind { match c.into().as_str() { "->" => Self::Arrow, "?" 
=> Self::QuestionMark, @@ -357,7 +357,7 @@ impl From for LexerError { #[derive(Debug, PartialEq)] pub enum LexerErrorKind { UnexpectedChar(char), - UnclosedWrappedLiteral(LeekTokenKind), + UnclosedWrappedLiteral(TokenKind), UnexpectedEndOfFloatLiteral, UnexpectedCharactersInFloatLiteral, UnexpectedExtraPeriodInFloatLiteral, @@ -440,18 +440,6 @@ impl Display for LexerError { } } -/// Represents a generic Lexer object -pub trait Lexer { - fn next(&mut self) -> Result, LexerError>; - fn has_next(&self) -> Result; - fn peek(&self) -> Result, LexerError>; - fn peek_nth(&self, n: usize) -> Result, LexerError>; - fn get_position(&self) -> &Position; - fn get_source_file(&self) -> &SourceFile; -} - -/// Defines a specific Lexer for Leek -/// /// This lexer implementation uses a "lazy" iterator approach such /// that characters are not read from the input stream until a token is requested. /// @@ -462,14 +450,14 @@ pub trait Lexer { /// UnsafeCell is used to allow for an optimization of the peek function that stores /// the peeked tokens in a VecDeque. This is done to avoid having to re-lex the same /// tokens multiple times. 
-pub struct LeekLexer { +pub struct Lexer { character_reader: UnsafeCell>, - peek_forward: UnsafeCell>, + peek_forward: UnsafeCell>, } -impl LeekLexer { +impl Lexer { pub fn new(character_reader: impl CharacterReader + 'static) -> Self { - LeekLexer { + Lexer { character_reader: UnsafeCell::new(Box::new(character_reader)), peek_forward: UnsafeCell::new(VecDeque::new()), } @@ -477,11 +465,7 @@ impl LeekLexer { /// Read a literal that is wrapped in the provided character /// The wrapper character can be escaped using the backslash character `\` - fn read_wrapped_escapable( - &self, - wrapper: char, - kind: LeekTokenKind, - ) -> Result { + fn read_wrapped_escapable(&self, wrapper: char, kind: TokenKind) -> Result { let character_reader = unsafe { &mut *self.character_reader.get() }; let mut text = String::new(); @@ -547,7 +531,7 @@ impl LeekLexer { let end = character_reader.get_position().clone(); - Ok(LeekToken { + Ok(Token { kind, text, span: Span::new(start, end), @@ -555,7 +539,7 @@ impl LeekLexer { } /// Reads a generic number literal into either an integer or double - fn read_number_literal(&self) -> Result { + fn read_number_literal(&self) -> Result { /* * Integer Cases: * @@ -621,7 +605,7 @@ impl LeekLexer { &self, literal_kind: IntegerLiteralKind, is_in_base: fn(char) -> bool, - ) -> Result { + ) -> Result { let character_reader = unsafe { &mut *self.character_reader.get() }; macro_rules! 
create_error { @@ -684,14 +668,14 @@ impl LeekLexer { let end = character_reader.get_position().clone(); - Ok(LeekToken { - kind: LeekTokenKind::IntegerLiteral(literal_kind), + Ok(Token { + kind: TokenKind::IntegerLiteral(literal_kind), text, span: Span::new(start, end), }) } - fn read_dec_int_or_float_literal(&self) -> Result { + fn read_dec_int_or_float_literal(&self) -> Result { enum NumberLexingState { Integer, Float, @@ -792,12 +776,12 @@ impl LeekLexer { let end = character_reader.get_position().clone(); - Ok(LeekToken { + Ok(Token { kind: match state { NumberLexingState::Integer => { - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Decimal) + TokenKind::IntegerLiteral(IntegerLiteralKind::Decimal) } - NumberLexingState::Float => LeekTokenKind::FloatLiteral, + NumberLexingState::Float => TokenKind::FloatLiteral, }, text, span: Span::new(start, end), @@ -839,14 +823,14 @@ impl LeekLexer { } /// Requires character to be available - fn read_single(&self, kind: LeekTokenKind) -> LeekToken { + fn read_single(&self, kind: TokenKind) -> Token { let character_reader = unsafe { &mut *self.character_reader.get() }; let start = character_reader.get_position().clone(); let c = character_reader.next().unwrap(); let end = character_reader.get_position().clone(); - LeekToken { + Token { kind, text: c.into(), span: Span::new(start, end), @@ -875,7 +859,7 @@ impl LeekLexer { /// Reads a fixed number of chars from the character reader and returns the resulting token /// /// Requires that the character reader be checked in advance to contain the correct sequence - fn read_multi(&self, string: &str, kind: LeekTokenKind) -> LeekToken { + fn read_multi(&self, string: &str, kind: TokenKind) -> Token { let character_reader = unsafe { &mut *self.character_reader.get() }; let mut text = String::new(); @@ -899,7 +883,7 @@ impl LeekLexer { let end = character_reader.get_position().clone(); - LeekToken { + Token { kind, text, span: Span::new(start, end), @@ -917,19 +901,14 @@ impl 
LeekLexer { /// Reads a fixed number of chars with an `=` suffixed to the given prefix from the character reader and returns the resulting token /// /// Requires that the character reader be checked in advance to contain the correct sequence - fn read_multi_equals(&self, prefix: impl Into, kind: LeekTokenKind) -> LeekToken { + fn read_multi_equals(&self, prefix: impl Into, kind: TokenKind) -> Token { let mut c: String = prefix.into(); c.push('='); self.read_multi(&c, kind) } - fn read_single_operator( - &self, - c: char, - single: LeekTokenKind, - equals: LeekTokenKind, - ) -> LeekToken { + fn read_single_operator(&self, c: char, single: TokenKind, equals: TokenKind) -> Token { if self.lookahead_has_equals(c, 0) { self.read_multi_equals(c, equals) } else { @@ -937,12 +916,7 @@ impl LeekLexer { } } - fn read_double_operator( - &self, - c: char, - normal: LeekTokenKind, - equals: LeekTokenKind, - ) -> LeekToken { + fn read_double_operator(&self, c: char, normal: TokenKind, equals: TokenKind) -> Token { if self.lookahead_has_equals(c, 1) { self.read_multi_equals(c.to_string().repeat(2), equals) } else { @@ -950,7 +924,7 @@ impl LeekLexer { } } - fn read_next_token(&self) -> Result, LexerError> { + fn read_next_token(&self) -> Result, LexerError> { let character_reader = unsafe { &mut *self.character_reader.get() }; while character_reader.has_next() { @@ -961,7 +935,7 @@ impl LeekLexer { let token = Ok(Some(match first_char { // New lines are significant - '\n' => self.read_single(LeekTokenKind::Newline), + '\n' => self.read_single(TokenKind::Newline), // Whitespace a if a.is_ascii_whitespace() => { @@ -979,10 +953,10 @@ impl LeekLexer { a if a.is_ascii_alphabetic() => { let word = self.read_while(|c| c.is_ascii_alphanumeric() || c == '_'); - LeekToken { + Token { kind: match KeywordKind::try_from(&word) { - Ok(kw_kind) => LeekTokenKind::Keyword(kw_kind), - Err(_) => LeekTokenKind::Identifier, + Ok(kw_kind) => TokenKind::Keyword(kw_kind), + Err(_) => 
TokenKind::Identifier, }, text: word, span: Span::new(start, character_reader.get_position().clone()), @@ -990,28 +964,28 @@ impl LeekLexer { } // Literals - '"' => self.read_wrapped_escapable('"', LeekTokenKind::StringLiteral)?, - '\'' => self.read_wrapped_escapable('\'', LeekTokenKind::CharLiteral)?, + '"' => self.read_wrapped_escapable('"', TokenKind::StringLiteral)?, + '\'' => self.read_wrapped_escapable('\'', TokenKind::CharLiteral)?, a if a.is_ascii_digit() => self.read_number_literal()?, // Grouping Symbols c @ ('(' | ')' | '[' | ']' | '{' | '}') => { - self.read_single(LeekTokenKind::grouping_symbol_from(c)) + self.read_single(TokenKind::grouping_symbol_from(c)) } // Arrows (`->`) '-' if character_reader.peek_nth(1).is_some_and(|c| *c == '>') => { - self.read_multi("->", LeekTokenKind::Arrow) + self.read_multi("->", TokenKind::Arrow) } // Bang Coalescing (`!.`) '!' if character_reader.peek_nth(1).is_some_and(|c| *c == '.') => { - self.read_multi("!.", LeekTokenKind::BangCoalescing) + self.read_multi("!.", TokenKind::BangCoalescing) } // Double Colon (`::`) ':' if character_reader.peek_nth(1).is_some_and(|c| *c == ':') => { - self.read_multi("::", LeekTokenKind::DoubleColon) + self.read_multi("::", TokenKind::DoubleColon) } // Double operators (must come first because of lookahead clash) @@ -1020,8 +994,8 @@ impl LeekLexer { { self.read_double_operator( c, - LeekTokenKind::double_operator_from(c), - LeekTokenKind::double_equals_operator_from(c), + TokenKind::double_operator_from(c), + TokenKind::double_equals_operator_from(c), ) } @@ -1029,13 +1003,13 @@ impl LeekLexer { c @ ('=' | '<' | '>' | '+' | '-' | '*' | '/' | '%' | '~' | '!' | '&' | '|' | '^') => self.read_single_operator( c, - LeekTokenKind::single_operator_from(c), - LeekTokenKind::single_equals_operator_from(c), + TokenKind::single_operator_from(c), + TokenKind::single_equals_operator_from(c), ), // Non-Operator symbols c @ ('?' | ',' | ';' | ':' | '.' 
| '\\' | '_' | '@' | '#' | '$') => { - self.read_single(LeekTokenKind::other_symbol_from(c)) + self.read_single(TokenKind::other_symbol_from(c)) } // Other @@ -1056,7 +1030,7 @@ impl LeekLexer { Ok(None) } - fn _next(&self) -> Result, LexerError> { + fn _next(&self) -> Result, LexerError> { let peek_forward = unsafe { &mut *self.peek_forward.get() }; // Check if more tokens have already been precomputed for us @@ -1069,12 +1043,13 @@ impl LeekLexer { } } -impl Lexer for LeekLexer { - fn next(&mut self) -> Result, LexerError> { +/// Lexer public interface +impl Lexer { + pub fn next(&mut self) -> Result, LexerError> { self._next() } - fn peek(&self) -> Result, LexerError> { + pub fn peek(&self) -> Result, LexerError> { let peek_forward = unsafe { &mut *self.peek_forward.get() }; // Check if more tokens have already been precomputed for us @@ -1098,7 +1073,7 @@ impl Lexer for LeekLexer { } } - fn peek_nth(&self, n: usize) -> Result, LexerError> { + pub fn peek_nth(&self, n: usize) -> Result, LexerError> { let peek_forward = unsafe { &mut *self.peek_forward.get() }; // Check if `n` tokens have already been precomputed for us @@ -1112,8 +1087,8 @@ impl Lexer for LeekLexer { for _ in peek_forward.len()..=n { // Get the next token or return early if none more are found let Some(token) = self.read_next_token()? 
else { - return Ok(None); - }; + return Ok(None); + }; // Store the token for later usage peek_forward.push_back(token); @@ -1123,17 +1098,17 @@ impl Lexer for LeekLexer { Ok(peek_forward.get(n)) } - fn has_next(&self) -> Result { + pub fn has_next(&self) -> Result { Ok(self.peek()?.is_some()) } - fn get_position(&self) -> &Position { + pub fn get_position(&self) -> &Position { let character_reader = unsafe { &*self.character_reader.get() }; character_reader.get_position() } - fn get_source_file(&self) -> &SourceFile { + pub fn get_source_file(&self) -> &SourceFile { let character_reader = unsafe { &*self.character_reader.get() }; character_reader.get_source_file() @@ -1143,18 +1118,16 @@ impl Lexer for LeekLexer { #[cfg(test)] mod test { use crate::{ - frontend::lexer::{ - IntegerLiteralKind::*, KeywordKind::*, LeekToken as LT, LeekTokenKind::*, - }, + frontend::lexer::{IntegerLiteralKind::*, KeywordKind::*, Token as LT, TokenKind::*}, frontend::reader::FileReader, }; - use super::{LeekLexer, Lexer, LexerError, LexerErrorKind::*}; + use super::{Lexer, LexerError, LexerErrorKind::*}; fn compare_input_to_expected(input: &str, expected_tokens: Vec) { // Collect tokens from lexer let reader = FileReader::from(input.to_owned()); - let mut lexer = LeekLexer::new(reader); + let mut lexer = Lexer::new(reader); let mut lexer_tokens = Vec::new(); @@ -1171,7 +1144,7 @@ mod test { fn lex_input(input: &str) -> Result, LexerError> { // Collect tokens from lexer let reader = FileReader::from(input.to_owned()); - let mut lexer = LeekLexer::new(reader); + let mut lexer = Lexer::new(reader); let mut lexer_tokens = Vec::new(); diff --git a/compiler/src/frontend/mod.rs b/compiler/src/frontend/mod.rs index 5c4ac64..e97be17 100644 --- a/compiler/src/frontend/mod.rs +++ b/compiler/src/frontend/mod.rs @@ -1,15 +1,11 @@ use std::path::PathBuf; use crate::{ - common::error::LeekCompilerError, - frontend::{ - lexer::LeekLexer, - parser::{LeekParser, Parser}, - reader::FileReader, - }, + 
common::error::CompilerError, + frontend::{lexer::Lexer, parser::Parser, reader::FileReader}, }; -use self::ast::LeekAst; +use self::ast::Ast; pub mod ast; pub mod lexer; @@ -17,26 +13,26 @@ pub mod parser; pub mod position; pub mod reader; -pub fn parse_file(path: PathBuf) -> Result { +pub fn parse_file(path: PathBuf) -> Result { let reader = FileReader::new(path)?; - let lexer = LeekLexer::new(reader); - let parse_tree = LeekParser::parse(lexer)?; + let lexer = Lexer::new(reader); + let parse_tree = Parser::parse(lexer)?; println!("{}", &parse_tree.root); - let ast = LeekAst::build_from(parse_tree); + let ast = Ast::build_from(parse_tree); Ok(ast) } -pub fn parse_string(source: String) -> Result { +pub fn parse_string(source: String) -> Result { let reader = FileReader::from(source); - let lexer = LeekLexer::new(reader); - let parse_tree = LeekParser::parse(lexer)?; + let lexer = Lexer::new(reader); + let parse_tree = Parser::parse(lexer)?; println!("{}", &parse_tree.root); - let ast = LeekAst::build_from(parse_tree); + let ast = Ast::build_from(parse_tree); Ok(ast) } diff --git a/compiler/src/frontend/parser.rs b/compiler/src/frontend/parser.rs index 768960e..cbbf33f 100644 --- a/compiler/src/frontend/parser.rs +++ b/compiler/src/frontend/parser.rs @@ -2,8 +2,8 @@ use core::panic; use std::fmt::Display; use crate::{ - common::error::LeekCompilerError, - frontend::lexer::{IntegerLiteralKind, KeywordKind, LeekToken, LeekTokenKind, Lexer}, + common::error::CompilerError, + frontend::lexer::{IntegerLiteralKind, KeywordKind, Lexer, Token, TokenKind}, frontend::position::{SourceFile, Span}, }; @@ -23,7 +23,7 @@ impl PartialEq for ParseTree { #[derive(Debug, PartialEq, Clone)] pub enum ParseTreeNode { - Terminal(LeekToken), + Terminal(Token), NonTerminal(ParseTreeNodeNonTerminal), } @@ -51,7 +51,7 @@ impl ParseTreeNode { } } - pub fn terminal_token(&self) -> &LeekToken { + pub fn terminal_token(&self) -> &Token { if let ParseTreeNode::Terminal(token) = self { token } 
else { @@ -157,8 +157,8 @@ impl Display for ParserError { #[derive(Debug)] pub enum ParserErrorKind { UnexpectedToken { - expected: Vec, - found: LeekTokenKind, + expected: Vec, + found: TokenKind, }, UnexpectedKeyword { expected: Vec, @@ -168,32 +168,19 @@ pub enum ParserErrorKind { IndexIntoNonIdentifier, } -pub trait Parser { - /// Takes in a lexer and returns the root of a parse tree - fn parse(lexer: impl Lexer + 'static) -> Result; +pub struct Parser { + lexer: Lexer, } -pub struct LeekParser { - lexer: Box, -} - -impl Parser for LeekParser { - fn parse(lexer: impl Lexer + 'static) -> Result { - let mut parser = LeekParser::new(lexer); +impl Parser { + pub fn parse(lexer: Lexer) -> Result { + let mut parser = Parser { lexer }; parser.parse_from_lexer() } -} - -impl LeekParser { - fn new(lexer: impl Lexer + 'static) -> Self { - Self { - lexer: Box::new(lexer), - } - } /// Peeks the next token or returns an error if there are none left - fn peek_expect(&self) -> Result<&LeekToken, LeekCompilerError> { + fn peek_expect(&self) -> Result<&Token, CompilerError> { self.lexer.peek()?.ok_or_else(|| { ParserError { kind: ParserErrorKind::UnexpectedEndOfInput, @@ -205,7 +192,7 @@ impl LeekParser { } /// Grabs the next token and asserts that it is the provided type - fn peek_expect_is(&self, kind: LeekTokenKind) -> Result { + fn peek_expect_is(&self, kind: TokenKind) -> Result { let token = self.peek_expect()?; Ok(token.kind == kind) @@ -213,7 +200,7 @@ impl LeekParser { /// Peeks the nth token or returns an error if there are none left #[allow(unused)] - fn peek_nth_expect(&self, n: usize) -> Result<&LeekToken, LeekCompilerError> { + fn peek_nth_expect(&self, n: usize) -> Result<&Token, CompilerError> { self.lexer.peek_nth(n)?.ok_or_else(|| { ParserError { kind: ParserErrorKind::UnexpectedEndOfInput, @@ -225,10 +212,7 @@ impl LeekParser { } /// Peeks the next token and asserts that it is one of the provided types - fn peek_expect_is_of( - &self, - kinds: Vec, - ) -> 
Result<&LeekToken, LeekCompilerError> { + fn peek_expect_is_of(&self, kinds: Vec) -> Result<&Token, CompilerError> { let token = self.peek_expect()?; if !kinds.contains(&token.kind) { @@ -247,10 +231,7 @@ impl LeekParser { } /// Searches the next token ignoring new lines - fn peek_nth_ignore_whitespace( - &self, - n: usize, - ) -> Result, LeekCompilerError> { + fn peek_nth_ignore_whitespace(&self, n: usize) -> Result, CompilerError> { let mut peek_index = 0; let mut non_nl_tokens = 0; @@ -260,7 +241,7 @@ impl LeekParser { }; match peeked.kind { - LeekTokenKind::Newline => { + TokenKind::Newline => { peek_index += 1; continue; } @@ -279,17 +260,17 @@ impl LeekParser { } /// Peeks the nth token or returns an error if there are none left - fn peek_nth_ignore_whitespace_expect(&self, n: usize) -> Result<&LeekToken, LeekCompilerError> { + fn peek_nth_ignore_whitespace_expect(&self, n: usize) -> Result<&Token, CompilerError> { self.peek_nth_ignore_whitespace(n)? .ok_or_else(|| self.create_error(ParserErrorKind::UnexpectedEndOfInput)) } /// Ignores tokens while they are new lines - fn bleed_whitespace(&mut self) -> Result<(), LeekCompilerError> { + fn bleed_whitespace(&mut self) -> Result<(), CompilerError> { while self .lexer .peek()? - .is_some_and(|t| t.kind == LeekTokenKind::Newline) + .is_some_and(|t| t.kind == TokenKind::Newline) { self.lexer.next()?; } @@ -298,14 +279,14 @@ impl LeekParser { } /// Grabs the next token or throws an error if none were found - fn next_expect(&mut self) -> Result { + fn next_expect(&mut self) -> Result { self.lexer .next()? 
.ok_or_else(|| self.create_error(ParserErrorKind::UnexpectedEndOfInput)) } /// Grabs the next token and asserts that it is the provided type - fn next_expect_is(&mut self, kind: LeekTokenKind) -> Result { + fn next_expect_is(&mut self, kind: TokenKind) -> Result { let token = self.next_expect()?; if token.kind != kind { @@ -322,10 +303,7 @@ impl LeekParser { } /// Gets the next token and asserts that it is one of the provided types - fn next_expect_is_of( - &mut self, - kinds: Vec, - ) -> Result { + fn next_expect_is_of(&mut self, kinds: Vec) -> Result { let token = self.next_expect()?; if !kinds.contains(&token.kind) { @@ -342,7 +320,7 @@ impl LeekParser { } /// Creates the associated error variant from the lexer's current position - fn create_error(&self, kind: ParserErrorKind) -> LeekCompilerError { + fn create_error(&self, kind: ParserErrorKind) -> CompilerError { ParserError { kind, source_file: self.lexer.get_source_file().clone(), @@ -352,7 +330,7 @@ impl LeekParser { } /// Creates the associated error variant from a span - fn create_error_with_span(&self, kind: ParserErrorKind, span: Span) -> LeekCompilerError { + fn create_error_with_span(&self, kind: ParserErrorKind, span: Span) -> CompilerError { ParserError { kind, source_file: self.lexer.get_source_file().clone(), @@ -370,22 +348,20 @@ impl LeekParser { /// | StaticVariableDeclaration /// )+ /// - fn parse_program_part(&mut self) -> Result { + fn parse_program_part(&mut self) -> Result { let peeked_token = self.peek_expect()?; match peeked_token.kind { // FunctionDefinition or FunctionDeclaration - LeekTokenKind::Keyword(KeywordKind::Fn) => { - self.parse_function_declaration_or_definition() - } + TokenKind::Keyword(KeywordKind::Fn) => self.parse_function_declaration_or_definition(), // StructDefinition - LeekTokenKind::Keyword(KeywordKind::Struct) => self.parse_struct_definition(), + TokenKind::Keyword(KeywordKind::Struct) => self.parse_struct_definition(), // ConstantVariableDeclaration - 
LeekTokenKind::Keyword(KeywordKind::Perm) => self.parse_constant_variable_declaration(), + TokenKind::Keyword(KeywordKind::Perm) => self.parse_constant_variable_declaration(), // StaticVariableDeclaration - LeekTokenKind::Keyword(KeywordKind::Hold) => self.parse_static_variable_declaration(), + TokenKind::Keyword(KeywordKind::Hold) => self.parse_static_variable_declaration(), // Unexpected keyword - LeekTokenKind::Keyword(kw) => Err(self.create_error_with_span( + TokenKind::Keyword(kw) => Err(self.create_error_with_span( ParserErrorKind::UnexpectedKeyword { expected: vec![ KeywordKind::Fn, @@ -401,10 +377,10 @@ impl LeekParser { _ => Err(self.create_error_with_span( ParserErrorKind::UnexpectedToken { expected: vec![ - LeekTokenKind::Keyword(KeywordKind::Fn), - LeekTokenKind::Keyword(KeywordKind::Struct), - LeekTokenKind::Keyword(KeywordKind::Perm), - LeekTokenKind::Keyword(KeywordKind::Hold), + TokenKind::Keyword(KeywordKind::Fn), + TokenKind::Keyword(KeywordKind::Struct), + TokenKind::Keyword(KeywordKind::Perm), + TokenKind::Keyword(KeywordKind::Hold), ], found: peeked_token.kind, }, @@ -419,13 +395,11 @@ impl LeekParser { /// FunctionDeclaration :: /// `fn` QualifiedIdentifier FunctionParameters FunctionReturnType? Newline /// - fn parse_function_declaration_or_definition( - &mut self, - ) -> Result { + fn parse_function_declaration_or_definition(&mut self) -> Result { let mut children = Vec::new(); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Fn))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Fn))? )); self.bleed_whitespace()?; @@ -436,7 +410,7 @@ impl LeekParser { if self .peek_nth_ignore_whitespace(0)? 
- .is_some_and(|token| token.kind == LeekTokenKind::Arrow) + .is_some_and(|token| token.kind == TokenKind::Arrow) { self.bleed_whitespace()?; children.push(self.parse_return_type()?); @@ -453,9 +427,9 @@ impl LeekParser { /// FunctionReturnType :: /// `->` Type - fn parse_return_type(&mut self) -> Result { + fn parse_return_type(&mut self) -> Result { let children = vec![ - terminal!(self.next_expect_is(LeekTokenKind::Arrow)?), + terminal!(self.next_expect_is(TokenKind::Arrow)?), self.parse_type()?, ]; @@ -469,20 +443,20 @@ impl LeekParser { /// `(` /// (TypeAssociation `,`)* TypeAssociation /// `)` - fn parse_function_parameters(&mut self) -> Result { + fn parse_function_parameters(&mut self) -> Result { let mut children = Vec::new(); - children.push(terminal!(self.next_expect_is(LeekTokenKind::OpenParen)?)); + children.push(terminal!(self.next_expect_is(TokenKind::OpenParen)?)); self.bleed_whitespace()?; match self.peek_expect()?.kind { - LeekTokenKind::CloseParen => {} + TokenKind::CloseParen => {} _ => { children.push(self.parse_type_association()?); self.bleed_whitespace()?; - while self.peek_expect_is(LeekTokenKind::Comma)? { - children.push(terminal!(self.next_expect_is(LeekTokenKind::Comma)?)); + while self.peek_expect_is(TokenKind::Comma)? 
{ + children.push(terminal!(self.next_expect_is(TokenKind::Comma)?)); self.bleed_whitespace()?; children.push(self.parse_type_association()?); self.bleed_whitespace()?; @@ -491,7 +465,7 @@ impl LeekParser { } self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::CloseParen)?)); + children.push(terminal!(self.next_expect_is(TokenKind::CloseParen)?)); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::FunctionParameters, @@ -503,12 +477,10 @@ impl LeekParser { /// `{` /// (Block | Statement)* /// `}` - fn parse_block(&mut self) -> Result { + fn parse_block(&mut self) -> Result { let mut children = Vec::new(); - children.push(terminal!( - self.next_expect_is(LeekTokenKind::OpenCurlyBracket)? - )); + children.push(terminal!(self.next_expect_is(TokenKind::OpenCurlyBracket)?)); self.bleed_whitespace()?; while self.lexer.has_next()? { @@ -516,21 +488,19 @@ impl LeekParser { match token.kind { // Ignore preceding newlines - LeekTokenKind::Newline => { + TokenKind::Newline => { self.lexer.next()?; } // Allow recursive blocks - LeekTokenKind::OpenCurlyBracket => children.push(self.parse_block()?), + TokenKind::OpenCurlyBracket => children.push(self.parse_block()?), // Break the loop if a closing bracket is found - LeekTokenKind::CloseCurlyBracket => break, + TokenKind::CloseCurlyBracket => break, _ => children.push(self.parse_statement()?), } } - children.push(terminal!( - self.next_expect_is(LeekTokenKind::CloseCurlyBracket)? 
- )); + children.push(terminal!(self.next_expect_is(TokenKind::CloseCurlyBracket)?)); self.bleed_whitespace()?; Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -546,22 +516,22 @@ impl LeekParser { /// | (QualifiedIdentifier assignment Expression) /// | (FunctionCallExpression) /// ) - fn parse_statement(&mut self) -> Result { + fn parse_statement(&mut self) -> Result { let mut children = Vec::new(); match self.peek_expect()?.kind { - LeekTokenKind::Keyword(KeywordKind::Yeet) => { + TokenKind::Keyword(KeywordKind::Yeet) => { children.push(self.parse_yeet_statement()?); } - LeekTokenKind::Keyword(KeywordKind::Leak) => { + TokenKind::Keyword(KeywordKind::Leak) => { children.push(self.parse_local_variable_declaration()?); } - k @ LeekTokenKind::Identifier => { + k @ TokenKind::Identifier => { let identifier = self.parse_qualified_identifier()?; // Could be assignment or function call match self.peek_nth_ignore_whitespace_expect(0)?.kind { - LeekTokenKind::OpenParen => { + TokenKind::OpenParen => { children.push(self.parse_function_call_expression(identifier)?) 
} k if k.is_assignment_operator() => { @@ -570,23 +540,23 @@ impl LeekParser { _ => { return Err(self.create_error(ParserErrorKind::UnexpectedToken { expected: vec![ - LeekTokenKind::OpenParen, - LeekTokenKind::Equals, - LeekTokenKind::PlusEquals, - LeekTokenKind::MinusEquals, - LeekTokenKind::MultiplyEquals, - LeekTokenKind::DivideEquals, - LeekTokenKind::ModuloEquals, - LeekTokenKind::BitwiseNotEquals, - LeekTokenKind::BitwiseXorEquals, - LeekTokenKind::BitwiseOrEquals, - LeekTokenKind::BitwiseAndEquals, - LeekTokenKind::LogicalNotEquals, - LeekTokenKind::ExponentiationEquals, - LeekTokenKind::LeftShiftEquals, - LeekTokenKind::RightShiftEquals, - LeekTokenKind::LogicalOrEquals, - LeekTokenKind::LogicalAndEquals, + TokenKind::OpenParen, + TokenKind::Equals, + TokenKind::PlusEquals, + TokenKind::MinusEquals, + TokenKind::MultiplyEquals, + TokenKind::DivideEquals, + TokenKind::ModuloEquals, + TokenKind::BitwiseNotEquals, + TokenKind::BitwiseXorEquals, + TokenKind::BitwiseOrEquals, + TokenKind::BitwiseAndEquals, + TokenKind::LogicalNotEquals, + TokenKind::ExponentiationEquals, + TokenKind::LeftShiftEquals, + TokenKind::RightShiftEquals, + TokenKind::LogicalOrEquals, + TokenKind::LogicalAndEquals, ], found: k, })); @@ -596,9 +566,9 @@ impl LeekParser { k => { return Err(self.create_error(ParserErrorKind::UnexpectedToken { expected: vec![ - LeekTokenKind::Keyword(KeywordKind::Yeet), - LeekTokenKind::Keyword(KeywordKind::Leak), - LeekTokenKind::Identifier, + TokenKind::Keyword(KeywordKind::Yeet), + TokenKind::Keyword(KeywordKind::Leak), + TokenKind::Identifier, ], found: k, })); @@ -606,14 +576,11 @@ impl LeekParser { } match self - .peek_expect_is_of(vec![ - LeekTokenKind::Newline, - LeekTokenKind::CloseCurlyBracket, - ])? + .peek_expect_is_of(vec![TokenKind::Newline, TokenKind::CloseCurlyBracket])? 
.kind { - LeekTokenKind::Newline => children.push(terminal!(self.next_expect()?)), - LeekTokenKind::CloseCurlyBracket => {} + TokenKind::Newline => children.push(terminal!(self.next_expect()?)), + TokenKind::CloseCurlyBracket => {} _ => unreachable!(), } @@ -623,11 +590,11 @@ impl LeekParser { })) } - fn parse_yeet_statement(&mut self) -> Result { + fn parse_yeet_statement(&mut self) -> Result { let mut children = Vec::with_capacity(2); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Yeet))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Yeet))? )); self.bleed_whitespace()?; @@ -639,24 +606,24 @@ impl LeekParser { })) } - fn parse_local_variable_declaration(&mut self) -> Result { + fn parse_local_variable_declaration(&mut self) -> Result { let mut children = Vec::new(); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Leak))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Leak))? )); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); self.bleed_whitespace()?; // Parse explicit type match self.peek_expect()?.kind { // No type def found - LeekTokenKind::Equals => {} + TokenKind::Equals => {} // Found type def - LeekTokenKind::Colon => { - children.push(terminal!(self.next_expect_is(LeekTokenKind::Colon)?)); + TokenKind::Colon => { + children.push(terminal!(self.next_expect_is(TokenKind::Colon)?)); self.bleed_whitespace()?; todo!("parse explicit type in leak statement") @@ -664,7 +631,7 @@ impl LeekParser { k => { return Err(self.create_error_with_span( ParserErrorKind::UnexpectedToken { - expected: vec![LeekTokenKind::Colon, LeekTokenKind::Equals], + expected: vec![TokenKind::Colon, TokenKind::Equals], found: k, }, self.peek_expect()?.span.clone(), @@ -672,7 +639,7 @@ impl LeekParser { } } - children.push(terminal!(self.next_expect_is(LeekTokenKind::Equals)?)); + 
children.push(terminal!(self.next_expect_is(TokenKind::Equals)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); @@ -686,8 +653,8 @@ impl LeekParser { fn parse_variable_assignment( &mut self, identifier: ParseTreeNode, - operator: LeekTokenKind, - ) -> Result { + operator: TokenKind, + ) -> Result { let mut children = Vec::new(); children.push(identifier); @@ -712,22 +679,20 @@ impl LeekParser { /// | StructInitialization /// | StructFieldAccess /// | StructMethodCall - fn parse_expression(&mut self) -> Result { + fn parse_expression(&mut self) -> Result { let mut node = match self.peek_expect()?.kind { - LeekTokenKind::OpenParen => self.parse_atom()?, - LeekTokenKind::CharLiteral - | LeekTokenKind::StringLiteral - | LeekTokenKind::IntegerLiteral(_) - | LeekTokenKind::FloatLiteral => self.parse_atom()?, + TokenKind::OpenParen => self.parse_atom()?, + TokenKind::CharLiteral + | TokenKind::StringLiteral + | TokenKind::IntegerLiteral(_) + | TokenKind::FloatLiteral => self.parse_atom()?, k if k.is_unary_operator() => self.parse_unary_expression()?, - LeekTokenKind::Identifier => { + TokenKind::Identifier => { let identifier = self.parse_qualified_identifier()?; match self.peek_nth_ignore_whitespace_expect(0)?.kind { - LeekTokenKind::OpenParen => self.parse_function_call_expression(identifier)?, - LeekTokenKind::OpenCurlyBracket => { - self.parse_struct_initialization(identifier)? 
- } + TokenKind::OpenParen => self.parse_function_call_expression(identifier)?, + TokenKind::OpenCurlyBracket => self.parse_struct_initialization(identifier)?, _ => self.parse_atom_from_identifier(identifier)?, } } @@ -735,15 +700,15 @@ impl LeekParser { return Err(self.create_error_with_span( ParserErrorKind::UnexpectedToken { expected: vec![ - LeekTokenKind::OpenParen, - LeekTokenKind::CharLiteral, - LeekTokenKind::StringLiteral, - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Binary), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Octal), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Hexadecimal), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Decimal), - LeekTokenKind::FloatLiteral, - LeekTokenKind::Identifier, + TokenKind::OpenParen, + TokenKind::CharLiteral, + TokenKind::StringLiteral, + TokenKind::IntegerLiteral(IntegerLiteralKind::Binary), + TokenKind::IntegerLiteral(IntegerLiteralKind::Octal), + TokenKind::IntegerLiteral(IntegerLiteralKind::Hexadecimal), + TokenKind::IntegerLiteral(IntegerLiteralKind::Decimal), + TokenKind::FloatLiteral, + TokenKind::Identifier, ], found: k, }, @@ -752,7 +717,7 @@ impl LeekParser { } }; - while self.peek_nth_ignore_whitespace_expect(0)?.kind == LeekTokenKind::Period { + while self.peek_nth_ignore_whitespace_expect(0)?.kind == TokenKind::Period { // Check to see if it is an indexable object match node.non_terminal().kind { ParseTreeNonTerminalKind::QualifiedIdentifier @@ -777,12 +742,11 @@ impl LeekParser { let ParseTreeNonTerminalKind::QualifiedIdentifier = child.kind else { return Err(self.create_error(ParserErrorKind::IndexIntoNonIdentifier)); - }; }; node = match self.peek_nth_ignore_whitespace_expect(2)?.kind { - LeekTokenKind::OpenParen => self.parse_struct_method_call( + TokenKind::OpenParen => self.parse_struct_method_call( ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::Expression, children: vec![node], @@ -823,22 +787,22 @@ impl LeekParser { fn 
parse_function_call_expression( &mut self, identifier: ParseTreeNode, - ) -> Result { + ) -> Result { let mut children = Vec::new(); children.push(identifier); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::OpenParen)?)); + children.push(terminal!(self.next_expect_is(TokenKind::OpenParen)?)); self.bleed_whitespace()?; match self.peek_expect()?.kind { - LeekTokenKind::CloseParen => {} + TokenKind::CloseParen => {} _ => children.push(self.parse_function_arguments()?), } self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::CloseParen)?)); + children.push(terminal!(self.next_expect_is(TokenKind::CloseParen)?)); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::FunctionCallExpression, @@ -850,14 +814,14 @@ impl LeekParser { /// ( /// (Expression `,`)* Expression /// ) - fn parse_function_arguments(&mut self) -> Result { + fn parse_function_arguments(&mut self) -> Result { let mut children = Vec::new(); children.push(self.parse_expression()?); self.bleed_whitespace()?; - while self.peek_expect_is(LeekTokenKind::Comma)? { - children.push(terminal!(self.next_expect_is(LeekTokenKind::Comma)?)); + while self.peek_expect_is(TokenKind::Comma)? { + children.push(terminal!(self.next_expect_is(TokenKind::Comma)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); self.bleed_whitespace()?; @@ -878,37 +842,30 @@ impl LeekParser { fn parse_struct_initialization( &mut self, identifier: ParseTreeNode, - ) -> Result { + ) -> Result { let mut children = Vec::new(); children.push(identifier); self.bleed_whitespace()?; - children.push(terminal!( - self.next_expect_is(LeekTokenKind::OpenCurlyBracket)? - )); + children.push(terminal!(self.next_expect_is(TokenKind::OpenCurlyBracket)?)); self.bleed_whitespace()?; - while !self.peek_expect_is(LeekTokenKind::CloseCurlyBracket)? 
{ - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + while !self.peek_expect_is(TokenKind::CloseCurlyBracket)? { + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Colon)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Colon)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); - self.peek_expect_is_of(vec![ - LeekTokenKind::Newline, - LeekTokenKind::CloseCurlyBracket, - ])?; + self.peek_expect_is_of(vec![TokenKind::Newline, TokenKind::CloseCurlyBracket])?; self.bleed_whitespace()?; } - children.push(terminal!( - self.next_expect_is(LeekTokenKind::CloseCurlyBracket)? - )); + children.push(terminal!(self.next_expect_is(TokenKind::CloseCurlyBracket)?)); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::StructInitialization, @@ -921,7 +878,7 @@ impl LeekParser { fn parse_binary_expression( &mut self, lhs: ParseTreeNode, - ) -> Result { + ) -> Result { let mut children = Vec::new(); // TODO: Parse operator precedence (use a stack) @@ -932,24 +889,24 @@ impl LeekParser { // Binary operator children.push(terminal!(self.next_expect_is_of(vec![ - LeekTokenKind::DoubleEquals, - LeekTokenKind::LessThan, - LeekTokenKind::LessThanOrEqual, - LeekTokenKind::GreaterThan, - LeekTokenKind::GreaterThanOrEqual, - LeekTokenKind::Plus, - LeekTokenKind::Minus, - LeekTokenKind::Asterisk, - LeekTokenKind::Divide, - LeekTokenKind::Modulo, - LeekTokenKind::BitwiseXor, - LeekTokenKind::BitwiseOr, - LeekTokenKind::BitwiseAnd, - LeekTokenKind::Exponentiation, - LeekTokenKind::LeftShift, - LeekTokenKind::RightShift, - LeekTokenKind::LogicalOr, - LeekTokenKind::LogicalAnd, + TokenKind::DoubleEquals, + TokenKind::LessThan, + TokenKind::LessThanOrEqual, + TokenKind::GreaterThan, + TokenKind::GreaterThanOrEqual, + TokenKind::Plus, + TokenKind::Minus, + TokenKind::Asterisk, + TokenKind::Divide, + 
TokenKind::Modulo, + TokenKind::BitwiseXor, + TokenKind::BitwiseOr, + TokenKind::BitwiseAnd, + TokenKind::Exponentiation, + TokenKind::LeftShift, + TokenKind::RightShift, + TokenKind::LogicalOr, + TokenKind::LogicalAnd, ])?)); self.bleed_whitespace()?; @@ -967,7 +924,7 @@ impl LeekParser { fn parse_struct_field_access( &mut self, lhs: ParseTreeNode, - ) -> Result { + ) -> Result { let mut children = Vec::new(); // Left hand expression @@ -975,11 +932,11 @@ impl LeekParser { self.bleed_whitespace()?; // Dot operator - children.push(terminal!(self.next_expect_is(LeekTokenKind::Period)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Period)?)); self.bleed_whitespace()?; // Field - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::StructFieldAccess, @@ -992,7 +949,7 @@ impl LeekParser { fn parse_struct_method_call( &mut self, lhs: ParseTreeNode, - ) -> Result { + ) -> Result { let mut children = Vec::new(); // Left hand expression @@ -1000,11 +957,11 @@ impl LeekParser { self.bleed_whitespace()?; // Dot operator - children.push(terminal!(self.next_expect_is(LeekTokenKind::Period)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Period)?)); self.bleed_whitespace()?; // Method - let identifier = terminal!(self.next_expect_is(LeekTokenKind::Identifier)?); + let identifier = terminal!(self.next_expect_is(TokenKind::Identifier)?); children.push(self.parse_function_call_expression(identifier)?); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1015,14 +972,14 @@ impl LeekParser { /// UnaryExpression :: /// unary_operator Expression - fn parse_unary_expression(&mut self) -> Result { + fn parse_unary_expression(&mut self) -> Result { let mut children = Vec::new(); // Unary operator children.push(terminal!(self.next_expect_is_of(vec![ - 
LeekTokenKind::BitwiseNot, - LeekTokenKind::LogicalNot, - LeekTokenKind::Asterisk + TokenKind::BitwiseNot, + TokenKind::LogicalNot, + TokenKind::Asterisk ])?)); self.bleed_whitespace()?; @@ -1041,38 +998,38 @@ impl LeekParser { /// | ( /// `(` Expression `)` /// ) - fn parse_atom(&mut self) -> Result { + fn parse_atom(&mut self) -> Result { let mut children = Vec::new(); match self.peek_expect()?.kind { - LeekTokenKind::Identifier => { + TokenKind::Identifier => { children.push(self.parse_qualified_identifier()?); } k if k.is_literal() => { children.push(terminal!(self.next_expect()?)); } - LeekTokenKind::OpenParen => { - children.push(terminal!(self.next_expect_is(LeekTokenKind::OpenParen)?)); + TokenKind::OpenParen => { + children.push(terminal!(self.next_expect_is(TokenKind::OpenParen)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::CloseParen)?)); + children.push(terminal!(self.next_expect_is(TokenKind::CloseParen)?)); } k => { return Err(self.create_error_with_span( ParserErrorKind::UnexpectedToken { expected: vec![ - LeekTokenKind::Identifier, - LeekTokenKind::OpenParen, - LeekTokenKind::CharLiteral, - LeekTokenKind::StringLiteral, - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Binary), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Octal), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Hexadecimal), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Decimal), - LeekTokenKind::FloatLiteral, + TokenKind::Identifier, + TokenKind::OpenParen, + TokenKind::CharLiteral, + TokenKind::StringLiteral, + TokenKind::IntegerLiteral(IntegerLiteralKind::Binary), + TokenKind::IntegerLiteral(IntegerLiteralKind::Octal), + TokenKind::IntegerLiteral(IntegerLiteralKind::Hexadecimal), + TokenKind::IntegerLiteral(IntegerLiteralKind::Decimal), + TokenKind::FloatLiteral, ], found: k, }, @@ -1090,7 +1047,7 @@ impl LeekParser { fn parse_atom_from_identifier( 
&mut self, node: ParseTreeNode, - ) -> Result { + ) -> Result { Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::Atom, children: vec![node], @@ -1099,25 +1056,25 @@ impl LeekParser { /// StructDefinition :: /// `struct` identifier StructDefinitionBody? - fn parse_struct_definition(&mut self) -> Result { + fn parse_struct_definition(&mut self) -> Result { let mut children = Vec::new(); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Struct))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Struct))? )); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); if self .peek_nth_ignore_whitespace(0)? - .is_some_and(|token| token.kind == LeekTokenKind::OpenCurlyBracket) + .is_some_and(|token| token.kind == TokenKind::OpenCurlyBracket) { self.bleed_whitespace()?; children.push(self.parse_struct_definition_body()?) } else if self.lexer.has_next()? { // If open bracket does not follow, must be None or newline - children.push(terminal!(self.next_expect_is(LeekTokenKind::Newline)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Newline)?)); } Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1130,22 +1087,20 @@ impl LeekParser { /// `{` /// (TypeAssociation `\n`)* TypeAssociation /// `}` - fn parse_struct_definition_body(&mut self) -> Result { + fn parse_struct_definition_body(&mut self) -> Result { let mut children = Vec::new(); - children.push(terminal!( - self.next_expect_is(LeekTokenKind::OpenCurlyBracket)? 
- )); + children.push(terminal!(self.next_expect_is(TokenKind::OpenCurlyBracket)?)); self.bleed_whitespace()?; - if self.peek_nth_ignore_whitespace_expect(0)?.kind != LeekTokenKind::CloseCurlyBracket { + if self.peek_nth_ignore_whitespace_expect(0)?.kind != TokenKind::CloseCurlyBracket { // Non `}`, so parse at last one type association children.push(self.parse_type_association()?); - while self.peek_expect_is(LeekTokenKind::Newline)? { + while self.peek_expect_is(TokenKind::Newline)? { self.bleed_whitespace()?; - if self.peek_expect_is(LeekTokenKind::CloseCurlyBracket)? { + if self.peek_expect_is(TokenKind::CloseCurlyBracket)? { break; } @@ -1153,9 +1108,7 @@ impl LeekParser { } } - children.push(terminal!( - self.next_expect_is(LeekTokenKind::CloseCurlyBracket)? - )); + children.push(terminal!(self.next_expect_is(TokenKind::CloseCurlyBracket)?)); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::StructDefinitionBody, @@ -1165,13 +1118,13 @@ impl LeekParser { /// TypeAssociation :: /// (identifier `:` Type) - fn parse_type_association(&mut self) -> Result { + fn parse_type_association(&mut self) -> Result { let mut children = Vec::new(); - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Colon)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Colon)?)); self.bleed_whitespace()?; children.push(self.parse_type()?); @@ -1184,7 +1137,7 @@ impl LeekParser { /// Type :: /// QualifiedIdentifier - fn parse_type(&mut self) -> Result { + fn parse_type(&mut self) -> Result { let children = vec![self.parse_qualified_identifier()?]; Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1195,20 +1148,20 @@ impl LeekParser { /// QualifiedIdentifier :: /// identifier (`::` identifier)* - fn parse_qualified_identifier(&mut self) -> Result { 
+ fn parse_qualified_identifier(&mut self) -> Result { let mut children = Vec::new(); - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); while self .peek_nth_ignore_whitespace(0)? - .is_some_and(|token| token.kind == LeekTokenKind::DoubleColon) + .is_some_and(|token| token.kind == TokenKind::DoubleColon) { self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::DoubleColon)?)); + children.push(terminal!(self.next_expect_is(TokenKind::DoubleColon)?)); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); } Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1219,30 +1172,30 @@ impl LeekParser { /// ConstantVariableDeclaration :: /// `perm` identifier `:` Type `=` Expression - fn parse_constant_variable_declaration(&mut self) -> Result { + fn parse_constant_variable_declaration(&mut self) -> Result { let mut children = Vec::new(); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Perm))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Perm))? )); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Colon)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Colon)?)); self.bleed_whitespace()?; children.push(self.parse_type()?); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Equals)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Equals)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); if self.lexer.has_next()? 
{ - children.push(terminal!(self.next_expect_is(LeekTokenKind::Newline)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Newline)?)); } Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1253,30 +1206,30 @@ impl LeekParser { /// StaticVariableDeclaration :: /// `hold` identifier `:` Type `=` Expression - fn parse_static_variable_declaration(&mut self) -> Result { + fn parse_static_variable_declaration(&mut self) -> Result { let mut children = Vec::new(); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Hold))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Hold))? )); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Colon)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Colon)?)); self.bleed_whitespace()?; children.push(self.parse_type()?); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Equals)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Equals)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); if self.lexer.has_next()? 
{ - children.push(terminal!(self.next_expect_is(LeekTokenKind::Newline)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Newline)?)); } Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1286,7 +1239,7 @@ impl LeekParser { } /// Internal method to parse all the tokens from the internal lexer - fn parse_from_lexer(&mut self) -> Result { + fn parse_from_lexer(&mut self) -> Result { let mut children = Vec::new(); self.bleed_whitespace()?; @@ -1314,20 +1267,18 @@ mod test { use ansi_term::Color; use crate::{ - frontend::lexer::{IntegerLiteralKind, KeywordKind, LeekLexer, LeekToken, LeekTokenKind}, + frontend::lexer::{IntegerLiteralKind, KeywordKind, Lexer, Token, TokenKind}, frontend::reader::FileReader, }; - use super::{ - LeekParser, ParseTreeNode, ParseTreeNodeNonTerminal, ParseTreeNonTerminalKind, Parser, - }; + use super::{ParseTreeNode, ParseTreeNodeNonTerminal, ParseTreeNonTerminalKind, Parser}; fn compare_input_to_expected(input: &str, expected_tree: ParseTreeNode) { // Collect tokens from lexer let reader = FileReader::from(input.to_owned()); - let lexer = LeekLexer::new(reader); + let lexer = Lexer::new(reader); let parse_tree = - LeekParser::parse(lexer).unwrap_or_else(|e| panic!("Could not parse input: \n{e}")); + Parser::parse(lexer).unwrap_or_else(|e| panic!("Could not parse input: \n{e}")); if parse_tree.root == expected_tree { return; @@ -1352,7 +1303,7 @@ mod test { macro_rules! 
terminal_from { ($kind:expr, $text:literal) => { - ParseTreeNode::Terminal(LeekToken::from(($kind, $text))) + ParseTreeNode::Terminal(Token::from(($kind, $text))) }; } @@ -1380,139 +1331,139 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::ConstantVariableDeclaration, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Perm), "perm"), - terminal_from!(LeekTokenKind::Identifier, "PI"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Keyword(KeywordKind::Perm), "perm"), + terminal_from!(TokenKind::Identifier, "PI"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "f32"), + terminal_from!(TokenKind::Identifier, "f32"), ] )] ), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::FloatLiteral, + TokenKind::FloatLiteral, "3.1415926535" ),] ),] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( ParseTreeNonTerminalKind::ConstantVariableDeclaration, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Perm), "perm"), - terminal_from!(LeekTokenKind::Identifier, "E"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Keyword(KeywordKind::Perm), "perm"), + terminal_from!(TokenKind::Identifier, "E"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "f32"), + terminal_from!(TokenKind::Identifier, "f32"), ] )] ), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, 
vec![non_terminal!( ParseTreeNonTerminalKind::Atom, - vec![terminal_from!(LeekTokenKind::FloatLiteral, "2.178"),] + vec![terminal_from!(TokenKind::FloatLiteral, "2.178"),] ),] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( ParseTreeNonTerminalKind::ConstantVariableDeclaration, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Perm), "perm"), - terminal_from!(LeekTokenKind::Identifier, "THREE"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Keyword(KeywordKind::Perm), "perm"), + terminal_from!(TokenKind::Identifier, "THREE"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "u8"), + terminal_from!(TokenKind::Identifier, "u8"), ] )] ), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::IntegerLiteral( + TokenKind::IntegerLiteral( IntegerLiteralKind::Hexadecimal ), "0x03" ),] ),] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( ParseTreeNonTerminalKind::StaticVariableDeclaration, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Hold), "hold"), - terminal_from!(LeekTokenKind::Identifier, "state"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Keyword(KeywordKind::Hold), "hold"), + terminal_from!(TokenKind::Identifier, "state"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "u8"), + terminal_from!(TokenKind::Identifier, "u8"), ] )] ), - terminal_from!(LeekTokenKind::Equals, "="), + 
terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Binary), + TokenKind::IntegerLiteral(IntegerLiteralKind::Binary), "0b0001" ),] ),] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( ParseTreeNonTerminalKind::FunctionDefinition, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Fn), "fn"), + terminal_from!(TokenKind::Keyword(KeywordKind::Fn), "fn"), non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "main"), + terminal_from!(TokenKind::Identifier, "main"), ] ), non_terminal!( ParseTreeNonTerminalKind::FunctionParameters, vec![ - terminal_from!(LeekTokenKind::OpenParen, "("), - terminal_from!(LeekTokenKind::CloseParen, ")"), + terminal_from!(TokenKind::OpenParen, "("), + terminal_from!(TokenKind::CloseParen, ")"), ] ), non_terminal!( ParseTreeNonTerminalKind::Block, vec![ - terminal_from!(LeekTokenKind::OpenCurlyBracket, "{"), + terminal_from!(TokenKind::OpenCurlyBracket, "{"), non_terminal!( ParseTreeNonTerminalKind::Statement, vec![ @@ -1520,17 +1471,17 @@ mod test { ParseTreeNonTerminalKind::LocalVariableDeclaration, vec![ terminal_from!( - LeekTokenKind::Keyword(KeywordKind::Leak), + TokenKind::Keyword(KeywordKind::Leak), "leak" ), - terminal_from!(LeekTokenKind::Identifier, "a"), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Identifier, "a"), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::IntegerLiteral( + TokenKind::IntegerLiteral( IntegerLiteralKind::Decimal ), "1" @@ -1539,7 +1490,7 @@ mod test { ), ] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] 
), non_terminal!( @@ -1549,17 +1500,17 @@ mod test { ParseTreeNonTerminalKind::LocalVariableDeclaration, vec![ terminal_from!( - LeekTokenKind::Keyword(KeywordKind::Leak), + TokenKind::Keyword(KeywordKind::Leak), "leak" ), - terminal_from!(LeekTokenKind::Identifier, "b"), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Identifier, "b"), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::IntegerLiteral( + TokenKind::IntegerLiteral( IntegerLiteralKind::Decimal ), "2" @@ -1568,7 +1519,7 @@ mod test { ), ] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( @@ -1578,11 +1529,11 @@ mod test { ParseTreeNonTerminalKind::LocalVariableDeclaration, vec![ terminal_from!( - LeekTokenKind::Keyword(KeywordKind::Leak), + TokenKind::Keyword(KeywordKind::Leak), "leak" ), - terminal_from!(LeekTokenKind::Identifier, "node"), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Identifier, "node"), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( @@ -1591,10 +1542,10 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "Node"), + terminal_from!(TokenKind::Identifier, "Node"), ] ), - terminal_from!(LeekTokenKind::OpenParen, "("), + terminal_from!(TokenKind::OpenParen, "("), non_terminal!( ParseTreeNonTerminalKind::FunctionArguments, vec![non_terminal!( @@ -1602,19 +1553,19 @@ mod test { vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::StringLiteral, + TokenKind::StringLiteral, "\"text\"" ),] ),] ),] ), - terminal_from!(LeekTokenKind::CloseParen, ")"), + terminal_from!(TokenKind::CloseParen, ")"), ] ),] ), ] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + 
terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( @@ -1626,10 +1577,10 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "println"), + terminal_from!(TokenKind::Identifier, "println"), ] ), - terminal_from!(LeekTokenKind::OpenParen, "("), + terminal_from!(TokenKind::OpenParen, "("), non_terminal!( ParseTreeNonTerminalKind::FunctionArguments, vec![non_terminal!( @@ -1640,20 +1591,20 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "a"), + terminal_from!(TokenKind::Identifier, "a"), ] ) ] ),] ),] ), - terminal_from!(LeekTokenKind::CloseParen, ")"), + terminal_from!(TokenKind::CloseParen, ")"), ] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), - terminal_from!(LeekTokenKind::CloseCurlyBracket, "}"), + terminal_from!(TokenKind::CloseCurlyBracket, "}"), ] ), ] @@ -1685,21 +1636,21 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::StructDefinition, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Struct), "struct"), - terminal_from!(LeekTokenKind::Identifier, "EmptyStruct"), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Keyword(KeywordKind::Struct), "struct"), + terminal_from!(TokenKind::Identifier, "EmptyStruct"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( ParseTreeNonTerminalKind::StructDefinition, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Struct), "struct"), - terminal_from!(LeekTokenKind::Identifier, "SomeStruct"), + terminal_from!(TokenKind::Keyword(KeywordKind::Struct), "struct"), + terminal_from!(TokenKind::Identifier, "SomeStruct"), non_terminal!( ParseTreeNonTerminalKind::StructDefinitionBody, vec![ - terminal_from!(LeekTokenKind::OpenCurlyBracket, "{"), - terminal_from!(LeekTokenKind::CloseCurlyBracket, "}"), + terminal_from!(TokenKind::OpenCurlyBracket, "{"), + 
terminal_from!(TokenKind::CloseCurlyBracket, "}"), ] ) ] @@ -1707,23 +1658,23 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::StructDefinition, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Struct), "struct"), - terminal_from!(LeekTokenKind::Identifier, "BinaryTreeNode"), + terminal_from!(TokenKind::Keyword(KeywordKind::Struct), "struct"), + terminal_from!(TokenKind::Identifier, "BinaryTreeNode"), non_terminal!( ParseTreeNonTerminalKind::StructDefinitionBody, vec![ - terminal_from!(LeekTokenKind::OpenCurlyBracket, "{"), + terminal_from!(TokenKind::OpenCurlyBracket, "{"), non_terminal!( ParseTreeNonTerminalKind::TypeAssociation, vec![ - terminal_from!(LeekTokenKind::Identifier, "left"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Identifier, "left"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![terminal_from!( - LeekTokenKind::Identifier, + TokenKind::Identifier, "BinaryTreeNode" ),] )] @@ -1733,14 +1684,14 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::TypeAssociation, vec![ - terminal_from!(LeekTokenKind::Identifier, "right"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Identifier, "right"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![terminal_from!( - LeekTokenKind::Identifier, + TokenKind::Identifier, "BinaryTreeNode" ),] )] @@ -1750,21 +1701,21 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::TypeAssociation, vec![ - terminal_from!(LeekTokenKind::Identifier, "data"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Identifier, "data"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![terminal_from!( - 
LeekTokenKind::Identifier, + TokenKind::Identifier, "i32" ),] )] ), ] ), - terminal_from!(LeekTokenKind::CloseCurlyBracket, "}"), + terminal_from!(TokenKind::CloseCurlyBracket, "}"), ] ) ] diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 1110f0a..8689e6c 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -3,12 +3,12 @@ use std::path::PathBuf; use clap::Parser; use leek::{ backend::codegen::CodeGenTarget, - common::config::{BuildMode, EmitMode, LeekCompilerConfig, OptimizationLevel}, + common::config::{BuildMode, CompilerConfig, EmitMode, OptimizationLevel}, }; #[derive(Parser, Debug)] #[command(author, version, about = "A bootstrap compiler for the Leek language", long_about = None)] -struct LeekCompilerArgs { +struct CompilerArgs { #[arg(required = true)] input_files: Vec, #[arg(short, long, value_enum, value_name = "EMIT_MODE", default_value_t = EmitMode::default(), help = "Specifies what kind of output to generate")] @@ -27,9 +27,9 @@ struct LeekCompilerArgs { opt_level: OptimizationLevel, } -impl From for LeekCompilerConfig { - fn from(args: LeekCompilerArgs) -> Self { - LeekCompilerConfig { +impl From for CompilerConfig { + fn from(args: CompilerArgs) -> Self { + CompilerConfig { opt_level: args.opt_level, build_mode: if args.release { BuildMode::Release @@ -46,10 +46,10 @@ impl From for LeekCompilerConfig { fn main() { // Get the command line arguments - let args = LeekCompilerArgs::parse(); + let args = CompilerArgs::parse(); // Convert to the global config struct - let config: LeekCompilerConfig = args.into(); + let config: CompilerConfig = args.into(); for file in &config.input_files { let ast = leek::frontend::parse_file(file.into()).unwrap_or_else(|e| e.report()); From 3ce474afcf18f4b79557715bc20b0c557166cec5 Mon Sep 17 00:00:00 2001 From: Wowkster <49880655+wowkster@users.noreply.github.com> Date: Sat, 5 Aug 2023 19:55:21 -0400 Subject: [PATCH 7/8] refactor: :recycle: reorganize lexer module --- 
compiler/src/frontend/ast/builder.rs | 2 +- .../src/frontend/{lexer.rs => lexer/mod.rs} | 841 +----------------- compiler/src/frontend/lexer/test.rs | 516 +++++++++++ compiler/src/frontend/lexer/token.rs | 318 +++++++ compiler/src/frontend/parser.rs | 16 +- 5 files changed, 852 insertions(+), 841 deletions(-) rename compiler/src/frontend/{lexer.rs => lexer/mod.rs} (53%) create mode 100644 compiler/src/frontend/lexer/test.rs create mode 100644 compiler/src/frontend/lexer/token.rs diff --git a/compiler/src/frontend/ast/builder.rs b/compiler/src/frontend/ast/builder.rs index 94b99d2..ec50ccc 100644 --- a/compiler/src/frontend/ast/builder.rs +++ b/compiler/src/frontend/ast/builder.rs @@ -9,7 +9,7 @@ use crate::{ StructInitialization, StructMethodCall, UnaryExpression, VariableDeclaration, VariableDeclarationKind, }, - lexer::{IntegerLiteralKind, KeywordKind, Token, TokenKind}, + lexer::token::{IntegerLiteralKind, KeywordKind, Token, TokenKind}, parser::{ParseTree, ParseTreeNode, ParseTreeNodeNonTerminal, ParseTreeNonTerminalKind}, }, }; diff --git a/compiler/src/frontend/lexer.rs b/compiler/src/frontend/lexer/mod.rs similarity index 53% rename from compiler/src/frontend/lexer.rs rename to compiler/src/frontend/lexer/mod.rs index 1d4b349..777411b 100644 --- a/compiler/src/frontend/lexer.rs +++ b/compiler/src/frontend/lexer/mod.rs @@ -9,320 +9,10 @@ use crate::{ frontend::reader::CharacterReader, }; -#[allow(dead_code)] -#[cfg_attr(not(test), derive(Debug))] -#[derive(Clone)] -pub struct Token { - pub kind: TokenKind, - pub text: String, - pub span: Span, -} - -impl From<(TokenKind, T)> for Token -where - T: Into + Sized, -{ - fn from((kind, text): (TokenKind, T)) -> Self { - Self { - kind, - text: text.into(), - span: Span::from(Position::new()), - } - } -} - -#[cfg(test)] -impl Debug for Token { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Token") - .field("kind", &self.kind) - .field("text", &self.text) - .finish() - } -} - 
-impl Display for Token { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?} => {:?}", self.kind, self.text) - } -} - -impl PartialEq for Token { - fn eq(&self, other: &Self) -> bool { - self.kind == other.kind && self.text == other.text - } -} - -#[derive(Debug, Eq, PartialEq, Clone, Copy)] -pub enum IntegerLiteralKind { - Decimal, - Hexadecimal, - Binary, - Octal, -} - -#[derive(Debug, Eq, PartialEq, Clone, Copy)] -pub enum KeywordKind { - Fn, - Struct, - Leak, - Hold, - Perm, - If, - Else, - While, - For, - Yeet, -} - -impl TryFrom<&String> for KeywordKind { - type Error = (); - - fn try_from(value: &String) -> Result { - Ok(match value.as_str() { - "fn" => Self::Fn, - "struct" => Self::Struct, - "leak" => Self::Leak, - "hold" => Self::Hold, - "perm" => Self::Perm, - "if" => Self::If, - "else" => Self::Else, - "while" => Self::While, - "for" => Self::For, - "yeet" => Self::Yeet, - _ => return Err(()), - }) - } -} - -#[derive(Debug, Eq, PartialEq, Clone, Copy)] -pub enum TokenKind { - // Significant Whitespace - Newline, - - // Words - Keyword(KeywordKind), // leak - Identifier, // YourMom - - // Grouping - OpenParen, // ( - CloseParen, // ) - OpenBracket, // [ - CloseBracket, // ] - OpenCurlyBracket, // { - CloseCurlyBracket, // } - - // Literals - StringLiteral, // "your mom" - CharLiteral, // 'd' - IntegerLiteral(IntegerLiteralKind), // 69 - FloatLiteral, // 420.69 - - // Single Operators - Equals, // = - DoubleEquals, // == - LessThan, // < - LessThanOrEqual, // <= - GreaterThan, // > - GreaterThanOrEqual, // >= - Plus, // + - PlusEquals, // += - Minus, // - - MinusEquals, // -= - Asterisk, // * - MultiplyEquals, // *= - Divide, // / - DivideEquals, // /= - Modulo, // % - ModuloEquals, // %= - BitwiseNot, // ~ - BitwiseNotEquals, // ~= - BitwiseXor, // ^ - BitwiseXorEquals, // ^= - BitwiseOr, // | - BitwiseOrEquals, // |= - BitwiseAnd, // & - BitwiseAndEquals, // &= - LogicalNot, // ! 
- LogicalNotEquals, // != - - // Double Operators - Exponentiation, // ** - ExponentiationEquals, // **= - LeftShift, // << - LeftShiftEquals, // <<= - RightShift, // >> - RightShiftEquals, // >>= - LogicalOr, // || - LogicalOrEquals, // ||= - LogicalAnd, // && - LogicalAndEquals, // &&= - - // Non-Operator symbols - Arrow, // -> - QuestionMark, // ? - Comma, // , - Semicolon, // ; - Colon, // : - DoubleColon, // :: - Period, // . - BangCoalescing, // !. - BackSlash, // \ - Underscore, // _ - Asperand, // @ - Hash, // # - DollarSign, // $ -} - -impl TokenKind { - pub fn is_assignment_operator(&self) -> bool { - matches!( - self, - Self::Equals - | Self::PlusEquals - | Self::MinusEquals - | Self::MultiplyEquals - | Self::DivideEquals - | Self::ModuloEquals - | Self::BitwiseNotEquals - | Self::BitwiseXorEquals - | Self::BitwiseOrEquals - | Self::BitwiseAndEquals - | Self::LogicalNotEquals - | Self::ExponentiationEquals - | Self::LeftShiftEquals - | Self::RightShiftEquals - | Self::LogicalOrEquals - | Self::LogicalAndEquals - ) - } - - pub fn is_unary_operator(&self) -> bool { - matches!(self, Self::BitwiseNot | Self::LogicalNot | Self::Asterisk) - } - - pub fn is_binary_operator(&self) -> bool { - matches!( - self, - Self::DoubleEquals - | Self::LessThan - | Self::LessThanOrEqual - | Self::GreaterThan - | Self::GreaterThanOrEqual - | Self::Plus - | Self::Minus - | Self::Asterisk - | Self::Divide - | Self::Modulo - | Self::BitwiseXor - | Self::BitwiseOr - | Self::BitwiseAnd - | Self::Exponentiation - | Self::LeftShift - | Self::RightShift - | Self::LogicalOr - | Self::LogicalAnd - ) - } - - pub fn is_literal(&self) -> bool { - matches!( - self, - Self::CharLiteral | Self::StringLiteral | Self::FloatLiteral | Self::IntegerLiteral(_) - ) - } - - fn grouping_symbol_from(c: char) -> TokenKind { - match c { - '(' => Self::OpenParen, - ')' => Self::CloseParen, - '[' => Self::OpenBracket, - ']' => Self::CloseBracket, - '{' => Self::OpenCurlyBracket, - '}' => 
Self::CloseCurlyBracket, - x => unreachable!("Illegal non-grouping symbol `{}`", x), - } - } - - fn single_operator_from(c: char) -> TokenKind { - match c { - '=' => Self::Equals, - '<' => Self::LessThan, - '>' => Self::GreaterThan, - '!' => Self::LogicalNot, - '+' => Self::Plus, - '-' => Self::Minus, - '*' => Self::Asterisk, - '/' => Self::Divide, - '%' => Self::Modulo, - '~' => Self::BitwiseNot, - '^' => Self::BitwiseXor, - '|' => Self::BitwiseOr, - '&' => Self::BitwiseAnd, - x => unreachable!("Illegal single non-operator `{}`", x), - } - } - - fn double_operator_from(c: char) -> TokenKind { - match c { - '*' => Self::Exponentiation, - '<' => Self::LeftShift, - '>' => Self::RightShift, - '&' => Self::LogicalAnd, - '|' => Self::LogicalOr, - x => unreachable!("Illegal double non-operator `{}`", x), - } - } +use self::token::{TokenKind, IntegerLiteralKind, Token, KeywordKind}; - fn single_equals_operator_from(c: char) -> TokenKind { - match c { - '=' => Self::DoubleEquals, - '<' => Self::LessThanOrEqual, - '>' => Self::GreaterThanOrEqual, - '!' => Self::LogicalNotEquals, - '+' => Self::PlusEquals, - '-' => Self::MinusEquals, - '*' => Self::MultiplyEquals, - '/' => Self::DivideEquals, - '%' => Self::ModuloEquals, - '~' => Self::BitwiseNotEquals, - '^' => Self::BitwiseXorEquals, - '|' => Self::BitwiseOrEquals, - '&' => Self::BitwiseAndEquals, - x => unreachable!("Illegal single non-equals-operator `{}`", x), - } - } - - fn double_equals_operator_from(c: char) -> TokenKind { - match c { - '*' => Self::ExponentiationEquals, - '<' => Self::LeftShiftEquals, - '>' => Self::RightShiftEquals, - '&' => Self::LogicalAndEquals, - '|' => Self::LogicalOrEquals, - x => unreachable!("Illegal double non-equals-operator `{}`", x), - } - } - - fn other_symbol_from(c: impl Into) -> TokenKind { - match c.into().as_str() { - "->" => Self::Arrow, - "?" => Self::QuestionMark, - "!." 
=> Self::BangCoalescing, - "," => Self::Comma, - ";" => Self::Semicolon, - ":" => Self::Colon, - "::" => Self::DoubleColon, - "." => Self::Period, - "\\" => Self::BackSlash, - "_" => Self::Underscore, - "@" => Self::Asperand, - "#" => Self::Hash, - "$" => Self::DollarSign, - x => unreachable!("Illegal non-other-symbol `{}`", x), - } - } -} +pub mod token; +mod test; /// Represents an error when lexing a file #[derive(Debug)] @@ -1030,7 +720,7 @@ impl Lexer { Ok(None) } - fn _next(&self) -> Result, LexerError> { + fn get_next_cached_or_read(&self) -> Result, LexerError> { let peek_forward = unsafe { &mut *self.peek_forward.get() }; // Check if more tokens have already been precomputed for us @@ -1045,8 +735,9 @@ impl Lexer { /// Lexer public interface impl Lexer { + #[allow(clippy::should_implement_trait)] pub fn next(&mut self) -> Result, LexerError> { - self._next() + self.get_next_cached_or_read() } pub fn peek(&self) -> Result, LexerError> { @@ -1061,7 +752,7 @@ impl Lexer { let peek_forward = unsafe { &mut *self.peek_forward.get() }; // If there are more tokens - if let Some(token) = self._next()? { + if let Some(token) = self.get_next_cached_or_read()? 
{ // Store the token for later usage peek_forward.push_back(token); @@ -1114,521 +805,3 @@ impl Lexer { character_reader.get_source_file() } } - -#[cfg(test)] -mod test { - use crate::{ - frontend::lexer::{IntegerLiteralKind::*, KeywordKind::*, Token as LT, TokenKind::*}, - frontend::reader::FileReader, - }; - - use super::{Lexer, LexerError, LexerErrorKind::*}; - - fn compare_input_to_expected(input: &str, expected_tokens: Vec) { - // Collect tokens from lexer - let reader = FileReader::from(input.to_owned()); - let mut lexer = Lexer::new(reader); - - let mut lexer_tokens = Vec::new(); - - while lexer.has_next().unwrap() { - lexer_tokens.push(lexer.next().unwrap().unwrap()) - } - - assert_eq!( - lexer_tokens, expected_tokens, - "Lexer tokens did not match expected" - ) - } - - fn lex_input(input: &str) -> Result, LexerError> { - // Collect tokens from lexer - let reader = FileReader::from(input.to_owned()); - let mut lexer = Lexer::new(reader); - - let mut lexer_tokens = Vec::new(); - - while lexer.has_next()? 
{ - lexer_tokens.push(lexer.next()?.unwrap()) - } - - Ok(lexer_tokens) - } - - #[test] - fn basic_example() { - compare_input_to_expected( - r#"fn main() { - leak node = Node() - - println() - }"#, - vec![ - LT::from((Keyword(Fn), "fn")), - LT::from((Identifier, "main")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((OpenCurlyBracket, "{")), - LT::from((Newline, "\n")), - LT::from((Keyword(Leak), "leak")), - LT::from((Identifier, "node")), - LT::from((Equals, "=")), - LT::from((Identifier, "Node")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((Newline, "\n")), - LT::from((Newline, "\n")), - LT::from((Identifier, "println")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((Newline, "\n")), - LT::from((CloseCurlyBracket, "}")), - ], - ) - } - - #[test] - fn removes_comments() { - compare_input_to_expected( - r#"// this is a comment - fn main() { // this is a comment - leak node = Node() - // this is a comment - println() - // this is a comment - }// this is a comment"#, - vec![ - LT::from((Newline, "\n")), - LT::from((Keyword(Fn), "fn")), - LT::from((Identifier, "main")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((OpenCurlyBracket, "{")), - LT::from((Newline, "\n")), - LT::from((Keyword(Leak), "leak")), - LT::from((Identifier, "node")), - LT::from((Equals, "=")), - LT::from((Identifier, "Node")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((Newline, "\n")), - LT::from((Newline, "\n")), - LT::from((Identifier, "println")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((Newline, "\n")), - LT::from((Newline, "\n")), - LT::from((CloseCurlyBracket, "}")), - ], - ) - } - - #[test] - fn basic_single_operators() { - compare_input_to_expected( - r#"= == < <= > >= + += - -= * *= / /= % %= ~ ~= ^ ^= | |= & &= ! 
!="#, - vec![ - LT::from((Equals, "=")), - LT::from((DoubleEquals, "==")), - LT::from((LessThan, "<")), - LT::from((LessThanOrEqual, "<=")), - LT::from((GreaterThan, ">")), - LT::from((GreaterThanOrEqual, ">=")), - LT::from((Plus, "+")), - LT::from((PlusEquals, "+=")), - LT::from((Minus, "-")), - LT::from((MinusEquals, "-=")), - LT::from((Asterisk, "*")), - LT::from((MultiplyEquals, "*=")), - LT::from((Divide, "/")), - LT::from((DivideEquals, "/=")), - LT::from((Modulo, "%")), - LT::from((ModuloEquals, "%=")), - LT::from((BitwiseNot, "~")), - LT::from((BitwiseNotEquals, "~=")), - LT::from((BitwiseXor, "^")), - LT::from((BitwiseXorEquals, "^=")), - LT::from((BitwiseOr, "|")), - LT::from((BitwiseOrEquals, "|=")), - LT::from((BitwiseAnd, "&")), - LT::from((BitwiseAndEquals, "&=")), - LT::from((LogicalNot, "!")), - LT::from((LogicalNotEquals, "!=")), - ], - ) - } - - #[test] - fn basic_double_operators() { - compare_input_to_expected( - r#"** **= << <<= >> >>= || ||= && &&="#, - vec![ - LT::from((Exponentiation, "**")), - LT::from((ExponentiationEquals, "**=")), - LT::from((LeftShift, "<<")), - LT::from((LeftShiftEquals, "<<=")), - LT::from((RightShift, ">>")), - LT::from((RightShiftEquals, ">>=")), - LT::from((LogicalOr, "||")), - LT::from((LogicalOrEquals, "||=")), - LT::from((LogicalAnd, "&&")), - LT::from((LogicalAndEquals, "&&=")), - ], - ) - } - - #[test] - fn double_non_operators() { - compare_input_to_expected( - r#"-> ->=-> - >"#, - vec![ - LT::from((Arrow, "->")), - LT::from((Arrow, "->")), - LT::from((Equals, "=")), - LT::from((Arrow, "->")), - LT::from((Minus, "-")), - LT::from((GreaterThan, ">")), - ], - ) - } - - #[test] - fn simple_string() { - compare_input_to_expected( - r#" "your mom 1""your mom 2" "your mom 3" "#, - vec![ - LT::from((StringLiteral, r#""your mom 1""#)), - LT::from((StringLiteral, r#""your mom 2""#)), - LT::from((StringLiteral, r#""your mom 3""#)), - ], - ) - } - - #[test] - fn string_quote_escapes() { - compare_input_to_expected( - 
r#" "your mom \"1\"" "your mom 2" "#, - vec![ - LT::from((StringLiteral, r#""your mom \"1\"""#)), - LT::from((StringLiteral, r#""your mom 2""#)), - ], - ) - } - - #[test] - fn unclosed_string() { - assert_eq!( - lex_input(r#" "this is a string that doesn't have a closing double quote"#), - Err(LexerError::from(UnclosedWrappedLiteral(StringLiteral))) - ) - } - - #[test] - fn simple_chars() { - compare_input_to_expected( - r" 'a''b' 'c' ", - vec![ - LT::from((CharLiteral, r"'a'")), - LT::from((CharLiteral, r"'b'")), - LT::from((CharLiteral, r"'c'")), - ], - ) - } - - #[test] - fn char_escapes() { - compare_input_to_expected( - r" 'a''b' '\'' ", - vec![ - LT::from((CharLiteral, r"'a'")), - LT::from((CharLiteral, r"'b'")), - LT::from((CharLiteral, r"'\''")), - ], - ) - } - - #[test] - fn unclosed_char() { - assert_eq!( - lex_input(r#" 'a"#), - Err(LexerError::from(UnclosedWrappedLiteral(CharLiteral))) - ) - } - - #[test] - fn basic_hex_literal() { - compare_input_to_expected( - "0xFFFF 0x123456789ABCDEF 0x01234567", - vec![ - LT::from((IntegerLiteral(Hexadecimal), "0xFFFF")), - LT::from((IntegerLiteral(Hexadecimal), "0x123456789ABCDEF")), - LT::from((IntegerLiteral(Hexadecimal), "0x01234567")), - ], - ) - } - - #[test] - fn underscores_in_hex_literal() { - compare_input_to_expected( - "0x__FF__F_F 0x_1_2_3456_789AB_CDE_F_ 0x_01_23_45_67", - vec![ - LT::from((IntegerLiteral(Hexadecimal), "0xFFFF")), - LT::from((IntegerLiteral(Hexadecimal), "0x123456789ABCDEF")), - LT::from((IntegerLiteral(Hexadecimal), "0x01234567")), - ], - ) - } - - #[test] - fn unexpected_end_of_hex() { - assert_eq!( - lex_input(r"0x"), - Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Hexadecimal))) - ) - } - - #[test] - fn illegal_hex_chars() { - assert_eq!( - lex_input(r"0xasdfgh"), - Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( - Hexadecimal - ))) - ) - } - - #[test] - fn hex_literal_on_boundary() { - compare_input_to_expected( - "(0x42069)", - vec![ - LT::from((OpenParen, "(")), 
- LT::from((IntegerLiteral(Hexadecimal), "0x42069")), - LT::from((CloseParen, ")")), - ], - ) - } - - #[test] - fn basic_bin_literal() { - compare_input_to_expected( - "0b00010011 0b111010100001 0b0", - vec![ - LT::from((IntegerLiteral(Binary), "0b00010011")), - LT::from((IntegerLiteral(Binary), "0b111010100001")), - LT::from((IntegerLiteral(Binary), "0b0")), - ], - ) - } - - #[test] - fn underscores_in_bin_literal() { - compare_input_to_expected( - "0b_00_0_100_11 0b1_1_101_01000_01_ 0b_0_", - vec![ - LT::from((IntegerLiteral(Binary), "0b00010011")), - LT::from((IntegerLiteral(Binary), "0b111010100001")), - LT::from((IntegerLiteral(Binary), "0b0")), - ], - ) - } - - #[test] - fn unexpected_end_of_bin() { - assert_eq!( - lex_input(r"0b"), - Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Binary))) - ) - } - - #[test] - fn illegal_bin_chars() { - assert_eq!( - lex_input(r"0b101a"), - Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( - Binary - ))) - ) - } - - #[test] - fn bin_literal_on_boundary() { - compare_input_to_expected( - "(0b01000101)", - vec![ - LT::from((OpenParen, "(")), - LT::from((IntegerLiteral(Binary), "0b01000101")), - LT::from((CloseParen, ")")), - ], - ) - } - - #[test] - fn basic_oct_literal() { - compare_input_to_expected( - "0o01234567 0o161343 0o00000001", - vec![ - LT::from((IntegerLiteral(Octal), "0o01234567")), - LT::from((IntegerLiteral(Octal), "0o161343")), - LT::from((IntegerLiteral(Octal), "0o00000001")), - ], - ) - } - - #[test] - fn underscores_in_oct_literal() { - compare_input_to_expected( - "0o01_234_56_7 0o_16134_3 0o000_00001_", - vec![ - LT::from((IntegerLiteral(Octal), "0o01234567")), - LT::from((IntegerLiteral(Octal), "0o161343")), - LT::from((IntegerLiteral(Octal), "0o00000001")), - ], - ) - } - - #[test] - fn unexpected_end_of_oct() { - assert_eq!( - lex_input(r"0o"), - Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Octal))) - ) - } - - #[test] - fn illegal_oct_chars() { - assert_eq!( - 
lex_input(r"0o1234567890abcdef"), - Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( - Octal - ))) - ) - } - - #[test] - fn oct_literal_on_boundary() { - compare_input_to_expected( - "(0o420)", - vec![ - LT::from((OpenParen, "(")), - LT::from((IntegerLiteral(Octal), "0o420")), - LT::from((CloseParen, ")")), - ], - ) - } - - #[test] - fn basic_dec_literal() { - compare_input_to_expected( - "123456789 1 0 2", - vec![ - LT::from((IntegerLiteral(Decimal), "123456789")), - LT::from((IntegerLiteral(Decimal), "1")), - LT::from((IntegerLiteral(Decimal), "0")), - LT::from((IntegerLiteral(Decimal), "2")), - ], - ) - } - - #[test] - fn underscores_in_dec_literal() { - compare_input_to_expected( - "1234_5_6789 1_ 0 2_2", - vec![ - LT::from((IntegerLiteral(Decimal), "123456789")), - LT::from((IntegerLiteral(Decimal), "1")), - LT::from((IntegerLiteral(Decimal), "0")), - LT::from((IntegerLiteral(Decimal), "22")), - ], - ) - } - - #[test] - fn illegal_dec_chars() { - assert_eq!( - lex_input(r"0123456789abcdef"), - Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( - Decimal - ))) - ) - } - - #[test] - fn dec_literal_on_boundary() { - compare_input_to_expected( - "(69)", - vec![ - LT::from((OpenParen, "(")), - LT::from((IntegerLiteral(Decimal), "69")), - LT::from((CloseParen, ")")), - ], - ) - } - - #[test] - fn basic_float_literal() { - compare_input_to_expected( - "0.0 0.1 1.0 420.69", - vec![ - LT::from((FloatLiteral, "0.0")), - LT::from((FloatLiteral, "0.1")), - LT::from((FloatLiteral, "1.0")), - LT::from((FloatLiteral, "420.69")), - ], - ) - } - - #[test] - fn underscores_in_float_literal() { - compare_input_to_expected( - "0_.0 0._1 1.0 1337_420.69", - vec![ - LT::from((FloatLiteral, "0.0")), - LT::from((FloatLiteral, "0.1")), - LT::from((FloatLiteral, "1.0")), - LT::from((FloatLiteral, "1337420.69")), - ], - ) - } - - #[test] - fn illegal_float_chars() { - assert_eq!( - lex_input(r"420.a69"), - Err(LexerError::from(UnexpectedCharactersInFloatLiteral)) - ); 
- - assert_eq!( - lex_input(r"420.6s9"), - Err(LexerError::from(UnexpectedCharactersInFloatLiteral)) - ); - } - - #[test] - fn float_literal_on_boundary() { - compare_input_to_expected( - "(420.69)", - vec![ - LT::from((OpenParen, "(")), - LT::from((FloatLiteral, "420.69")), - LT::from((CloseParen, ")")), - ], - ) - } - - #[test] - fn float_double_period() { - assert_eq!( - lex_input(r"420.69.1337"), - Err(LexerError::from(UnexpectedExtraPeriodInFloatLiteral)) - ); - } - - #[test] - fn float_end_with_period() { - assert_eq!( - lex_input(r"420."), - Err(LexerError::from(UnexpectedEndOfFloatLiteral)) - ); - } -} diff --git a/compiler/src/frontend/lexer/test.rs b/compiler/src/frontend/lexer/test.rs new file mode 100644 index 0000000..c535b6d --- /dev/null +++ b/compiler/src/frontend/lexer/test.rs @@ -0,0 +1,516 @@ +#![cfg(test)] + +use crate::{ + frontend::lexer::{IntegerLiteralKind::*, KeywordKind::*, Token as LT, TokenKind::*}, + frontend::reader::FileReader, +}; + +use super::{Lexer, LexerError, LexerErrorKind::*}; + +fn compare_input_to_expected(input: &str, expected_tokens: Vec) { + // Collect tokens from lexer + let reader = FileReader::from(input.to_owned()); + let mut lexer = Lexer::new(reader); + + let mut lexer_tokens = Vec::new(); + + while lexer.has_next().unwrap() { + lexer_tokens.push(lexer.next().unwrap().unwrap()) + } + + assert_eq!( + lexer_tokens, expected_tokens, + "Lexer tokens did not match expected" + ) +} + +fn lex_input(input: &str) -> Result, LexerError> { + // Collect tokens from lexer + let reader = FileReader::from(input.to_owned()); + let mut lexer = Lexer::new(reader); + + let mut lexer_tokens = Vec::new(); + + while lexer.has_next()? 
{ + lexer_tokens.push(lexer.next()?.unwrap()) + } + + Ok(lexer_tokens) +} + +#[test] +fn basic_example() { + compare_input_to_expected( + r#"fn main() { + leak node = Node() + + println() + }"#, + vec![ + LT::from((Keyword(Fn), "fn")), + LT::from((Identifier, "main")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((OpenCurlyBracket, "{")), + LT::from((Newline, "\n")), + LT::from((Keyword(Leak), "leak")), + LT::from((Identifier, "node")), + LT::from((Equals, "=")), + LT::from((Identifier, "Node")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((Newline, "\n")), + LT::from((Newline, "\n")), + LT::from((Identifier, "println")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((Newline, "\n")), + LT::from((CloseCurlyBracket, "}")), + ], + ) +} + +#[test] +fn removes_comments() { + compare_input_to_expected( + r#"// this is a comment + fn main() { // this is a comment + leak node = Node() + // this is a comment + println() + // this is a comment + }// this is a comment"#, + vec![ + LT::from((Newline, "\n")), + LT::from((Keyword(Fn), "fn")), + LT::from((Identifier, "main")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((OpenCurlyBracket, "{")), + LT::from((Newline, "\n")), + LT::from((Keyword(Leak), "leak")), + LT::from((Identifier, "node")), + LT::from((Equals, "=")), + LT::from((Identifier, "Node")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((Newline, "\n")), + LT::from((Newline, "\n")), + LT::from((Identifier, "println")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((Newline, "\n")), + LT::from((Newline, "\n")), + LT::from((CloseCurlyBracket, "}")), + ], + ) +} + +#[test] +fn basic_single_operators() { + compare_input_to_expected( + r#"= == < <= > >= + += - -= * *= / /= % %= ~ ~= ^ ^= | |= & &= ! 
!="#, + vec![ + LT::from((Equals, "=")), + LT::from((DoubleEquals, "==")), + LT::from((LessThan, "<")), + LT::from((LessThanOrEqual, "<=")), + LT::from((GreaterThan, ">")), + LT::from((GreaterThanOrEqual, ">=")), + LT::from((Plus, "+")), + LT::from((PlusEquals, "+=")), + LT::from((Minus, "-")), + LT::from((MinusEquals, "-=")), + LT::from((Asterisk, "*")), + LT::from((MultiplyEquals, "*=")), + LT::from((Divide, "/")), + LT::from((DivideEquals, "/=")), + LT::from((Modulo, "%")), + LT::from((ModuloEquals, "%=")), + LT::from((BitwiseNot, "~")), + LT::from((BitwiseNotEquals, "~=")), + LT::from((BitwiseXor, "^")), + LT::from((BitwiseXorEquals, "^=")), + LT::from((BitwiseOr, "|")), + LT::from((BitwiseOrEquals, "|=")), + LT::from((BitwiseAnd, "&")), + LT::from((BitwiseAndEquals, "&=")), + LT::from((LogicalNot, "!")), + LT::from((LogicalNotEquals, "!=")), + ], + ) +} + +#[test] +fn basic_double_operators() { + compare_input_to_expected( + r#"** **= << <<= >> >>= || ||= && &&="#, + vec![ + LT::from((Exponentiation, "**")), + LT::from((ExponentiationEquals, "**=")), + LT::from((LeftShift, "<<")), + LT::from((LeftShiftEquals, "<<=")), + LT::from((RightShift, ">>")), + LT::from((RightShiftEquals, ">>=")), + LT::from((LogicalOr, "||")), + LT::from((LogicalOrEquals, "||=")), + LT::from((LogicalAnd, "&&")), + LT::from((LogicalAndEquals, "&&=")), + ], + ) +} + +#[test] +fn double_non_operators() { + compare_input_to_expected( + r#"-> ->=-> - >"#, + vec![ + LT::from((Arrow, "->")), + LT::from((Arrow, "->")), + LT::from((Equals, "=")), + LT::from((Arrow, "->")), + LT::from((Minus, "-")), + LT::from((GreaterThan, ">")), + ], + ) +} + +#[test] +fn simple_string() { + compare_input_to_expected( + r#" "your mom 1""your mom 2" "your mom 3" "#, + vec![ + LT::from((StringLiteral, r#""your mom 1""#)), + LT::from((StringLiteral, r#""your mom 2""#)), + LT::from((StringLiteral, r#""your mom 3""#)), + ], + ) +} + +#[test] +fn string_quote_escapes() { + compare_input_to_expected( + r#" "your mom 
\"1\"" "your mom 2" "#, + vec![ + LT::from((StringLiteral, r#""your mom \"1\"""#)), + LT::from((StringLiteral, r#""your mom 2""#)), + ], + ) +} + +#[test] +fn unclosed_string() { + assert_eq!( + lex_input(r#" "this is a string that doesn't have a closing double quote"#), + Err(LexerError::from(UnclosedWrappedLiteral(StringLiteral))) + ) +} + +#[test] +fn simple_chars() { + compare_input_to_expected( + r" 'a''b' 'c' ", + vec![ + LT::from((CharLiteral, r"'a'")), + LT::from((CharLiteral, r"'b'")), + LT::from((CharLiteral, r"'c'")), + ], + ) +} + +#[test] +fn char_escapes() { + compare_input_to_expected( + r" 'a''b' '\'' ", + vec![ + LT::from((CharLiteral, r"'a'")), + LT::from((CharLiteral, r"'b'")), + LT::from((CharLiteral, r"'\''")), + ], + ) +} + +#[test] +fn unclosed_char() { + assert_eq!( + lex_input(r#" 'a"#), + Err(LexerError::from(UnclosedWrappedLiteral(CharLiteral))) + ) +} + +#[test] +fn basic_hex_literal() { + compare_input_to_expected( + "0xFFFF 0x123456789ABCDEF 0x01234567", + vec![ + LT::from((IntegerLiteral(Hexadecimal), "0xFFFF")), + LT::from((IntegerLiteral(Hexadecimal), "0x123456789ABCDEF")), + LT::from((IntegerLiteral(Hexadecimal), "0x01234567")), + ], + ) +} + +#[test] +fn underscores_in_hex_literal() { + compare_input_to_expected( + "0x__FF__F_F 0x_1_2_3456_789AB_CDE_F_ 0x_01_23_45_67", + vec![ + LT::from((IntegerLiteral(Hexadecimal), "0xFFFF")), + LT::from((IntegerLiteral(Hexadecimal), "0x123456789ABCDEF")), + LT::from((IntegerLiteral(Hexadecimal), "0x01234567")), + ], + ) +} + +#[test] +fn unexpected_end_of_hex() { + assert_eq!( + lex_input(r"0x"), + Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Hexadecimal))) + ) +} + +#[test] +fn illegal_hex_chars() { + assert_eq!( + lex_input(r"0xasdfgh"), + Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( + Hexadecimal + ))) + ) +} + +#[test] +fn hex_literal_on_boundary() { + compare_input_to_expected( + "(0x42069)", + vec![ + LT::from((OpenParen, "(")), + LT::from((IntegerLiteral(Hexadecimal), 
"0x42069")), + LT::from((CloseParen, ")")), + ], + ) +} + +#[test] +fn basic_bin_literal() { + compare_input_to_expected( + "0b00010011 0b111010100001 0b0", + vec![ + LT::from((IntegerLiteral(Binary), "0b00010011")), + LT::from((IntegerLiteral(Binary), "0b111010100001")), + LT::from((IntegerLiteral(Binary), "0b0")), + ], + ) +} + +#[test] +fn underscores_in_bin_literal() { + compare_input_to_expected( + "0b_00_0_100_11 0b1_1_101_01000_01_ 0b_0_", + vec![ + LT::from((IntegerLiteral(Binary), "0b00010011")), + LT::from((IntegerLiteral(Binary), "0b111010100001")), + LT::from((IntegerLiteral(Binary), "0b0")), + ], + ) +} + +#[test] +fn unexpected_end_of_bin() { + assert_eq!( + lex_input(r"0b"), + Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Binary))) + ) +} + +#[test] +fn illegal_bin_chars() { + assert_eq!( + lex_input(r"0b101a"), + Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( + Binary + ))) + ) +} + +#[test] +fn bin_literal_on_boundary() { + compare_input_to_expected( + "(0b01000101)", + vec![ + LT::from((OpenParen, "(")), + LT::from((IntegerLiteral(Binary), "0b01000101")), + LT::from((CloseParen, ")")), + ], + ) +} + +#[test] +fn basic_oct_literal() { + compare_input_to_expected( + "0o01234567 0o161343 0o00000001", + vec![ + LT::from((IntegerLiteral(Octal), "0o01234567")), + LT::from((IntegerLiteral(Octal), "0o161343")), + LT::from((IntegerLiteral(Octal), "0o00000001")), + ], + ) +} + +#[test] +fn underscores_in_oct_literal() { + compare_input_to_expected( + "0o01_234_56_7 0o_16134_3 0o000_00001_", + vec![ + LT::from((IntegerLiteral(Octal), "0o01234567")), + LT::from((IntegerLiteral(Octal), "0o161343")), + LT::from((IntegerLiteral(Octal), "0o00000001")), + ], + ) +} + +#[test] +fn unexpected_end_of_oct() { + assert_eq!( + lex_input(r"0o"), + Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Octal))) + ) +} + +#[test] +fn illegal_oct_chars() { + assert_eq!( + lex_input(r"0o1234567890abcdef"), + 
Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( + Octal + ))) + ) +} + +#[test] +fn oct_literal_on_boundary() { + compare_input_to_expected( + "(0o420)", + vec![ + LT::from((OpenParen, "(")), + LT::from((IntegerLiteral(Octal), "0o420")), + LT::from((CloseParen, ")")), + ], + ) +} + +#[test] +fn basic_dec_literal() { + compare_input_to_expected( + "123456789 1 0 2", + vec![ + LT::from((IntegerLiteral(Decimal), "123456789")), + LT::from((IntegerLiteral(Decimal), "1")), + LT::from((IntegerLiteral(Decimal), "0")), + LT::from((IntegerLiteral(Decimal), "2")), + ], + ) +} + +#[test] +fn underscores_in_dec_literal() { + compare_input_to_expected( + "1234_5_6789 1_ 0 2_2", + vec![ + LT::from((IntegerLiteral(Decimal), "123456789")), + LT::from((IntegerLiteral(Decimal), "1")), + LT::from((IntegerLiteral(Decimal), "0")), + LT::from((IntegerLiteral(Decimal), "22")), + ], + ) +} + +#[test] +fn illegal_dec_chars() { + assert_eq!( + lex_input(r"0123456789abcdef"), + Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( + Decimal + ))) + ) +} + +#[test] +fn dec_literal_on_boundary() { + compare_input_to_expected( + "(69)", + vec![ + LT::from((OpenParen, "(")), + LT::from((IntegerLiteral(Decimal), "69")), + LT::from((CloseParen, ")")), + ], + ) +} + +#[test] +fn basic_float_literal() { + compare_input_to_expected( + "0.0 0.1 1.0 420.69", + vec![ + LT::from((FloatLiteral, "0.0")), + LT::from((FloatLiteral, "0.1")), + LT::from((FloatLiteral, "1.0")), + LT::from((FloatLiteral, "420.69")), + ], + ) +} + +#[test] +fn underscores_in_float_literal() { + compare_input_to_expected( + "0_.0 0._1 1.0 1337_420.69", + vec![ + LT::from((FloatLiteral, "0.0")), + LT::from((FloatLiteral, "0.1")), + LT::from((FloatLiteral, "1.0")), + LT::from((FloatLiteral, "1337420.69")), + ], + ) +} + +#[test] +fn illegal_float_chars() { + assert_eq!( + lex_input(r"420.a69"), + Err(LexerError::from(UnexpectedCharactersInFloatLiteral)) + ); + + assert_eq!( + lex_input(r"420.6s9"), + 
Err(LexerError::from(UnexpectedCharactersInFloatLiteral)) + ); +} + +#[test] +fn float_literal_on_boundary() { + compare_input_to_expected( + "(420.69)", + vec![ + LT::from((OpenParen, "(")), + LT::from((FloatLiteral, "420.69")), + LT::from((CloseParen, ")")), + ], + ) +} + +#[test] +fn float_double_period() { + assert_eq!( + lex_input(r"420.69.1337"), + Err(LexerError::from(UnexpectedExtraPeriodInFloatLiteral)) + ); +} + +#[test] +fn float_end_with_period() { + assert_eq!( + lex_input(r"420."), + Err(LexerError::from(UnexpectedEndOfFloatLiteral)) + ); +} \ No newline at end of file diff --git a/compiler/src/frontend/lexer/token.rs b/compiler/src/frontend/lexer/token.rs new file mode 100644 index 0000000..7ba411e --- /dev/null +++ b/compiler/src/frontend/lexer/token.rs @@ -0,0 +1,318 @@ +use std::fmt::{Debug, Display}; + +use crate::frontend::position::{Position, Span}; + +#[allow(dead_code)] +#[cfg_attr(not(test), derive(Debug))] +#[derive(Clone)] +pub struct Token { + pub kind: TokenKind, + pub text: String, + pub span: Span, +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum TokenKind { + // Significant Whitespace + Newline, + + // Words + Keyword(KeywordKind), // leak + Identifier, // YourMom + + // Grouping + OpenParen, // ( + CloseParen, // ) + OpenBracket, // [ + CloseBracket, // ] + OpenCurlyBracket, // { + CloseCurlyBracket, // } + + // Literals + StringLiteral, // "your mom" + CharLiteral, // 'd' + IntegerLiteral(IntegerLiteralKind), // 69 + FloatLiteral, // 420.69 + + // Single Operators + Equals, // = + DoubleEquals, // == + LessThan, // < + LessThanOrEqual, // <= + GreaterThan, // > + GreaterThanOrEqual, // >= + Plus, // + + PlusEquals, // += + Minus, // - + MinusEquals, // -= + Asterisk, // * + MultiplyEquals, // *= + Divide, // / + DivideEquals, // /= + Modulo, // % + ModuloEquals, // %= + BitwiseNot, // ~ + BitwiseNotEquals, // ~= + BitwiseXor, // ^ + BitwiseXorEquals, // ^= + BitwiseOr, // | + BitwiseOrEquals, // |= + BitwiseAnd, // & + 
BitwiseAndEquals, // &= + LogicalNot, // ! + LogicalNotEquals, // != + + // Double Operators + Exponentiation, // ** + ExponentiationEquals, // **= + LeftShift, // << + LeftShiftEquals, // <<= + RightShift, // >> + RightShiftEquals, // >>= + LogicalOr, // || + LogicalOrEquals, // ||= + LogicalAnd, // && + LogicalAndEquals, // &&= + + // Non-Operator symbols + Arrow, // -> + QuestionMark, // ? + Comma, // , + Semicolon, // ; + Colon, // : + DoubleColon, // :: + Period, // . + BangCoalescing, // !. + BackSlash, // \ + Underscore, // _ + Asperand, // @ + Hash, // # + DollarSign, // $ +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum IntegerLiteralKind { + Decimal, + Hexadecimal, + Binary, + Octal, +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum KeywordKind { + Fn, + Struct, + Leak, + Hold, + Perm, + If, + Else, + While, + For, + Yeet, +} + +impl From<(TokenKind, T)> for Token +where + T: Into + Sized, +{ + fn from((kind, text): (TokenKind, T)) -> Self { + Self { + kind, + text: text.into(), + span: Span::from(Position::new()), + } + } +} + +#[cfg(test)] +impl Debug for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Token") + .field("kind", &self.kind) + .field("text", &self.text) + .finish() + } +} + +impl Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?} => {:?}", self.kind, self.text) + } +} + +impl PartialEq for Token { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind && self.text == other.text + } +} + +impl TokenKind { + pub fn is_assignment_operator(&self) -> bool { + matches!( + self, + Self::Equals + | Self::PlusEquals + | Self::MinusEquals + | Self::MultiplyEquals + | Self::DivideEquals + | Self::ModuloEquals + | Self::BitwiseNotEquals + | Self::BitwiseXorEquals + | Self::BitwiseOrEquals + | Self::BitwiseAndEquals + | Self::LogicalNotEquals + | Self::ExponentiationEquals + | Self::LeftShiftEquals + | 
Self::RightShiftEquals + | Self::LogicalOrEquals + | Self::LogicalAndEquals + ) + } + + pub fn is_unary_operator(&self) -> bool { + matches!(self, Self::BitwiseNot | Self::LogicalNot | Self::Asterisk) + } + + pub fn is_binary_operator(&self) -> bool { + matches!( + self, + Self::DoubleEquals + | Self::LessThan + | Self::LessThanOrEqual + | Self::GreaterThan + | Self::GreaterThanOrEqual + | Self::Plus + | Self::Minus + | Self::Asterisk + | Self::Divide + | Self::Modulo + | Self::BitwiseXor + | Self::BitwiseOr + | Self::BitwiseAnd + | Self::Exponentiation + | Self::LeftShift + | Self::RightShift + | Self::LogicalOr + | Self::LogicalAnd + ) + } + + pub fn is_literal(&self) -> bool { + matches!( + self, + Self::CharLiteral | Self::StringLiteral | Self::FloatLiteral | Self::IntegerLiteral(_) + ) + } + + pub fn grouping_symbol_from(c: char) -> TokenKind { + match c { + '(' => Self::OpenParen, + ')' => Self::CloseParen, + '[' => Self::OpenBracket, + ']' => Self::CloseBracket, + '{' => Self::OpenCurlyBracket, + '}' => Self::CloseCurlyBracket, + x => unreachable!("Illegal non-grouping symbol `{}`", x), + } + } + + pub fn single_operator_from(c: char) -> TokenKind { + match c { + '=' => Self::Equals, + '<' => Self::LessThan, + '>' => Self::GreaterThan, + '!' 
=> Self::LogicalNot, + '+' => Self::Plus, + '-' => Self::Minus, + '*' => Self::Asterisk, + '/' => Self::Divide, + '%' => Self::Modulo, + '~' => Self::BitwiseNot, + '^' => Self::BitwiseXor, + '|' => Self::BitwiseOr, + '&' => Self::BitwiseAnd, + x => unreachable!("Illegal single non-operator `{}`", x), + } + } + + pub fn double_operator_from(c: char) -> TokenKind { + match c { + '*' => Self::Exponentiation, + '<' => Self::LeftShift, + '>' => Self::RightShift, + '&' => Self::LogicalAnd, + '|' => Self::LogicalOr, + x => unreachable!("Illegal double non-operator `{}`", x), + } + } + + pub fn single_equals_operator_from(c: char) -> TokenKind { + match c { + '=' => Self::DoubleEquals, + '<' => Self::LessThanOrEqual, + '>' => Self::GreaterThanOrEqual, + '!' => Self::LogicalNotEquals, + '+' => Self::PlusEquals, + '-' => Self::MinusEquals, + '*' => Self::MultiplyEquals, + '/' => Self::DivideEquals, + '%' => Self::ModuloEquals, + '~' => Self::BitwiseNotEquals, + '^' => Self::BitwiseXorEquals, + '|' => Self::BitwiseOrEquals, + '&' => Self::BitwiseAndEquals, + x => unreachable!("Illegal single non-equals-operator `{}`", x), + } + } + + pub fn double_equals_operator_from(c: char) -> TokenKind { + match c { + '*' => Self::ExponentiationEquals, + '<' => Self::LeftShiftEquals, + '>' => Self::RightShiftEquals, + '&' => Self::LogicalAndEquals, + '|' => Self::LogicalOrEquals, + x => unreachable!("Illegal double non-equals-operator `{}`", x), + } + } + + pub fn other_symbol_from(c: impl Into) -> TokenKind { + match c.into().as_str() { + "->" => Self::Arrow, + "?" => Self::QuestionMark, + "!." => Self::BangCoalescing, + "," => Self::Comma, + ";" => Self::Semicolon, + ":" => Self::Colon, + "::" => Self::DoubleColon, + "." 
=> Self::Period, + "\\" => Self::BackSlash, + "_" => Self::Underscore, + "@" => Self::Asperand, + "#" => Self::Hash, + "$" => Self::DollarSign, + x => unreachable!("Illegal non-other-symbol `{}`", x), + } + } +} + +impl TryFrom<&String> for KeywordKind { + type Error = (); + + fn try_from(value: &String) -> Result { + Ok(match value.as_str() { + "fn" => Self::Fn, + "struct" => Self::Struct, + "leak" => Self::Leak, + "hold" => Self::Hold, + "perm" => Self::Perm, + "if" => Self::If, + "else" => Self::Else, + "while" => Self::While, + "for" => Self::For, + "yeet" => Self::Yeet, + _ => return Err(()), + }) + } +} \ No newline at end of file diff --git a/compiler/src/frontend/parser.rs b/compiler/src/frontend/parser.rs index cbbf33f..37f2b9d 100644 --- a/compiler/src/frontend/parser.rs +++ b/compiler/src/frontend/parser.rs @@ -3,12 +3,13 @@ use std::fmt::Display; use crate::{ common::error::CompilerError, - frontend::lexer::{IntegerLiteralKind, KeywordKind, Lexer, Token, TokenKind}, - frontend::position::{SourceFile, Span}, + frontend::lexer::{ + token::{IntegerLiteralKind, KeywordKind, Token, TokenKind}, + Lexer, + }, + frontend::position::{highlight_span, SourceFile, Span}, }; -use super::position::highlight_span; - #[derive(Debug)] pub struct ParseTree { pub root: ParseTreeNode, @@ -1267,8 +1268,11 @@ mod test { use ansi_term::Color; use crate::{ - frontend::lexer::{IntegerLiteralKind, KeywordKind, Lexer, Token, TokenKind}, - frontend::reader::FileReader, + frontend::lexer::Lexer, + frontend::{ + lexer::token::{IntegerLiteralKind, KeywordKind, Token, TokenKind}, + reader::FileReader, + }, }; use super::{ParseTreeNode, ParseTreeNodeNonTerminal, ParseTreeNonTerminalKind, Parser}; From 61b8edbb8c6384aeb19144db1ac0e9fdcaf30a1f Mon Sep 17 00:00:00 2001 From: Wowkster <49880655+wowkster@users.noreply.github.com> Date: Sat, 5 Aug 2023 20:20:40 -0400 Subject: [PATCH 8/8] refactor: :recycle: simplify error display fmt --- compiler/src/frontend/lexer/mod.rs | 61 
+++++------------------------- compiler/src/frontend/parser.rs | 10 ++--- compiler/src/frontend/position.rs | 12 ++++++ 3 files changed, 26 insertions(+), 57 deletions(-) diff --git a/compiler/src/frontend/lexer/mod.rs b/compiler/src/frontend/lexer/mod.rs index 777411b..ea8292b 100644 --- a/compiler/src/frontend/lexer/mod.rs +++ b/compiler/src/frontend/lexer/mod.rs @@ -9,10 +9,12 @@ use crate::{ frontend::reader::CharacterReader, }; -use self::token::{TokenKind, IntegerLiteralKind, Token, KeywordKind}; +use self::token::{IntegerLiteralKind, KeywordKind, Token, TokenKind}; + +use super::position::highlight_span; -pub mod token; mod test; +pub mod token; /// Represents an error when lexing a file #[derive(Debug)] @@ -57,55 +59,6 @@ pub enum LexerErrorKind { impl Display for LexerError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - writeln!( - f, - "{}:{}", - match &self.source_file.path { - Some(file) => file - .canonicalize() - .expect("Could not canonicalize file path") - .to_str() - .expect("Could not convert file path to string") - .trim_start_matches(r"\\?\") - .to_owned(), - None => "".to_owned(), - }, - self.position - )?; - - let lines: Vec<_> = self.source_file.content.lines().collect(); - - // Print the lines around and including the one with the error - let start = if self.position.row < 2 { - 0 - } else { - self.position.row - 2 - } as usize; - - // Print each line and the line number - for (n, line) in lines[start..(self.position.row + 1) as usize] - .iter() - .enumerate() - { - writeln!(f, "{:>3}: {}", n + start + 1, line)?; - } - - // Print the space before the highlight - for _ in 0..self.position.col + 5 { - write!(f, " ")?; - } - - // Print the underline highlight - writeln!(f, "^")?; - - // Print the space before "here" - for _ in 0..self.position.col + 5 { - write!(f, " ")?; - } - - writeln!(f, "here")?; - writeln!(f)?; - match &self.kind { LexerErrorKind::UnexpectedChar(c) => writeln!(f, "Unexpected char `{c}`"), 
LexerErrorKind::UnclosedWrappedLiteral(kind) => { @@ -126,7 +79,11 @@ impl Display for LexerError { LexerErrorKind::UnexpectedCharactersInIntegerLiteral(kind) => { writeln!(f, "Unexpected characters inside {kind:?} integer literal") } - } + }?; + + highlight_span(f, &self.source_file, Span::from_position(&self.position))?; + + Ok(()) } } diff --git a/compiler/src/frontend/parser.rs b/compiler/src/frontend/parser.rs index 37f2b9d..54e1965 100644 --- a/compiler/src/frontend/parser.rs +++ b/compiler/src/frontend/parser.rs @@ -137,17 +137,17 @@ impl Display for ParserError { f, "Unexpected token {:?}. Expected one of: {:?}", found, expected - )?, + ), ParserErrorKind::UnexpectedKeyword { expected, found } => writeln!( f, "Unexpected keyword {:?}. Expected one of: {:?}", found, expected - )?, - ParserErrorKind::UnexpectedEndOfInput => writeln!(f, "Unexpected end of input.")?, + ), + ParserErrorKind::UnexpectedEndOfInput => writeln!(f, "Unexpected end of input."), ParserErrorKind::IndexIntoNonIdentifier => { - writeln!(f, "Cannot access field of non-struct object.")? + writeln!(f, "Cannot access field of non-struct object.") } - } + }?; highlight_span(f, &self.source_file, self.span.clone())?; diff --git a/compiler/src/frontend/position.rs b/compiler/src/frontend/position.rs index 79d3d3f..b3ed7fb 100644 --- a/compiler/src/frontend/position.rs +++ b/compiler/src/frontend/position.rs @@ -60,6 +60,18 @@ impl Span { Self { start, end } } + pub fn from_position(pos: &Position) -> Self { + let end = Position { + row: pos.row, + col: pos.col + 1, + }; + + Self { + start: pos.clone(), + end, + } + } + pub fn start(&self) -> &Position { &self.start }