diff --git a/Cargo.lock b/Cargo.lock index aff5c8a..e7e5054 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6342bd4f5a1205d7f41e94a41a901f5647c938cdfa96036338e8533c9d6c2450" +checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" dependencies = [ "anstyle", "anstyle-parse", @@ -28,15 +28,15 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" +checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" [[package]] name = "anstyle-parse" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" +checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" dependencies = [ "utf8parse", ] @@ -62,15 +62,18 @@ dependencies = [ [[package]] name = "bitflags" -version = "1.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" [[package]] name = "cc" -version = "1.0.79" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "6c6b2562119bf28c3439f7f02db99faf0aa1a8cdfe5772a2ee155d32227239f0" +dependencies = [ + "libc", +] [[package]] name = "cfg-if" @@ -80,9 +83,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.2.4" +version = "4.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "956ac1f6381d8d82ab4684768f89c0ea3afe66925ceadb4eeb3fc452ffc55d62" +checksum = "5fd304a20bff958a57f04c4e96a2e7594cc4490a0e809cbd48bb6437edaa452d" dependencies = [ "clap_builder", "clap_derive", @@ -91,22 +94,21 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.2.4" +version = "4.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84080e799e54cff944f4b4a4b0e71630b0e0443b25b985175c7dddc1a859b749" +checksum = "01c6a3f08f1fe5662a35cfe393aec09c4df95f60ee93b7556505260f75eee9e1" dependencies = [ "anstream", "anstyle", - "bitflags", "clap_lex", "strsim", ] [[package]] name = "clap_derive" -version = "4.2.0" +version = "4.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" +checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" dependencies = [ "heck", "proc-macro2", @@ -116,9 +118,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" [[package]] name = "colorchoice" @@ -134,15 +136,15 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "either" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "errno" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" dependencies = [ "errno-dragonfly", "libc", @@ -161,9 +163,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", @@ -178,44 +180,32 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" [[package]] name = "indoc" -version = "2.0.1" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f2cb48b81b1dc9f39676bf99f5499babfec7cd8fe14307f7b3d747208fb5690" - -[[package]] -name = "io-lifetimes" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys", -] +checksum = "2c785eefb63ebd0e33416dfcb8d6da0bf27ce752843a45632a67bf10d4d4b5c4" [[package]] name = "is-terminal" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi", - "io-lifetimes", "rustix", "windows-sys", ] [[package]] name = "itertools" -version = "0.10.5" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" dependencies = [ "either", ] @@ -234,15 +224,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.142" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "linux-raw-sys" -version = "0.3.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36eb31c1778188ae1e64398743890d0877fef36d11521ac60406b42016e8c2cf" +checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" [[package]] name = "mktemp" @@ -255,37 +245,36 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.26" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" dependencies = [ "proc-macro2", ] [[package]] name = "rustix" -version = "0.37.14" +version = "0.38.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b864d3c18a5785a05953adeed93e2dca37ed30f18e69bba9f30079d51f363f" +checksum = "172891ebdceb05aa0005f533a6cbfca599ddd7d966f6f5d4d9b2e70478e70399" dependencies = [ "bitflags", "errno", - "io-lifetimes", "libc", "linux-raw-sys", "windows-sys", @@ -299,9 +288,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "2.0.15" +version = "2.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" dependencies = [ "proc-macro2", "quote", @@ -310,9 +299,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "utf8parse" @@ -368,9 +357,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.48.0" +version = "0.48.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", diff --git a/Cargo.toml b/Cargo.toml index 529c35d..003c782 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [workspace] +resolver = "2" members = [ "compiler", ] diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index 7823689..346a43e 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -7,8 +7,8 @@ edition = "2021" [dependencies] ansi_term = "0.12.1" -clap = { version = "4.1.6", features = ["derive"] } +clap = { version = "4.3.19", features = ["derive"] } diff = "0.1.13" indoc = "2.0.1" -itertools = "0.10.5" +itertools = "0.11.0" mktemp = "0.5.0" diff --git a/compiler/src/backend/codegen/mod.rs b/compiler/src/backend/codegen/mod.rs index 947e2ad..b6ee998 100644 --- a/compiler/src/backend/codegen/mod.rs +++ b/compiler/src/backend/codegen/mod.rs @@ -1,16 +1,16 @@ use std::{path::Path, process::Command, str::FromStr}; -use crate::frontend::ast::LeekAst; +use crate::frontend::ast::Ast; use self::x86_linux_gnu::CodeGeneratorX86LinuxGNU; -use super::LeekCompilerConfig; +use super::CompilerConfig; pub mod x86_64_linux_gnu; pub mod x86_linux_gnu; pub trait CodeGenerator { - fn generate_assembly(&self, ast: LeekAst, compiler_options: &LeekCompilerConfig) -> String; + fn generate_assembly(&self, ast: Ast, compiler_options: &CompilerConfig) -> String; fn create_assembler_command(&self, input_file: &Path, output_file: &Path) -> Command; fn create_linker_command(&self, input_file: &Path, output_file: &Path) -> Command; } diff --git a/compiler/src/backend/codegen/x86_linux_gnu/mod.rs b/compiler/src/backend/codegen/x86_linux_gnu/mod.rs index ca48d15..f98bff2 100644 --- a/compiler/src/backend/codegen/x86_linux_gnu/mod.rs +++ b/compiler/src/backend/codegen/x86_linux_gnu/mod.rs @@ -1,14 +1,14 @@ use indoc::indoc; use std::{path::Path, process::Command}; -use crate::{backend::LeekCompilerConfig, frontend::ast::LeekAst}; +use crate::{backend::CompilerConfig, frontend::ast::Ast}; use super::CodeGenerator; pub struct CodeGeneratorX86LinuxGNU; impl CodeGenerator for CodeGeneratorX86LinuxGNU { - fn generate_assembly(&self, _ast: LeekAst, _compiler_options: &LeekCompilerConfig) -> String { + fn generate_assembly(&self, _ast: Ast, _compiler_options: &CompilerConfig) -> String { String::from(indoc! {" global main diff --git a/compiler/src/backend/mod.rs b/compiler/src/backend/mod.rs index 842bcac..69d1039 100644 --- a/compiler/src/backend/mod.rs +++ b/compiler/src/backend/mod.rs @@ -4,10 +4,10 @@ use itertools::Itertools; use crate::{ common::{ - config::{EmitMode, LeekCompilerConfig}, - error::LeekCompilerError, + config::{CompilerConfig, EmitMode}, + error::CompilerError, }, - frontend::ast::LeekAst, + frontend::ast::Ast, }; use self::codegen::{CodeGenTarget, CodeGenerator}; @@ -33,10 +33,10 @@ macro_rules! display_buffer { } pub fn compile_ast( - ast: LeekAst, - compiler_options: &LeekCompilerConfig, + ast: Ast, + compiler_options: &CompilerConfig, target: CodeGenTarget, -) -> Result<(), LeekCompilerError> { +) -> Result<(), CompilerError> { let code_generator = target.get_code_generator(); // If the output name is specified, use that. diff --git a/compiler/src/common/config.rs b/compiler/src/common/config.rs index 0d6c2f8..326d449 100644 --- a/compiler/src/common/config.rs +++ b/compiler/src/common/config.rs @@ -41,7 +41,7 @@ pub enum EmitMode { AssemblyFile, } -pub struct LeekCompilerConfig { +pub struct CompilerConfig { pub opt_level: OptimizationLevel, pub build_mode: BuildMode, pub emit_mode: EmitMode, diff --git a/compiler/src/common/error.rs b/compiler/src/common/error.rs index 9a446fe..27bccc7 100644 --- a/compiler/src/common/error.rs +++ b/compiler/src/common/error.rs @@ -9,7 +9,7 @@ use crate::{ // TODO: Refactor with thiserror #[derive(Debug)] -pub enum LeekCompilerError { +pub enum CompilerError { FileReadError(FileReadError), // File -> Chars LexerError(LexerError), // Chars -> Tokens ParserError(ParserError), // Tokens -> Parse Tree @@ -19,7 +19,7 @@ pub enum LeekCompilerError { CodeGenError(CodeGenError), // LIR -> ASM } -impl LeekCompilerError { +impl CompilerError { /// Should print to the stderr and exit with a non-zero exit code pub fn report(&self) -> ! { eprintln!("{self}"); @@ -28,18 +28,18 @@ impl LeekCompilerError { } } -impl Display for LeekCompilerError { +impl Display for CompilerError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - LeekCompilerError::FileReadError(e) => write!(f, "File Read Error: \n{e}"), - LeekCompilerError::LexerError(e) => write!(f, "Lexer Error: \n{e}"), - LeekCompilerError::ParserError(e) => { + CompilerError::FileReadError(e) => write!(f, "File Read Error: \n{e}"), + CompilerError::LexerError(e) => write!(f, "Lexer Error: \n{e}"), + CompilerError::ParserError(e) => { write!( f, "Parser Error: {e}\n=================================\n\n{e:#?}\n" ) } - LeekCompilerError::TypeCheckingError(e) => { + CompilerError::TypeCheckingError(e) => { write!( f, "Type Error: \n{e}\n=================================\n\n{e:#?}\n" @@ -51,26 +51,26 @@ impl Display for LeekCompilerError { } } -impl From for LeekCompilerError { +impl From for CompilerError { fn from(error: FileReadError) -> Self { - LeekCompilerError::FileReadError(error) + CompilerError::FileReadError(error) } } -impl From for LeekCompilerError { +impl From for CompilerError { fn from(error: LexerError) -> Self { - LeekCompilerError::LexerError(error) + CompilerError::LexerError(error) } } -impl From for LeekCompilerError { +impl From for CompilerError { fn from(error: ParserError) -> Self { - LeekCompilerError::ParserError(error) + CompilerError::ParserError(error) } } -impl From for LeekCompilerError { +impl From for CompilerError { fn from(error: CodeGenError) -> Self { - LeekCompilerError::CodeGenError(error) + CompilerError::CodeGenError(error) } } diff --git a/compiler/src/frontend/ast/builder.rs b/compiler/src/frontend/ast/builder.rs index e6669d3..ec50ccc 100644 --- a/compiler/src/frontend/ast/builder.rs +++ b/compiler/src/frontend/ast/builder.rs @@ -9,33 +9,25 @@ use crate::{ StructInitialization, StructMethodCall, UnaryExpression, VariableDeclaration, VariableDeclarationKind, }, - lexer::{IntegerLiteralKind, KeywordKind, LeekToken, LeekTokenKind}, + lexer::token::{IntegerLiteralKind, KeywordKind, Token, TokenKind}, parser::{ParseTree, ParseTreeNode, ParseTreeNodeNonTerminal, ParseTreeNonTerminalKind}, }, }; use super::{ - Expression, IntegerKind, LeekAst, Literal, LiteralKind, PrimitiveKind, Program, + Ast, Expression, IntegerKind, Literal, LiteralKind, PrimitiveKind, ProgramPart, QualifiedIdentifier, Type, VariableAssignment, }; // TODO: Add spans for ast nodes -impl LeekAst { +impl Ast { /// This function is infallible. If there is an error, it is due to a bug in the parser or the builder. /// As such, this function will panic if there is an error. pub fn build_from(parse_tree: ParseTree) -> Self { - let root = Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![], - struct_definitions: vec![], - enum_definitions: vec![], - }; - let mut ast = Self { source_file: parse_tree.source_file.clone(), - root, + items: Vec::new(), }; ast.populate(parse_tree); @@ -49,26 +41,31 @@ impl LeekAst { for node in &program.children { let ParseTreeNode::NonTerminal(top_level_node) = node else { - panic!("Expected top level node to be non-terminal, found {:?}", node); + panic!( + "Expected top level node to be non-terminal, found {:?}", + node + ); }; match top_level_node.kind { - ParseTreeNonTerminalKind::ConstantVariableDeclaration => self - .root - .constant_variables - .push(VariableDeclaration::from_node(top_level_node)), - ParseTreeNonTerminalKind::StaticVariableDeclaration => self - .root - .static_variables - .push(VariableDeclaration::from_node(top_level_node)), - ParseTreeNonTerminalKind::FunctionDefinition => self - .root - .function_definitions - .push(FunctionDefinition::from_node(top_level_node)), - ParseTreeNonTerminalKind::StructDefinition => self - .root - .struct_definitions - .push(StructDefinition::from_node(top_level_node)), + ParseTreeNonTerminalKind::ConstantVariableDeclaration => self.items.push( + ProgramPart::ConstantVariable(VariableDeclaration::from_node(top_level_node)), + ), + ParseTreeNonTerminalKind::StaticVariableDeclaration => { + self.items + .push(ProgramPart::StaticVariable(VariableDeclaration::from_node( + top_level_node, + ))) + } + ParseTreeNonTerminalKind::FunctionDefinition => self.items.push( + ProgramPart::FunctionDefinition(FunctionDefinition::from_node(top_level_node)), + ), + ParseTreeNonTerminalKind::StructDefinition => { + self.items + .push(ProgramPart::StructDefinition(StructDefinition::from_node( + top_level_node, + ))) + } _ => panic!("Unexpected top level node: {:?}", top_level_node), } } @@ -105,7 +102,7 @@ trait FromTerminal where Self: Sized, { - fn from_terminal(node: &LeekToken) -> Self; + fn from_terminal(node: &Token) -> Self; } impl FromNode for Type { @@ -198,23 +195,20 @@ impl FromNode for VariableDeclaration { }; assert!(&[ - LeekTokenKind::Keyword(KeywordKind::Leak), - LeekTokenKind::Keyword(KeywordKind::Hold), - LeekTokenKind::Keyword(KeywordKind::Perm) + TokenKind::Keyword(KeywordKind::Leak), + TokenKind::Keyword(KeywordKind::Hold), + TokenKind::Keyword(KeywordKind::Perm) ] .contains(&node.children[0].terminal_token().kind)); let identifier = &node.children[1].terminal_token(); - assert_eq!(identifier.kind, LeekTokenKind::Identifier); + assert_eq!(identifier.kind, TokenKind::Identifier); let identifier = identifier.text.clone(); - assert_eq!( - node.children[2].terminal_token().kind, - LeekTokenKind::Equals - ); + assert_eq!(node.children[2].terminal_token().kind, TokenKind::Equals); if let ParseTreeNode::Terminal(terminal) = &node.children[3] { - if terminal.kind == LeekTokenKind::Colon { + if terminal.kind == TokenKind::Colon { todo!("Parse leak with explicit type") } else { unreachable!("Terminal token in leak statement was not a colon") @@ -285,11 +279,11 @@ impl FromNode for Expression { } } -impl From for IntegerKind { - fn from(value: LeekToken) -> Self { - let LeekTokenKind::IntegerLiteral(integer) = value.kind else { - panic!("Expected integer literal, found {:?}", value.kind) - }; +impl From for IntegerKind { + fn from(value: Token) -> Self { + let TokenKind::IntegerLiteral(integer) = value.kind else { + panic!("Expected integer literal, found {:?}", value.kind) + }; // TODO: add support for type specifiers like `u32` and `i32` @@ -314,25 +308,25 @@ impl FromNode for Atom { let atom = match &node.children[0] { ParseTreeNode::Terminal(terminal) => match terminal.kind { - LeekTokenKind::StringLiteral => Atom::Literal(Literal { + TokenKind::StringLiteral => Atom::Literal(Literal { kind: LiteralKind::String(terminal.text.clone()), span: terminal.span.clone(), }), - LeekTokenKind::CharLiteral => Atom::Literal(Literal { + TokenKind::CharLiteral => Atom::Literal(Literal { kind: LiteralKind::Char(terminal.text.chars().collect::>()[1]), span: terminal.span.clone(), }), - LeekTokenKind::IntegerLiteral(_) => Atom::Literal(Literal { + TokenKind::IntegerLiteral(_) => Atom::Literal(Literal { kind: LiteralKind::Integer(IntegerKind::from(terminal.clone())), span: terminal.span.clone(), }), - LeekTokenKind::FloatLiteral => todo!(), - LeekTokenKind::OpenParen => { + TokenKind::FloatLiteral => todo!(), + TokenKind::OpenParen => { let expression = Expression::from_node(node.children[1].non_terminal()); assert_eq!( node.children[2].terminal_token().kind, - LeekTokenKind::CloseParen + TokenKind::CloseParen ); Atom::ParenthesizedExpression(Box::new(expression)) @@ -354,11 +348,11 @@ impl FromNode for Atom { } impl FromTerminal for UnaryOperator { - fn from_terminal(node: &LeekToken) -> Self { + fn from_terminal(node: &Token) -> Self { match node.kind { - LeekTokenKind::BitwiseNot => Self::BitwiseNot, - LeekTokenKind::LogicalNot => Self::LogicalNot, - LeekTokenKind::Asterisk => Self::Asterisk, + TokenKind::BitwiseNot => Self::BitwiseNot, + TokenKind::LogicalNot => Self::LogicalNot, + TokenKind::Asterisk => Self::Asterisk, _ => unreachable!("Invalid binary operator {:?}", node.kind), } } @@ -393,20 +387,17 @@ impl FromNode for FunctionCallExpression { let identifier = QualifiedIdentifier::from_node(node.children[0].non_terminal()); - assert_eq!( - node.children[1].terminal_token().kind, - LeekTokenKind::OpenParen - ); + assert_eq!(node.children[1].terminal_token().kind, TokenKind::OpenParen); let arguments = match &node.children[2] { ParseTreeNode::Terminal(terminal) => { - assert_eq!(terminal.kind, LeekTokenKind::CloseParen); + assert_eq!(terminal.kind, TokenKind::CloseParen); Vec::new() } ParseTreeNode::NonTerminal(non_terminal) => { assert_eq!( node.children[3].terminal_token().kind, - LeekTokenKind::CloseParen + TokenKind::CloseParen ); assert_nt_kind(non_terminal, ParseTreeNonTerminalKind::FunctionArguments); @@ -415,7 +406,7 @@ impl FromNode for FunctionCallExpression { for (index, argument) in non_terminal.children.iter().enumerate() { if index % 2 == 1 { - assert_eq!(argument.terminal_token().kind, LeekTokenKind::Comma); + assert_eq!(argument.terminal_token().kind, TokenKind::Comma); continue; } @@ -436,26 +427,26 @@ impl FromNode for FunctionCallExpression { } impl FromTerminal for BinaryOperator { - fn from_terminal(node: &LeekToken) -> Self { + fn from_terminal(node: &Token) -> Self { match node.kind { - LeekTokenKind::DoubleEquals => Self::DoubleEquals, - LeekTokenKind::LessThan => Self::LessThan, - LeekTokenKind::LessThanOrEqual => Self::LessThanOrEqual, - LeekTokenKind::GreaterThan => Self::GreaterThan, - LeekTokenKind::GreaterThanOrEqual => Self::GreaterThanOrEqual, - LeekTokenKind::Plus => Self::Plus, - LeekTokenKind::Minus => Self::Minus, - LeekTokenKind::Asterisk => Self::Asterisk, - LeekTokenKind::Divide => Self::Divide, - LeekTokenKind::Modulo => Self::Modulo, - LeekTokenKind::BitwiseXor => Self::BitwiseXor, - LeekTokenKind::BitwiseOr => Self::BitwiseOr, - LeekTokenKind::BitwiseAnd => Self::BitwiseAnd, - LeekTokenKind::Exponentiation => Self::Exponentiation, - LeekTokenKind::LeftShift => Self::LeftShift, - LeekTokenKind::RightShift => Self::RightShift, - LeekTokenKind::LogicalOr => Self::LogicalOr, - LeekTokenKind::LogicalAnd => Self::LogicalAnd, + TokenKind::DoubleEquals => Self::DoubleEquals, + TokenKind::LessThan => Self::LessThan, + TokenKind::LessThanOrEqual => Self::LessThanOrEqual, + TokenKind::GreaterThan => Self::GreaterThan, + TokenKind::GreaterThanOrEqual => Self::GreaterThanOrEqual, + TokenKind::Plus => Self::Plus, + TokenKind::Minus => Self::Minus, + TokenKind::Asterisk => Self::Asterisk, + TokenKind::Divide => Self::Divide, + TokenKind::Modulo => Self::Modulo, + TokenKind::BitwiseXor => Self::BitwiseXor, + TokenKind::BitwiseOr => Self::BitwiseOr, + TokenKind::BitwiseAnd => Self::BitwiseAnd, + TokenKind::Exponentiation => Self::Exponentiation, + TokenKind::LeftShift => Self::LeftShift, + TokenKind::RightShift => Self::RightShift, + TokenKind::LogicalOr => Self::LogicalOr, + TokenKind::LogicalAnd => Self::LogicalAnd, _ => unreachable!("Invalid binary operator {:?}", node.kind), } } @@ -562,13 +553,13 @@ impl FromNode for FunctionDefinition { // Make sure nodes are correct assert_eq!( parameter_nodes.first().unwrap().terminal_token().kind, - LeekTokenKind::OpenParen, + TokenKind::OpenParen, "Expected first token of params to be open paren" ); assert_eq!( parameter_nodes.last().unwrap().terminal_token().kind, - LeekTokenKind::CloseParen, + TokenKind::CloseParen, "Expected last token of params to be close paren" ); @@ -578,7 +569,7 @@ impl FromNode for FunctionDefinition { if i % 2 == 0 { assert_eq!( parameter_nodes.get(i).unwrap().terminal_token().kind, - LeekTokenKind::Comma, + TokenKind::Comma, "Expected token to be comma" ); continue; @@ -600,7 +591,7 @@ impl FromNode for FunctionDefinition { assert_eq!( function_return_type.children[0].terminal_token().kind, - LeekTokenKind::Arrow, + TokenKind::Arrow, "Expected first token of return type to be arrow" ); @@ -632,7 +623,7 @@ impl FromNode for FunctionParameter { let identifier = node.children[0].terminal_token().text.clone(); - assert!(node.children[1].terminal_token().kind == LeekTokenKind::Colon); + assert!(node.children[1].terminal_token().kind == TokenKind::Colon); let ty = Type::from_node(node.children[2].non_terminal()); @@ -648,12 +639,12 @@ impl FromNode for Block { assert_eq!( node.children.first().unwrap().terminal_token().kind, - LeekTokenKind::OpenCurlyBracket + TokenKind::OpenCurlyBracket ); assert_eq!( node.children.last().unwrap().terminal_token().kind, - LeekTokenKind::CloseCurlyBracket + TokenKind::CloseCurlyBracket ); let mut statements = Vec::new(); @@ -668,24 +659,24 @@ impl FromNode for Block { } impl FromTerminal for AssignmentOperator { - fn from_terminal(node: &LeekToken) -> Self { + fn from_terminal(node: &Token) -> Self { match node.kind { - LeekTokenKind::Equals => Self::Equals, - LeekTokenKind::PlusEquals => Self::PlusEquals, - LeekTokenKind::MinusEquals => Self::MinusEquals, - LeekTokenKind::MultiplyEquals => Self::MultiplyEquals, - LeekTokenKind::DivideEquals => Self::DivideEquals, - LeekTokenKind::ModuloEquals => Self::ModuloEquals, - LeekTokenKind::BitwiseNotEquals => Self::BitwiseNotEquals, - LeekTokenKind::BitwiseXorEquals => Self::BitwiseXorEquals, - LeekTokenKind::BitwiseOrEquals => Self::BitwiseOrEquals, - LeekTokenKind::BitwiseAndEquals => Self::BitwiseAndEquals, - LeekTokenKind::LogicalNotEquals => Self::LogicalNotEquals, - LeekTokenKind::ExponentiationEquals => Self::ExponentiationEquals, - LeekTokenKind::LeftShiftEquals => Self::LeftShiftEquals, - LeekTokenKind::RightShiftEquals => Self::RightShiftEquals, - LeekTokenKind::LogicalOrEquals => Self::LogicalOrEquals, - LeekTokenKind::LogicalAndEquals => Self::LogicalAndEquals, + TokenKind::Equals => Self::Equals, + TokenKind::PlusEquals => Self::PlusEquals, + TokenKind::MinusEquals => Self::MinusEquals, + TokenKind::MultiplyEquals => Self::MultiplyEquals, + TokenKind::DivideEquals => Self::DivideEquals, + TokenKind::ModuloEquals => Self::ModuloEquals, + TokenKind::BitwiseNotEquals => Self::BitwiseNotEquals, + TokenKind::BitwiseXorEquals => Self::BitwiseXorEquals, + TokenKind::BitwiseOrEquals => Self::BitwiseOrEquals, + TokenKind::BitwiseAndEquals => Self::BitwiseAndEquals, + TokenKind::LogicalNotEquals => Self::LogicalNotEquals, + TokenKind::ExponentiationEquals => Self::ExponentiationEquals, + TokenKind::LeftShiftEquals => Self::LeftShiftEquals, + TokenKind::RightShiftEquals => Self::RightShiftEquals, + TokenKind::LogicalOrEquals => Self::LogicalOrEquals, + TokenKind::LogicalAndEquals => Self::LogicalAndEquals, _ => { panic!("Invalid assignment operator {:?}", node.kind); } @@ -794,35 +785,29 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::FunctionCall(FunctionCallExpression { - identifier: QualifiedIdentifier::new(None, "println".to_owned()), - arguments: vec![Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::String("\"Hello, world!\"".to_owned()), - span: Span::new( - Position { row: 1, col: 12 }, - Position { row: 1, col: 27 }, - ), - }))], - })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::FunctionCall(FunctionCallExpression { + identifier: QualifiedIdentifier::new(None, "println".to_owned()), + arguments: vec![Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::String("\"Hello, world!\"".to_owned()), + span: Span::new( + Position { row: 1, col: 12 }, + Position { row: 1, col: 27 }, + ), + }))], + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -840,37 +825,31 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::Block(Block { - statements: vec![Statement::FunctionCall(FunctionCallExpression { - identifier: QualifiedIdentifier::new(None, "println".to_owned()), - arguments: vec![Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::String("\"Hello, world!\"".to_owned()), - span: Span::new( - Position { row: 2, col: 16 }, - Position { row: 2, col: 31 }, - ), - }))], - })], + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::Block(Block { + statements: vec![Statement::FunctionCall(FunctionCallExpression { + identifier: QualifiedIdentifier::new(None, "println".to_owned()), + arguments: vec![Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::String("\"Hello, world!\"".to_owned()), + span: Span::new( + Position { row: 2, col: 16 }, + Position { row: 2, col: 31 }, + ), + }))], })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -886,37 +865,31 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::VariableDeclaration(VariableDeclaration { - kind: VariableDeclarationKind::Local, - identifier: "a".to_owned(), - ty: None, - value: Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Integer(IntegerKind::I32(100)), - span: Span::new( - Position { row: 1, col: 13 }, - Position { row: 1, col: 16 }, - ), - })), - })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::VariableDeclaration(VariableDeclaration { + kind: VariableDeclarationKind::Local, + identifier: "a".to_owned(), + ty: None, + value: Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Integer(IntegerKind::I32(100)), + span: Span::new( + Position { row: 1, col: 13 }, + Position { row: 1, col: 16 }, + ), + })), + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -932,36 +905,30 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::VariableAssignment(VariableAssignment { - identifier: QualifiedIdentifier::new(None, "a".to_owned()), - operator: AssignmentOperator::PlusEquals, - value: Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Integer(IntegerKind::I32(420)), - span: Span::new( - Position { row: 1, col: 9 }, - Position { row: 1, col: 12 }, - ), - })), - })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::VariableAssignment(VariableAssignment { + identifier: QualifiedIdentifier::new(None, "a".to_owned()), + operator: AssignmentOperator::PlusEquals, + value: Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Integer(IntegerKind::I32(420)), + span: Span::new( + Position { row: 1, col: 9 }, + Position { row: 1, col: 12 }, + ), + })), + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -977,45 +944,39 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "add".to_owned(), - struct_identifier: None, - parameters: vec![ - FunctionParameter { - identifier: "a".to_owned(), - ty: Type::Primitive(PrimitiveKind::I32), - }, - FunctionParameter { - identifier: "b".to_owned(), - ty: Type::Primitive(PrimitiveKind::I32), - }, - ], - return_type: Type::Primitive(PrimitiveKind::I32), - body: Block { - statements: vec![Statement::Yeet(Expression::BinaryExpression( - BinaryExpression { - binary_operator: BinaryOperator::Plus, - lhs: Box::new(Expression::Atom(Atom::QualifiedIdentifier( - QualifiedIdentifier::new(None, "a".to_owned()), - ))), - rhs: Box::new(Expression::Atom(Atom::QualifiedIdentifier( - QualifiedIdentifier::new(None, "b".to_owned()), - ))), - }, - ))], + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "add".to_owned(), + struct_identifier: None, + parameters: vec![ + FunctionParameter { + identifier: "a".to_owned(), + ty: Type::Primitive(PrimitiveKind::I32), }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + FunctionParameter { + identifier: "b".to_owned(), + ty: Type::Primitive(PrimitiveKind::I32), + }, + ], + return_type: Type::Primitive(PrimitiveKind::I32), + body: Block { + statements: vec![Statement::Yeet(Expression::BinaryExpression( + BinaryExpression { + binary_operator: BinaryOperator::Plus, + lhs: Box::new(Expression::Atom(Atom::QualifiedIdentifier( + QualifiedIdentifier::new(None, "a".to_owned()), + ))), + rhs: Box::new(Expression::Atom(Atom::QualifiedIdentifier( + QualifiedIdentifier::new(None, "b".to_owned()), + ))), + }, + ))], + }, + })], }; assert_ast_eq!(ast, expected); @@ -1031,37 +992,31 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::VariableDeclaration(VariableDeclaration { - kind: VariableDeclarationKind::Local, - identifier: "a".to_owned(), - ty: None, - value: Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Char('b'), - span: Span::new( - Position { row: 1, col: 13 }, - Position { row: 1, col: 16 }, - ), - })), - })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::VariableDeclaration(VariableDeclaration { + kind: VariableDeclarationKind::Local, + identifier: "a".to_owned(), + ty: None, + value: Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Char('b'), + span: Span::new( + Position { row: 1, col: 13 }, + Position { row: 1, col: 16 }, + ), + })), + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -1077,40 +1032,34 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::VariableDeclaration(VariableDeclaration { - kind: VariableDeclarationKind::Local, - identifier: "a".to_owned(), - ty: None, - value: Expression::UnaryExpression(UnaryExpression { - unary_operator: UnaryOperator::BitwiseNot, - expression: Box::new(Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Integer(IntegerKind::I32(69)), - span: Span::new( - Position { row: 1, col: 14 }, - Position { row: 1, col: 16 }, - ), - }))), - }), - })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::VariableDeclaration(VariableDeclaration { + kind: VariableDeclarationKind::Local, + identifier: "a".to_owned(), + ty: None, + value: Expression::UnaryExpression(UnaryExpression { + unary_operator: UnaryOperator::BitwiseNot, + expression: Box::new(Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Integer(IntegerKind::I32(69)), + span: Span::new( + Position { row: 1, col: 14 }, + Position { row: 1, col: 16 }, + ), + }))), + }), + })], + }, + })], }; assert_ast_eq!(ast, expected); @@ -1126,47 +1075,41 @@ mod tests { let ast = parse_string(INPUT.to_owned()).unwrap_or_else(|e| panic!("{e}")); - let expected = LeekAst { + let expected = Ast { source_file: SourceFile { path: None, content: INPUT.to_owned(), }, - root: Program { - constant_variables: vec![], - static_variables: vec![], - function_definitions: vec![FunctionDefinition { - name: "main".to_owned(), - struct_identifier: None, - parameters: vec![], - return_type: Type::Primitive(PrimitiveKind::Void), - body: Block { - statements: vec![Statement::VariableDeclaration(VariableDeclaration { - kind: VariableDeclarationKind::Local, - identifier: "a".to_owned(), - ty: None, - value: Expression::BinaryExpression(BinaryExpression { - binary_operator: BinaryOperator::Minus, - lhs: Box::new(Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Integer(IntegerKind::I32(69)), - span: Span::new( - Position { row: 1, col: 13 }, - Position { row: 1, col: 15 }, - ), - }))), - rhs: Box::new(Expression::Atom(Atom::Literal(Literal { - kind: LiteralKind::Integer(IntegerKind::I32(420)), - span: Span::new( - Position { row: 1, col: 18 }, - Position { row: 1, col: 21 }, - ), - }))), - }), - })], - }, - }], - struct_definitions: vec![], - enum_definitions: vec![], - }, + items: vec![ProgramPart::FunctionDefinition(FunctionDefinition { + name: "main".to_owned(), + struct_identifier: None, + parameters: vec![], + return_type: Type::Primitive(PrimitiveKind::Void), + body: Block { + statements: vec![Statement::VariableDeclaration(VariableDeclaration { + kind: VariableDeclarationKind::Local, + identifier: "a".to_owned(), + ty: None, + value: Expression::BinaryExpression(BinaryExpression { + binary_operator: BinaryOperator::Minus, + lhs: Box::new(Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Integer(IntegerKind::I32(69)), + span: Span::new( + Position { row: 1, col: 13 }, + Position { row: 1, col: 15 }, + ), + }))), + rhs: Box::new(Expression::Atom(Atom::Literal(Literal { + kind: LiteralKind::Integer(IntegerKind::I32(420)), + span: Span::new( + Position { row: 1, col: 18 }, + Position { row: 1, col: 21 }, + ), + }))), + }), + })], + }, + })], }; assert_ast_eq!(ast, expected); diff --git a/compiler/src/frontend/ast/mod.rs b/compiler/src/frontend/ast/mod.rs index cd66cb0..21d1bac 100644 --- a/compiler/src/frontend/ast/mod.rs +++ b/compiler/src/frontend/ast/mod.rs @@ -5,24 +5,24 @@ use super::position::{SourceFile, Span}; pub mod builder; #[derive(Debug)] -pub struct LeekAst { +pub struct Ast { pub source_file: SourceFile, - pub root: Program, + pub items: Vec, } -impl PartialEq for LeekAst { +impl PartialEq for Ast { fn eq(&self, other: &Self) -> bool { - self.root == other.root + self.items == other.items } } #[derive(Debug, PartialEq)] -pub struct Program { - pub constant_variables: Vec, - pub static_variables: Vec, - pub function_definitions: Vec, - pub struct_definitions: Vec, - pub enum_definitions: Vec, +pub enum ProgramPart { + ConstantVariable(VariableDeclaration), + StaticVariable(VariableDeclaration), + FunctionDefinition(FunctionDefinition), + StructDefinition(StructDefinition), + EnumDefinition(EnumDefinition), } #[derive(Debug, PartialEq)] @@ -213,7 +213,7 @@ pub struct StructFieldAccess { #[derive(Debug, PartialEq)] pub struct StructMethodCall { - pub identifier: QualifiedIdentifier, + pub callee: Box, pub method: String, pub arguments: Vec, } diff --git a/compiler/src/frontend/lexer.rs b/compiler/src/frontend/lexer.rs deleted file mode 100644 index 5756972..0000000 --- a/compiler/src/frontend/lexer.rs +++ /dev/null @@ -1,1661 +0,0 @@ -use std::{ - cell::UnsafeCell, - collections::VecDeque, - fmt::{Debug, Display}, -}; - -use crate::{ - frontend::position::{Position, SourceFile, Span}, - frontend::reader::CharacterReader, -}; - -#[allow(dead_code)] -#[cfg_attr(not(test), derive(Debug))] -#[derive(Clone)] -pub struct LeekToken { - pub kind: LeekTokenKind, - pub text: String, - pub span: Span, -} - -impl From<(LeekTokenKind, T)> for LeekToken -where - T: Into + Sized, -{ - fn from((kind, text): (LeekTokenKind, T)) -> Self { - Self { - kind, - text: text.into(), - span: Span::from(Position::new()), - } - } -} - -#[cfg(test)] -impl Debug for LeekToken { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("LeekToken") - .field("kind", &self.kind) - .field("text", &self.text) - .finish() - } -} - -impl Display for LeekToken { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?} => {:?}", self.kind, self.text) - } -} - -impl PartialEq for LeekToken { - fn eq(&self, other: &Self) -> bool { - self.kind == other.kind && self.text == other.text - } -} - -#[derive(Debug, Eq, PartialEq, Clone, Copy)] -pub enum IntegerLiteralKind { - Decimal, - Hexadecimal, - Binary, - Octal, -} - -#[derive(Debug, Eq, PartialEq, Clone, Copy)] -pub enum KeywordKind { - Fn, - Struct, - Leak, - Hold, - Perm, - If, - Else, - While, - For, - Yeet, -} - -impl TryFrom<&String> for KeywordKind { - type Error = (); - - fn try_from(value: &String) -> Result { - Ok(match value.as_str() { - "fn" => Self::Fn, - "struct" => Self::Struct, - "leak" => Self::Leak, - "hold" => Self::Hold, - "perm" => Self::Perm, - "if" => Self::If, - "else" => Self::Else, - "while" => Self::While, - "for" => Self::For, - "yeet" => Self::Yeet, - _ => return Err(()), - }) - } -} - -#[derive(Debug, Eq, PartialEq, Clone, Copy)] -pub enum LeekTokenKind { - // Significant Whitespace - Newline, - - // Words - Keyword(KeywordKind), // leak - Identifier, // YourMom - - // Grouping - OpenParen, // ( - CloseParen, // ) - OpenBracket, // [ - CloseBracket, // ] - OpenCurlyBracket, // { - CloseCurlyBracket, // } - - // Literals - StringLiteral, // "your mom" - CharLiteral, // 'd' - IntegerLiteral(IntegerLiteralKind), // 69 - FloatLiteral, // 420.69 - - // Single Operators - Equals, // = - DoubleEquals, // == - LessThan, // < - LessThanOrEqual, // <= - GreaterThan, // > - GreaterThanOrEqual, // >= - Plus, // + - PlusEquals, // += - Minus, // - - MinusEquals, // -= - Asterisk, // * - MultiplyEquals, // *= - Divide, // / - DivideEquals, // /= - Modulo, // % - ModuloEquals, // %= - BitwiseNot, // ~ - BitwiseNotEquals, // ~= - BitwiseXor, // ^ - BitwiseXorEquals, // ^= - BitwiseOr, // | - BitwiseOrEquals, // |= - BitwiseAnd, // & - BitwiseAndEquals, // &= - LogicalNot, // ! - LogicalNotEquals, // != - - // Double Operators - Exponentiation, // ** - ExponentiationEquals, // **= - LeftShift, // << - LeftShiftEquals, // <<= - RightShift, // >> - RightShiftEquals, // >>= - LogicalOr, // || - LogicalOrEquals, // ||= - LogicalAnd, // && - LogicalAndEquals, // &&= - - // Non-Operator symbols - Arrow, // -> - QuestionMark, // ? - Comma, // , - Semicolon, // ; - Colon, // : - DoubleColon, // :: - Period, // . - BangCoalescing, // !. - BackSlash, // \ - Underscore, // _ - Asperand, // @ - Hash, // # - DollarSign, // $ -} - -impl LeekTokenKind { - pub fn is_assignment_operator(&self) -> bool { - matches!( - self, - Self::Equals - | Self::PlusEquals - | Self::MinusEquals - | Self::MultiplyEquals - | Self::DivideEquals - | Self::ModuloEquals - | Self::BitwiseNotEquals - | Self::BitwiseXorEquals - | Self::BitwiseOrEquals - | Self::BitwiseAndEquals - | Self::LogicalNotEquals - | Self::ExponentiationEquals - | Self::LeftShiftEquals - | Self::RightShiftEquals - | Self::LogicalOrEquals - | Self::LogicalAndEquals - ) - } - - pub fn is_unary_operator(&self) -> bool { - matches!(self, Self::BitwiseNot | Self::LogicalNot | Self::Asterisk) - } - - pub fn is_binary_operator(&self) -> bool { - matches!( - self, - Self::DoubleEquals - | Self::LessThan - | Self::LessThanOrEqual - | Self::GreaterThan - | Self::GreaterThanOrEqual - | Self::Plus - | Self::Minus - | Self::Asterisk - | Self::Divide - | Self::Modulo - | Self::BitwiseXor - | Self::BitwiseOr - | Self::BitwiseAnd - | Self::Exponentiation - | Self::LeftShift - | Self::RightShift - | Self::LogicalOr - | Self::LogicalAnd - ) - } - - pub fn is_literal(&self) -> bool { - matches!( - self, - Self::CharLiteral | Self::StringLiteral | Self::FloatLiteral | Self::IntegerLiteral(_) - ) - } - - fn grouping_symbol_from(c: char) -> LeekTokenKind { - match c { - '(' => Self::OpenParen, - ')' => Self::CloseParen, - '[' => Self::OpenBracket, - ']' => Self::CloseBracket, - '{' => Self::OpenCurlyBracket, - '}' => Self::CloseCurlyBracket, - x => unreachable!("Illegal non-grouping symbol `{}`", x), - } - } - - fn single_operator_from(c: char) -> LeekTokenKind { - match c { - '=' => Self::Equals, - '<' => Self::LessThan, - '>' => Self::GreaterThan, - '!' => Self::LogicalNot, - '+' => Self::Plus, - '-' => Self::Minus, - '*' => Self::Asterisk, - '/' => Self::Divide, - '%' => Self::Modulo, - '~' => Self::BitwiseNot, - '^' => Self::BitwiseXor, - '|' => Self::BitwiseOr, - '&' => Self::BitwiseAnd, - x => unreachable!("Illegal single non-operator `{}`", x), - } - } - - fn double_operator_from(c: char) -> LeekTokenKind { - match c { - '*' => Self::Exponentiation, - '<' => Self::LeftShift, - '>' => Self::RightShift, - '&' => Self::LogicalAnd, - '|' => Self::LogicalOr, - x => unreachable!("Illegal double non-operator `{}`", x), - } - } - - fn single_equals_operator_from(c: char) -> LeekTokenKind { - match c { - '=' => Self::DoubleEquals, - '<' => Self::LessThanOrEqual, - '>' => Self::GreaterThanOrEqual, - '!' => Self::LogicalNotEquals, - '+' => Self::PlusEquals, - '-' => Self::MinusEquals, - '*' => Self::MultiplyEquals, - '/' => Self::DivideEquals, - '%' => Self::ModuloEquals, - '~' => Self::BitwiseNotEquals, - '^' => Self::BitwiseXorEquals, - '|' => Self::BitwiseOrEquals, - '&' => Self::BitwiseAndEquals, - x => unreachable!("Illegal single non-equals-operator `{}`", x), - } - } - - fn double_equals_operator_from(c: char) -> LeekTokenKind { - match c { - '*' => Self::ExponentiationEquals, - '<' => Self::LeftShiftEquals, - '>' => Self::RightShiftEquals, - '&' => Self::LogicalAndEquals, - '|' => Self::LogicalOrEquals, - x => unreachable!("Illegal double non-equals-operator `{}`", x), - } - } - - fn other_symbol_from(c: impl Into) -> LeekTokenKind { - match c.into().as_str() { - "->" => Self::Arrow, - "?" => Self::QuestionMark, - "!." => Self::BangCoalescing, - "," => Self::Comma, - ";" => Self::Semicolon, - ":" => Self::Colon, - "::" => Self::DoubleColon, - "." => Self::Period, - "\\" => Self::BackSlash, - "_" => Self::Underscore, - "@" => Self::Asperand, - "#" => Self::Hash, - "$" => Self::DollarSign, - x => unreachable!("Illegal non-other-symbol `{}`", x), - } - } -} - -/// Represents an error when lexing a file -#[derive(Debug)] -pub struct LexerError { - kind: LexerErrorKind, - source_file: SourceFile, - position: Position, -} - -impl PartialEq for LexerError { - fn eq(&self, other: &Self) -> bool { - self.kind == other.kind - } -} - -#[cfg(test)] -impl From for LexerError { - fn from(kind: LexerErrorKind) -> Self { - use std::path::PathBuf; - - LexerError { - kind, - source_file: SourceFile { - path: Some(PathBuf::new()), - content: String::new(), - }, - position: Position::new(), - } - } -} - -#[derive(Debug, PartialEq)] -pub enum LexerErrorKind { - UnexpectedChar(char), - UnclosedWrappedLiteral(LeekTokenKind), - UnexpectedEndOfFloatLiteral, - UnexpectedCharactersInFloatLiteral, - UnexpectedExtraPeriodInFloatLiteral, - UnexpectedEndOfIntegerLiteral(IntegerLiteralKind), - UnexpectedCharactersInIntegerLiteral(IntegerLiteralKind), -} - -impl Display for LexerError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - writeln!( - f, - "{}:{}", - match &self.source_file.path { - Some(file) => file - .canonicalize() - .expect("Could not canonicalize file path") - .to_str() - .expect("Could not convert file path to string") - .trim_start_matches(r"\\?\") - .to_owned(), - None => "".to_owned(), - }, - self.position - )?; - - let lines: Vec<_> = self.source_file.content.lines().collect(); - - // Print the lines around and including the one with the error - let start = if self.position.row < 2 { - 0 - } else { - self.position.row - 2 - } as usize; - - // Print each line and the line number - for (n, line) in lines[start..(self.position.row + 1) as usize] - .iter() - .enumerate() - { - writeln!(f, "{:>3}: {}", n + start + 1, line)?; - } - - // Print the space before the highlight - for _ in 0..self.position.col + 5 { - write!(f, " ")?; - } - - // Print the underline highlight - writeln!(f, "^")?; - - // Print the space before "here" - for _ in 0..self.position.col + 5 { - write!(f, " ")?; - } - - writeln!(f, "here")?; - writeln!(f)?; - - match &self.kind { - LexerErrorKind::UnexpectedChar(c) => writeln!(f, "Unexpected char `{c}`"), - LexerErrorKind::UnclosedWrappedLiteral(kind) => { - writeln!(f, "Unexpected end of wrapped literal: {kind:?}") - } - LexerErrorKind::UnexpectedEndOfFloatLiteral => { - writeln!(f, "Unexpected end of float literal") - } - LexerErrorKind::UnexpectedCharactersInFloatLiteral => { - writeln!(f, "Unexpected characters inside float literal") - } - LexerErrorKind::UnexpectedExtraPeriodInFloatLiteral => { - writeln!(f, "Unexpected extra `.` inside float literal") - } - LexerErrorKind::UnexpectedEndOfIntegerLiteral(kind) => { - writeln!(f, "Unexpected end of {kind:?} integer literal") - } - LexerErrorKind::UnexpectedCharactersInIntegerLiteral(kind) => { - writeln!(f, "Unexpected characters inside {kind:?} integer literal") - } - } - } -} - -/// Represents a generic Lexer object -pub trait Lexer { - fn next(&mut self) -> Result, LexerError>; - fn has_next(&self) -> Result; - fn peek(&self) -> Result, LexerError>; - fn peek_nth(&self, n: usize) -> Result, LexerError>; - fn get_position(&self) -> &Position; - fn get_source_file(&self) -> &SourceFile; -} - -/// Defines a specific Lexer for Leek -/// -/// This lexer implementation uses a "lazy" iterator approach such -/// that characters are not read from the input stream until a token is requested. -/// -/// The lexer uses interior mutability to allow for peeking ahead in the token stream. -/// Since peeking ahead modifies the state of the character reader, this would otherwise -/// not be possible, unless the peek function took a mutable reference to the lexer. -/// -/// UnsafeCell is used to allow for an optimization of the peek function that stores -/// the peeked tokens in a VecDeque. This is done to avoid having to re-lex the same -/// tokens multiple times. -pub struct LeekLexer { - character_reader: UnsafeCell>, - peek_forward: UnsafeCell>, -} - -impl LeekLexer { - pub fn new(character_reader: impl CharacterReader + 'static) -> Self { - LeekLexer { - character_reader: UnsafeCell::new(Box::new(character_reader)), - peek_forward: UnsafeCell::new(VecDeque::new()), - } - } - - /// Read a literal that is wrapped in the provided character - /// The wrapper character can be escaped using the backslash character `\` - fn read_wrapped_escapable( - &self, - wrapper: char, - kind: LeekTokenKind, - ) -> Result { - let character_reader = unsafe { &mut *self.character_reader.get() }; - - let mut text = String::new(); - let start = character_reader.get_position().clone(); - - macro_rules! get_next_char { - () => { - if let Some(c) = character_reader.next() { - c - } else { - return Err(LexerError { - kind: LexerErrorKind::UnclosedWrappedLiteral(kind), - source_file: character_reader.get_source_file().to_owned(), - position: character_reader.get_position().clone(), - }); - } - }; - } - - macro_rules! peek_nth_char { - ($n:expr) => { - if let Some(c) = character_reader.peek_nth($n) { - c - } else { - return Err(LexerError { - kind: LexerErrorKind::UnclosedWrappedLiteral(kind), - source_file: character_reader.get_source_file().to_owned(), - position: character_reader.get_position().clone(), - }); - } - }; - } - - macro_rules! expect_wrapper { - () => { - let c = get_next_char!(); - - if c != wrapper { - unreachable!( - "Not enough chars in character_reader (should be checked in advance)" - ); - } - - text.push(c); - }; - } - - // First Quote - expect_wrapper!(); - - // Read until next quote - while *peek_nth_char!(0) != wrapper { - // If escape char was found, read it in, and read in the escaped char - if *peek_nth_char!(0) == '\\' && *peek_nth_char!(1) == wrapper { - text.push(get_next_char!()); - } - - text.push(get_next_char!()); - } - - // Second Quote - expect_wrapper!(); - - let end = character_reader.get_position().clone(); - - Ok(LeekToken { - kind, - text, - span: Span::new(start, end), - }) - } - - /// Reads a generic number literal into either an integer or double - fn read_number_literal(&self) -> Result { - /* - * Integer Cases: - * - * 1. `42069` - 1 or more dec digits - * 2. `0xF2AB` - `0x` and then 1 or more hex digits - * 3. `0b11010101` - `0b` and then 1 or more binary digits - * 4. `0o01234567` - `0o` and then 1 or more octal digits - * - * 5. `1337u32`, `69i32` - 1 or more dec digits followed by `u` or `i` (8, 16, 32, 64, 128, or size) - * - * If size is not specified, i32 is the default - * - * Anywhere within the digits, `_` is allowed and is ignored - * - * Float Cases: - * - * 1. `0.1375454` - 1 or more dec digits, a `.`, and 1 or more dec digits - * 2. `576.1375454f64` - 1 or more dec digits, a `.`, 1 or more dec digits followed by `f` (32 or 64) - * - * If size is not specified, f32 is the default - */ - - // TODO: Lex negative numbers - - let character_reader = unsafe { &mut *self.character_reader.get() }; - - // Look ahead to match different literal types - return 'number: { - if *character_reader.peek().unwrap() == '0' { - let Some(c) = character_reader.peek_nth(1) else { - // Only found `0` and nothing else so parse as int literal `0` - break 'number self.read_dec_int_or_float_literal(); - }; - - match *c { - // Hex int with format `0x` - 'x' => self.read_based_int_literal(IntegerLiteralKind::Hexadecimal, |c| { - c.is_ascii_hexdigit() - }), - // Bin int with format `0b` - 'b' => self.read_based_int_literal(IntegerLiteralKind::Binary, |c| { - c == '0' || c == '1' - }), - // Oct int with format `0o` - 'o' => self.read_based_int_literal(IntegerLiteralKind::Octal, |c| { - c.is_ascii_octdigit() - }), - // Float with format `0.` - '.' => self.read_dec_int_or_float_literal(), - // Int with format `0` or potentially float with format `0.` - a if a.is_ascii_digit() => self.read_dec_int_or_float_literal(), - // found non digit and non special specifier after leading `0`, so parse the `0` as a single dec digit - _ => self.read_dec_int_or_float_literal(), - } - } else { - // Any dec int or float that does not start with 0 - self.read_dec_int_or_float_literal() - } - }; - } - - fn read_based_int_literal( - &self, - literal_kind: IntegerLiteralKind, - is_in_base: fn(char) -> bool, - ) -> Result { - let character_reader = unsafe { &mut *self.character_reader.get() }; - - macro_rules! create_error { - ($kind:expr) => { - LexerError { - kind: $kind, - source_file: character_reader.get_source_file().to_owned(), - position: character_reader.get_position().clone(), - } - }; - } - - macro_rules! get_next_char { - () => { - character_reader.next().ok_or_else(|| LexerError { - kind: LexerErrorKind::UnexpectedEndOfIntegerLiteral(literal_kind), - source_file: character_reader.get_source_file().to_owned(), - position: character_reader.get_position().clone(), - })? - }; - } - - let start = character_reader.get_position().clone(); - - let mut text = String::new(); - - // `0` - text.push(get_next_char!()); - // special boundary - text.push(get_next_char!()); - - while character_reader.has_next() { - let peeked_char = *character_reader.peek().unwrap(); - - if peeked_char == '_' { - // Ignore underscores - character_reader.next().unwrap(); - continue; - } else if !peeked_char.is_ascii_alphanumeric() { - // Stop parsing where we are if we encounter any symbols - break; - } - - // TODO: add support for type specifiers like `u32` and `i32` - - if !is_in_base(peeked_char) { - return Err(create_error!( - LexerErrorKind::UnexpectedCharactersInIntegerLiteral(literal_kind) - )); - } - - text.push(get_next_char!()); - } - - if text.len() <= 2 { - return Err(create_error!( - LexerErrorKind::UnexpectedEndOfIntegerLiteral(literal_kind) - )); - } - - let end = character_reader.get_position().clone(); - - Ok(LeekToken { - kind: LeekTokenKind::IntegerLiteral(literal_kind), - text, - span: Span::new(start, end), - }) - } - - fn read_dec_int_or_float_literal(&self) -> Result { - enum NumberLexingState { - Integer, - Float, - } - - let mut state = NumberLexingState::Integer; - - let character_reader = unsafe { &mut *self.character_reader.get() }; - - macro_rules! create_error { - ($kind:expr) => { - LexerError { - kind: $kind, - source_file: character_reader.get_source_file().to_owned(), - position: character_reader.get_position().clone(), - } - }; - } - - macro_rules! get_next_char { - () => { - character_reader.next().ok_or_else(|| LexerError { - kind: match state { - NumberLexingState::Integer => { - LexerErrorKind::UnexpectedEndOfIntegerLiteral( - IntegerLiteralKind::Decimal, - ) - } - NumberLexingState::Float => LexerErrorKind::UnexpectedEndOfFloatLiteral, - }, - source_file: character_reader.get_source_file().to_owned(), - position: character_reader.get_position().clone(), - })? - }; - } - - let start = character_reader.get_position().clone(); - - let mut text = String::new(); - - // first char - text.push(get_next_char!()); - - while character_reader.has_next() { - let peeked_char = *character_reader.peek().unwrap(); - - match peeked_char { - // Ignore underscores - '_' => { - character_reader.next().unwrap(); - continue; - } - // Stop lexing where we are if we encounter any symbols - c if !c.is_ascii_alphanumeric() && c != '.' => { - break; - } - // Non digit and non `.` characters while lexing - c if !c.is_ascii_digit() && c != '.' => { - return Err(create_error!(match state { - NumberLexingState::Integer => { - LexerErrorKind::UnexpectedCharactersInIntegerLiteral( - IntegerLiteralKind::Decimal, - ) - } - NumberLexingState::Float => - LexerErrorKind::UnexpectedCharactersInFloatLiteral, - })); - } - // Beginning of float, or an error - '.' => { - match state { - NumberLexingState::Integer => state = NumberLexingState::Float, - NumberLexingState::Float => { - return Err(create_error!( - LexerErrorKind::UnexpectedExtraPeriodInFloatLiteral - )) - } - } - text.push(get_next_char!()); - } - // Any ascii digit 0-9 - c if c.is_ascii_digit() => { - text.push(get_next_char!()); - } - // All other character types have already been matched - _ => unreachable!(), - } - - // TODO: add support for type specifiers like `u32` and `i32` - } - - // If we are in float mode, check if there were chars after the `.` - if let NumberLexingState::Float = state { - if text.ends_with('.') { - return Err(create_error!(LexerErrorKind::UnexpectedEndOfFloatLiteral)); - } - } - - let end = character_reader.get_position().clone(); - - Ok(LeekToken { - kind: match state { - NumberLexingState::Integer => { - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Decimal) - } - NumberLexingState::Float => LeekTokenKind::FloatLiteral, - }, - text, - span: Span::new(start, end), - }) - } - - /// Advance the lexer while the next character matches the predicate, and return the resulting string - fn read_while(&self, predicate: fn(char) -> bool) -> String { - let character_reader = unsafe { &mut *self.character_reader.get() }; - - let mut res = String::new(); - - while character_reader.has_next() { - let c = character_reader.peek().unwrap(); - - if !predicate(*c) { - return res; - } - - res.push(character_reader.next().unwrap()); - } - - res - } - - /// Advance the lexer while the next character matches the predicate, and discard the matched chars - fn ignore_while(&self, predicate: fn(char) -> bool) { - let character_reader = unsafe { &mut *self.character_reader.get() }; - - while character_reader.has_next() { - let c = character_reader.peek().unwrap(); - - if !predicate(*c) { - return; - } - - character_reader.next(); - } - } - - /// Requires character to be available - fn read_single(&self, kind: LeekTokenKind) -> LeekToken { - let character_reader = unsafe { &mut *self.character_reader.get() }; - - let start = character_reader.get_position().clone(); - let c = character_reader.next().unwrap(); - let end = character_reader.get_position().clone(); - - LeekToken { - kind, - text: c.into(), - span: Span::new(start, end), - } - } - - /// Peeks the character reader several chars forward to look for a char sequence - fn lookahead_has(&self, string: &str, n: usize) -> bool { - let character_reader = unsafe { &mut *self.character_reader.get() }; - - let chars = string.chars(); - - for (i, c) in chars.enumerate() { - let Some(peeked) = character_reader.peek_nth(n + i) else { - return false; - }; - - if *peeked != c { - return false; - } - } - - true - } - - /// Reads a fixed number of chars from the character reader and returns the resulting token - /// - /// Requires that the character reader be checked in advance to contain the correct sequence - fn read_multi(&self, string: &str, kind: LeekTokenKind) -> LeekToken { - let character_reader = unsafe { &mut *self.character_reader.get() }; - - let mut text = String::new(); - let start = character_reader.get_position().clone(); - - for expected_char in string.chars() { - if !character_reader.has_next() { - unreachable!("Not enough chars in character_reader (should be checked in advance)") - } - - let c = character_reader.peek().unwrap(); - - if *c != expected_char { - unreachable!( - "Char from character_reader did not match (should be checked in advance)" - ) - } - - text.push(character_reader.next().unwrap()); - } - - let end = character_reader.get_position().clone(); - - LeekToken { - kind, - text, - span: Span::new(start, end), - } - } - - /// Looks ahead to see if there is an `=` following the given prefix - fn lookahead_has_equals(&self, prefix: impl Into, n: usize) -> bool { - let mut c: String = prefix.into(); - c.push('='); - - self.lookahead_has(&c, n) - } - - /// Reads a fixed number of chars with an `=` suffixed to the given prefix from the character reader and returns the resulting token - /// - /// Requires that the character reader be checked in advance to contain the correct sequence - fn read_multi_equals(&self, prefix: impl Into, kind: LeekTokenKind) -> LeekToken { - let mut c: String = prefix.into(); - c.push('='); - - self.read_multi(&c, kind) - } - - fn read_single_operator( - &self, - c: char, - single: LeekTokenKind, - equals: LeekTokenKind, - ) -> LeekToken { - if self.lookahead_has_equals(c, 0) { - self.read_multi_equals(c, equals) - } else { - self.read_single(single) - } - } - - fn read_double_operator( - &self, - c: char, - normal: LeekTokenKind, - equals: LeekTokenKind, - ) -> LeekToken { - if self.lookahead_has_equals(c, 1) { - self.read_multi_equals(c.to_string().repeat(2), equals) - } else { - self.read_multi(&c.to_string().repeat(2), normal) - } - } - - fn read_next_token(&self) -> Result, LexerError> { - let character_reader = unsafe { &mut *self.character_reader.get() }; - - while character_reader.has_next() { - let start = character_reader.get_position().clone(); - - // SAFETY: always checking if more characters are available before unwrapping - let first_char = *character_reader.peek().unwrap(); - - let token = Ok(Some(match first_char { - // New lines are significant - '\n' => self.read_single(LeekTokenKind::Newline), - - // Whitespace - a if a.is_ascii_whitespace() => { - self.ignore_while(|c| c.is_ascii_whitespace() && c != '\n'); - continue; - } - - // Chop Comments - '/' if character_reader.peek_nth(1).is_some_and(|c| *c == '/') => { - self.ignore_while(|c| c != '\n'); - continue; - } - - // Words - a if a.is_ascii_alphabetic() => { - let word = self.read_while(|c| c.is_ascii_alphanumeric() || c == '_'); - - LeekToken { - kind: match KeywordKind::try_from(&word) { - Ok(kw_kind) => LeekTokenKind::Keyword(kw_kind), - Err(_) => LeekTokenKind::Identifier, - }, - text: word, - span: Span::new(start, character_reader.get_position().clone()), - } - } - - // Literals - '"' => self.read_wrapped_escapable('"', LeekTokenKind::StringLiteral)?, - '\'' => self.read_wrapped_escapable('\'', LeekTokenKind::CharLiteral)?, - a if a.is_ascii_digit() => self.read_number_literal()?, - - // Grouping Symbols - c @ ('(' | ')' | '[' | ']' | '{' | '}') => { - self.read_single(LeekTokenKind::grouping_symbol_from(c)) - } - - // Arrows (`->`) - '-' if character_reader.peek_nth(1).is_some_and(|c| *c == '>') => { - self.read_multi("->", LeekTokenKind::Arrow) - } - - // Bang Coalescing (`!.`) - '!' if character_reader.peek_nth(1).is_some_and(|c| *c == '.') => { - self.read_multi("!.", LeekTokenKind::BangCoalescing) - } - - // Double Colon (`::`) - ':' if character_reader.peek_nth(1).is_some_and(|c| *c == ':') => { - self.read_multi("::", LeekTokenKind::DoubleColon) - } - - // Double operators (must come first because of lookahead clash) - c @ ('*' | '&' | '|' | '>' | '<') - if character_reader.peek_nth(1).is_some_and(|x| *x == c) => - { - self.read_double_operator( - c, - LeekTokenKind::double_operator_from(c), - LeekTokenKind::double_equals_operator_from(c), - ) - } - - // Single Operators - c @ ('=' | '<' | '>' | '+' | '-' | '*' | '/' | '%' | '~' | '!' | '&' | '|' - | '^') => self.read_single_operator( - c, - LeekTokenKind::single_operator_from(c), - LeekTokenKind::single_equals_operator_from(c), - ), - - // Non-Operator symbols - c @ ('?' | ',' | ';' | ':' | '.' | '\\' | '_' | '@' | '#' | '$') => { - self.read_single(LeekTokenKind::other_symbol_from(c)) - } - - // Other - c => { - return Err(LexerError { - kind: LexerErrorKind::UnexpectedChar(c), - source_file: character_reader.get_source_file().clone(), - position: character_reader.get_position().clone(), - }) - } - })); - - return token; - } - - // If got to the end of the cursor without finding any more tokens, - // then we will never return more tokens - Ok(None) - } - - fn _next(&self) -> Result, LexerError> { - let peek_forward = unsafe { &mut *self.peek_forward.get() }; - - // Check if more tokens have already been precomputed for us - if !peek_forward.is_empty() { - // Always returns `Some` - return Ok(peek_forward.pop_front()); - } - - self.read_next_token() - } -} - -impl Lexer for LeekLexer { - fn next(&mut self) -> Result, LexerError> { - self._next() - } - - fn peek(&self) -> Result, LexerError> { - let peek_forward = unsafe { &mut *self.peek_forward.get() }; - - // Check if more tokens have already been precomputed for us - if let Some(token) = peek_forward.front() { - // Always returns `Some` - return Ok(Some(token)); - } - - let peek_forward = unsafe { &mut *self.peek_forward.get() }; - - // If there are more tokens - if let Some(token) = self._next()? { - // Store the token for later usage - peek_forward.push_back(token); - - // Return a reference to the token - Ok(peek_forward.front()) - } else { - // Otherwise, return None since there are no tokens to peek - Ok(None) - } - } - - fn peek_nth(&self, n: usize) -> Result, LexerError> { - let peek_forward = unsafe { &mut *self.peek_forward.get() }; - - // Check if `n` tokens have already been precomputed for us - if peek_forward.len() > n { - // Always returns `Some` - let peek = peek_forward; - return Ok(peek.get(n)); - } - - // Otherwise, pre-compute the next `n` tokens from the amount we've already computed - for _ in peek_forward.len()..=n { - // Get the next token or return early if none more are found - let Some(token) = self.read_next_token()? else { - return Ok(None); - }; - - // Store the token for later usage - peek_forward.push_back(token); - } - - // Always returns `Some` because we would not have completed the loop otherwise. - Ok(peek_forward.get(n)) - } - - fn has_next(&self) -> Result { - Ok(self.peek()?.is_some()) - } - - fn get_position(&self) -> &Position { - let character_reader = unsafe { &*self.character_reader.get() }; - - character_reader.get_position() - } - - fn get_source_file(&self) -> &SourceFile { - let character_reader = unsafe { &*self.character_reader.get() }; - - character_reader.get_source_file() - } -} - -#[cfg(test)] -mod test { - use crate::{ - frontend::lexer::{ - IntegerLiteralKind::*, KeywordKind::*, LeekToken as LT, LeekTokenKind::*, - }, - frontend::reader::FileReader, - }; - - use super::{LeekLexer, Lexer, LexerError, LexerErrorKind::*}; - - fn compare_input_to_expected(input: &str, expected_tokens: Vec) { - // Collect tokens from lexer - let reader = FileReader::from(input.to_owned()); - let mut lexer = LeekLexer::new(reader); - - let mut lexer_tokens = Vec::new(); - - while lexer.has_next().unwrap() { - lexer_tokens.push(lexer.next().unwrap().unwrap()) - } - - assert_eq!( - lexer_tokens, expected_tokens, - "Lexer tokens did not match expected" - ) - } - - fn lex_input(input: &str) -> Result, LexerError> { - // Collect tokens from lexer - let reader = FileReader::from(input.to_owned()); - let mut lexer = LeekLexer::new(reader); - - let mut lexer_tokens = Vec::new(); - - while lexer.has_next()? { - lexer_tokens.push(lexer.next()?.unwrap()) - } - - Ok(lexer_tokens) - } - - #[test] - fn basic_example() { - compare_input_to_expected( - r#"fn main() { - leak node = Node() - - println() - }"#, - vec![ - LT::from((Keyword(Fn), "fn")), - LT::from((Identifier, "main")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((OpenCurlyBracket, "{")), - LT::from((Newline, "\n")), - LT::from((Keyword(Leak), "leak")), - LT::from((Identifier, "node")), - LT::from((Equals, "=")), - LT::from((Identifier, "Node")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((Newline, "\n")), - LT::from((Newline, "\n")), - LT::from((Identifier, "println")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((Newline, "\n")), - LT::from((CloseCurlyBracket, "}")), - ], - ) - } - - #[test] - fn removes_comments() { - compare_input_to_expected( - r#"// this is a comment - fn main() { // this is a comment - leak node = Node() - // this is a comment - println() - // this is a comment - }// this is a comment"#, - vec![ - LT::from((Newline, "\n")), - LT::from((Keyword(Fn), "fn")), - LT::from((Identifier, "main")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((OpenCurlyBracket, "{")), - LT::from((Newline, "\n")), - LT::from((Keyword(Leak), "leak")), - LT::from((Identifier, "node")), - LT::from((Equals, "=")), - LT::from((Identifier, "Node")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((Newline, "\n")), - LT::from((Newline, "\n")), - LT::from((Identifier, "println")), - LT::from((OpenParen, "(")), - LT::from((CloseParen, ")")), - LT::from((Newline, "\n")), - LT::from((Newline, "\n")), - LT::from((CloseCurlyBracket, "}")), - ], - ) - } - - #[test] - fn basic_single_operators() { - compare_input_to_expected( - r#"= == < <= > >= + += - -= * *= / /= % %= ~ ~= ^ ^= | |= & &= ! !="#, - vec![ - LT::from((Equals, "=")), - LT::from((DoubleEquals, "==")), - LT::from((LessThan, "<")), - LT::from((LessThanOrEqual, "<=")), - LT::from((GreaterThan, ">")), - LT::from((GreaterThanOrEqual, ">=")), - LT::from((Plus, "+")), - LT::from((PlusEquals, "+=")), - LT::from((Minus, "-")), - LT::from((MinusEquals, "-=")), - LT::from((Asterisk, "*")), - LT::from((MultiplyEquals, "*=")), - LT::from((Divide, "/")), - LT::from((DivideEquals, "/=")), - LT::from((Modulo, "%")), - LT::from((ModuloEquals, "%=")), - LT::from((BitwiseNot, "~")), - LT::from((BitwiseNotEquals, "~=")), - LT::from((BitwiseXor, "^")), - LT::from((BitwiseXorEquals, "^=")), - LT::from((BitwiseOr, "|")), - LT::from((BitwiseOrEquals, "|=")), - LT::from((BitwiseAnd, "&")), - LT::from((BitwiseAndEquals, "&=")), - LT::from((LogicalNot, "!")), - LT::from((LogicalNotEquals, "!=")), - ], - ) - } - - #[test] - fn basic_double_operators() { - compare_input_to_expected( - r#"** **= << <<= >> >>= || ||= && &&="#, - vec![ - LT::from((Exponentiation, "**")), - LT::from((ExponentiationEquals, "**=")), - LT::from((LeftShift, "<<")), - LT::from((LeftShiftEquals, "<<=")), - LT::from((RightShift, ">>")), - LT::from((RightShiftEquals, ">>=")), - LT::from((LogicalOr, "||")), - LT::from((LogicalOrEquals, "||=")), - LT::from((LogicalAnd, "&&")), - LT::from((LogicalAndEquals, "&&=")), - ], - ) - } - - #[test] - fn double_non_operators() { - compare_input_to_expected( - r#"-> ->=-> - >"#, - vec![ - LT::from((Arrow, "->")), - LT::from((Arrow, "->")), - LT::from((Equals, "=")), - LT::from((Arrow, "->")), - LT::from((Minus, "-")), - LT::from((GreaterThan, ">")), - ], - ) - } - - #[test] - fn simple_string() { - compare_input_to_expected( - r#" "your mom 1""your mom 2" "your mom 3" "#, - vec![ - LT::from((StringLiteral, r#""your mom 1""#)), - LT::from((StringLiteral, r#""your mom 2""#)), - LT::from((StringLiteral, r#""your mom 3""#)), - ], - ) - } - - #[test] - fn string_quote_escapes() { - compare_input_to_expected( - r#" "your mom \"1\"" "your mom 2" "#, - vec![ - LT::from((StringLiteral, r#""your mom \"1\"""#)), - LT::from((StringLiteral, r#""your mom 2""#)), - ], - ) - } - - #[test] - fn unclosed_string() { - assert_eq!( - lex_input(r#" "this is a string that doesn't have a closing double quote"#), - Err(LexerError::from(UnclosedWrappedLiteral(StringLiteral))) - ) - } - - #[test] - fn simple_chars() { - compare_input_to_expected( - r" 'a''b' 'c' ", - vec![ - LT::from((CharLiteral, r"'a'")), - LT::from((CharLiteral, r"'b'")), - LT::from((CharLiteral, r"'c'")), - ], - ) - } - - #[test] - fn char_escapes() { - compare_input_to_expected( - r" 'a''b' '\'' ", - vec![ - LT::from((CharLiteral, r"'a'")), - LT::from((CharLiteral, r"'b'")), - LT::from((CharLiteral, r"'\''")), - ], - ) - } - - #[test] - fn unclosed_char() { - assert_eq!( - lex_input(r#" 'a"#), - Err(LexerError::from(UnclosedWrappedLiteral(CharLiteral))) - ) - } - - #[test] - fn basic_hex_literal() { - compare_input_to_expected( - "0xFFFF 0x123456789ABCDEF 0x01234567", - vec![ - LT::from((IntegerLiteral(Hexadecimal), "0xFFFF")), - LT::from((IntegerLiteral(Hexadecimal), "0x123456789ABCDEF")), - LT::from((IntegerLiteral(Hexadecimal), "0x01234567")), - ], - ) - } - - #[test] - fn underscores_in_hex_literal() { - compare_input_to_expected( - "0x__FF__F_F 0x_1_2_3456_789AB_CDE_F_ 0x_01_23_45_67", - vec![ - LT::from((IntegerLiteral(Hexadecimal), "0xFFFF")), - LT::from((IntegerLiteral(Hexadecimal), "0x123456789ABCDEF")), - LT::from((IntegerLiteral(Hexadecimal), "0x01234567")), - ], - ) - } - - #[test] - fn unexpected_end_of_hex() { - assert_eq!( - lex_input(r"0x"), - Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Hexadecimal))) - ) - } - - #[test] - fn illegal_hex_chars() { - assert_eq!( - lex_input(r"0xasdfgh"), - Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( - Hexadecimal - ))) - ) - } - - #[test] - fn hex_literal_on_boundary() { - compare_input_to_expected( - "(0x42069)", - vec![ - LT::from((OpenParen, "(")), - LT::from((IntegerLiteral(Hexadecimal), "0x42069")), - LT::from((CloseParen, ")")), - ], - ) - } - - #[test] - fn basic_bin_literal() { - compare_input_to_expected( - "0b00010011 0b111010100001 0b0", - vec![ - LT::from((IntegerLiteral(Binary), "0b00010011")), - LT::from((IntegerLiteral(Binary), "0b111010100001")), - LT::from((IntegerLiteral(Binary), "0b0")), - ], - ) - } - - #[test] - fn underscores_in_bin_literal() { - compare_input_to_expected( - "0b_00_0_100_11 0b1_1_101_01000_01_ 0b_0_", - vec![ - LT::from((IntegerLiteral(Binary), "0b00010011")), - LT::from((IntegerLiteral(Binary), "0b111010100001")), - LT::from((IntegerLiteral(Binary), "0b0")), - ], - ) - } - - #[test] - fn unexpected_end_of_bin() { - assert_eq!( - lex_input(r"0b"), - Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Binary))) - ) - } - - #[test] - fn illegal_bin_chars() { - assert_eq!( - lex_input(r"0b101a"), - Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( - Binary - ))) - ) - } - - #[test] - fn bin_literal_on_boundary() { - compare_input_to_expected( - "(0b01000101)", - vec![ - LT::from((OpenParen, "(")), - LT::from((IntegerLiteral(Binary), "0b01000101")), - LT::from((CloseParen, ")")), - ], - ) - } - - #[test] - fn basic_oct_literal() { - compare_input_to_expected( - "0o01234567 0o161343 0o00000001", - vec![ - LT::from((IntegerLiteral(Octal), "0o01234567")), - LT::from((IntegerLiteral(Octal), "0o161343")), - LT::from((IntegerLiteral(Octal), "0o00000001")), - ], - ) - } - - #[test] - fn underscores_in_oct_literal() { - compare_input_to_expected( - "0o01_234_56_7 0o_16134_3 0o000_00001_", - vec![ - LT::from((IntegerLiteral(Octal), "0o01234567")), - LT::from((IntegerLiteral(Octal), "0o161343")), - LT::from((IntegerLiteral(Octal), "0o00000001")), - ], - ) - } - - #[test] - fn unexpected_end_of_oct() { - assert_eq!( - lex_input(r"0o"), - Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Octal))) - ) - } - - #[test] - fn illegal_oct_chars() { - assert_eq!( - lex_input(r"0o1234567890abcdef"), - Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( - Octal - ))) - ) - } - - #[test] - fn oct_literal_on_boundary() { - compare_input_to_expected( - "(0o420)", - vec![ - LT::from((OpenParen, "(")), - LT::from((IntegerLiteral(Octal), "0o420")), - LT::from((CloseParen, ")")), - ], - ) - } - - #[test] - fn basic_dec_literal() { - compare_input_to_expected( - "123456789 1 0 2", - vec![ - LT::from((IntegerLiteral(Decimal), "123456789")), - LT::from((IntegerLiteral(Decimal), "1")), - LT::from((IntegerLiteral(Decimal), "0")), - LT::from((IntegerLiteral(Decimal), "2")), - ], - ) - } - - #[test] - fn underscores_in_dec_literal() { - compare_input_to_expected( - "1234_5_6789 1_ 0 2_2", - vec![ - LT::from((IntegerLiteral(Decimal), "123456789")), - LT::from((IntegerLiteral(Decimal), "1")), - LT::from((IntegerLiteral(Decimal), "0")), - LT::from((IntegerLiteral(Decimal), "22")), - ], - ) - } - - #[test] - fn illegal_dec_chars() { - assert_eq!( - lex_input(r"0123456789abcdef"), - Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( - Decimal - ))) - ) - } - - #[test] - fn dec_literal_on_boundary() { - compare_input_to_expected( - "(69)", - vec![ - LT::from((OpenParen, "(")), - LT::from((IntegerLiteral(Decimal), "69")), - LT::from((CloseParen, ")")), - ], - ) - } - - #[test] - fn basic_float_literal() { - compare_input_to_expected( - "0.0 0.1 1.0 420.69", - vec![ - LT::from((FloatLiteral, "0.0")), - LT::from((FloatLiteral, "0.1")), - LT::from((FloatLiteral, "1.0")), - LT::from((FloatLiteral, "420.69")), - ], - ) - } - - #[test] - fn underscores_in_float_literal() { - compare_input_to_expected( - "0_.0 0._1 1.0 1337_420.69", - vec![ - LT::from((FloatLiteral, "0.0")), - LT::from((FloatLiteral, "0.1")), - LT::from((FloatLiteral, "1.0")), - LT::from((FloatLiteral, "1337420.69")), - ], - ) - } - - #[test] - fn illegal_float_chars() { - assert_eq!( - lex_input(r"420.a69"), - Err(LexerError::from(UnexpectedCharactersInFloatLiteral)) - ); - - assert_eq!( - lex_input(r"420.6s9"), - Err(LexerError::from(UnexpectedCharactersInFloatLiteral)) - ); - } - - #[test] - fn float_literal_on_boundary() { - compare_input_to_expected( - "(420.69)", - vec![ - LT::from((OpenParen, "(")), - LT::from((FloatLiteral, "420.69")), - LT::from((CloseParen, ")")), - ], - ) - } - - #[test] - fn float_double_period() { - assert_eq!( - lex_input(r"420.69.1337"), - Err(LexerError::from(UnexpectedExtraPeriodInFloatLiteral)) - ); - } - - #[test] - fn float_end_with_period() { - assert_eq!( - lex_input(r"420."), - Err(LexerError::from(UnexpectedEndOfFloatLiteral)) - ); - } -} diff --git a/compiler/src/frontend/lexer/mod.rs b/compiler/src/frontend/lexer/mod.rs new file mode 100644 index 0000000..ea8292b --- /dev/null +++ b/compiler/src/frontend/lexer/mod.rs @@ -0,0 +1,764 @@ +use std::{ + cell::UnsafeCell, + collections::VecDeque, + fmt::{Debug, Display}, +}; + +use crate::{ + frontend::position::{Position, SourceFile, Span}, + frontend::reader::CharacterReader, +}; + +use self::token::{IntegerLiteralKind, KeywordKind, Token, TokenKind}; + +use super::position::highlight_span; + +mod test; +pub mod token; + +/// Represents an error when lexing a file +#[derive(Debug)] +pub struct LexerError { + kind: LexerErrorKind, + source_file: SourceFile, + position: Position, +} + +impl PartialEq for LexerError { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind + } +} + +#[cfg(test)] +impl From for LexerError { + fn from(kind: LexerErrorKind) -> Self { + use std::path::PathBuf; + + LexerError { + kind, + source_file: SourceFile { + path: Some(PathBuf::new()), + content: String::new(), + }, + position: Position::new(), + } + } +} + +#[derive(Debug, PartialEq)] +pub enum LexerErrorKind { + UnexpectedChar(char), + UnclosedWrappedLiteral(TokenKind), + UnexpectedEndOfFloatLiteral, + UnexpectedCharactersInFloatLiteral, + UnexpectedExtraPeriodInFloatLiteral, + UnexpectedEndOfIntegerLiteral(IntegerLiteralKind), + UnexpectedCharactersInIntegerLiteral(IntegerLiteralKind), +} + +impl Display for LexerError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.kind { + LexerErrorKind::UnexpectedChar(c) => writeln!(f, "Unexpected char `{c}`"), + LexerErrorKind::UnclosedWrappedLiteral(kind) => { + writeln!(f, "Unexpected end of wrapped literal: {kind:?}") + } + LexerErrorKind::UnexpectedEndOfFloatLiteral => { + writeln!(f, "Unexpected end of float literal") + } + LexerErrorKind::UnexpectedCharactersInFloatLiteral => { + writeln!(f, "Unexpected characters inside float literal") + } + LexerErrorKind::UnexpectedExtraPeriodInFloatLiteral => { + writeln!(f, "Unexpected extra `.` inside float literal") + } + LexerErrorKind::UnexpectedEndOfIntegerLiteral(kind) => { + writeln!(f, "Unexpected end of {kind:?} integer literal") + } + LexerErrorKind::UnexpectedCharactersInIntegerLiteral(kind) => { + writeln!(f, "Unexpected characters inside {kind:?} integer literal") + } + }?; + + highlight_span(f, &self.source_file, Span::from_position(&self.position))?; + + Ok(()) + } +} + +/// This lexer implementation uses a "lazy" iterator approach such +/// that characters are not read from the input stream until a token is requested. +/// +/// The lexer uses interior mutability to allow for peeking ahead in the token stream. +/// Since peeking ahead modifies the state of the character reader, this would otherwise +/// not be possible, unless the peek function took a mutable reference to the lexer. +/// +/// UnsafeCell is used to allow for an optimization of the peek function that stores +/// the peeked tokens in a VecDeque. This is done to avoid having to re-lex the same +/// tokens multiple times. +pub struct Lexer { + character_reader: UnsafeCell>, + peek_forward: UnsafeCell>, +} + +impl Lexer { + pub fn new(character_reader: impl CharacterReader + 'static) -> Self { + Lexer { + character_reader: UnsafeCell::new(Box::new(character_reader)), + peek_forward: UnsafeCell::new(VecDeque::new()), + } + } + + /// Read a literal that is wrapped in the provided character + /// The wrapper character can be escaped using the backslash character `\` + fn read_wrapped_escapable(&self, wrapper: char, kind: TokenKind) -> Result { + let character_reader = unsafe { &mut *self.character_reader.get() }; + + let mut text = String::new(); + let start = character_reader.get_position().clone(); + + macro_rules! get_next_char { + () => { + if let Some(c) = character_reader.next() { + c + } else { + return Err(LexerError { + kind: LexerErrorKind::UnclosedWrappedLiteral(kind), + source_file: character_reader.get_source_file().to_owned(), + position: character_reader.get_position().clone(), + }); + } + }; + } + + macro_rules! peek_nth_char { + ($n:expr) => { + if let Some(c) = character_reader.peek_nth($n) { + c + } else { + return Err(LexerError { + kind: LexerErrorKind::UnclosedWrappedLiteral(kind), + source_file: character_reader.get_source_file().to_owned(), + position: character_reader.get_position().clone(), + }); + } + }; + } + + macro_rules! expect_wrapper { + () => { + let c = get_next_char!(); + + if c != wrapper { + unreachable!( + "Not enough chars in character_reader (should be checked in advance)" + ); + } + + text.push(c); + }; + } + + // First Quote + expect_wrapper!(); + + // Read until next quote + while *peek_nth_char!(0) != wrapper { + // If escape char was found, read it in, and read in the escaped char + if *peek_nth_char!(0) == '\\' && *peek_nth_char!(1) == wrapper { + text.push(get_next_char!()); + } + + text.push(get_next_char!()); + } + + // Second Quote + expect_wrapper!(); + + let end = character_reader.get_position().clone(); + + Ok(Token { + kind, + text, + span: Span::new(start, end), + }) + } + + /// Reads a generic number literal into either an integer or double + fn read_number_literal(&self) -> Result { + /* + * Integer Cases: + * + * 1. `42069` - 1 or more dec digits + * 2. `0xF2AB` - `0x` and then 1 or more hex digits + * 3. `0b11010101` - `0b` and then 1 or more binary digits + * 4. `0o01234567` - `0o` and then 1 or more octal digits + * + * 5. `1337u32`, `69i32` - 1 or more dec digits followed by `u` or `i` (8, 16, 32, 64, 128, or size) + * + * If size is not specified, i32 is the default + * + * Anywhere within the digits, `_` is allowed and is ignored + * + * Float Cases: + * + * 1. `0.1375454` - 1 or more dec digits, a `.`, and 1 or more dec digits + * 2. `576.1375454f64` - 1 or more dec digits, a `.`, 1 or more dec digits followed by `f` (32 or 64) + * + * If size is not specified, f32 is the default + */ + + // TODO: Lex negative numbers + + let character_reader = unsafe { &mut *self.character_reader.get() }; + + // Look ahead to match different literal types + return 'number: { + if *character_reader.peek().unwrap() == '0' { + let Some(c) = character_reader.peek_nth(1) else { + // Only found `0` and nothing else so parse as int literal `0` + break 'number self.read_dec_int_or_float_literal(); + }; + + match *c { + // Hex int with format `0x` + 'x' => self.read_based_int_literal(IntegerLiteralKind::Hexadecimal, |c| { + c.is_ascii_hexdigit() + }), + // Bin int with format `0b` + 'b' => self.read_based_int_literal(IntegerLiteralKind::Binary, |c| { + c == '0' || c == '1' + }), + // Oct int with format `0o` + 'o' => self.read_based_int_literal(IntegerLiteralKind::Octal, |c| { + c.is_ascii_octdigit() + }), + // Float with format `0.` + '.' => self.read_dec_int_or_float_literal(), + // Int with format `0` or potentially float with format `0.` + a if a.is_ascii_digit() => self.read_dec_int_or_float_literal(), + // found non digit and non special specifier after leading `0`, so parse the `0` as a single dec digit + _ => self.read_dec_int_or_float_literal(), + } + } else { + // Any dec int or float that does not start with 0 + self.read_dec_int_or_float_literal() + } + }; + } + + fn read_based_int_literal( + &self, + literal_kind: IntegerLiteralKind, + is_in_base: fn(char) -> bool, + ) -> Result { + let character_reader = unsafe { &mut *self.character_reader.get() }; + + macro_rules! create_error { + ($kind:expr) => { + LexerError { + kind: $kind, + source_file: character_reader.get_source_file().to_owned(), + position: character_reader.get_position().clone(), + } + }; + } + + macro_rules! get_next_char { + () => { + character_reader.next().ok_or_else(|| LexerError { + kind: LexerErrorKind::UnexpectedEndOfIntegerLiteral(literal_kind), + source_file: character_reader.get_source_file().to_owned(), + position: character_reader.get_position().clone(), + })? + }; + } + + let start = character_reader.get_position().clone(); + + let mut text = String::new(); + + // `0` + text.push(get_next_char!()); + // special boundary + text.push(get_next_char!()); + + while character_reader.has_next() { + let peeked_char = *character_reader.peek().unwrap(); + + if peeked_char == '_' { + // Ignore underscores + character_reader.next().unwrap(); + continue; + } else if !peeked_char.is_ascii_alphanumeric() { + // Stop parsing where we are if we encounter any symbols + break; + } + + // TODO: add support for type specifiers like `u32` and `i32` + + if !is_in_base(peeked_char) { + return Err(create_error!( + LexerErrorKind::UnexpectedCharactersInIntegerLiteral(literal_kind) + )); + } + + text.push(get_next_char!()); + } + + if text.len() <= 2 { + return Err(create_error!( + LexerErrorKind::UnexpectedEndOfIntegerLiteral(literal_kind) + )); + } + + let end = character_reader.get_position().clone(); + + Ok(Token { + kind: TokenKind::IntegerLiteral(literal_kind), + text, + span: Span::new(start, end), + }) + } + + fn read_dec_int_or_float_literal(&self) -> Result { + enum NumberLexingState { + Integer, + Float, + } + + let mut state = NumberLexingState::Integer; + + let character_reader = unsafe { &mut *self.character_reader.get() }; + + macro_rules! create_error { + ($kind:expr) => { + LexerError { + kind: $kind, + source_file: character_reader.get_source_file().to_owned(), + position: character_reader.get_position().clone(), + } + }; + } + + macro_rules! get_next_char { + () => { + character_reader.next().ok_or_else(|| LexerError { + kind: match state { + NumberLexingState::Integer => { + LexerErrorKind::UnexpectedEndOfIntegerLiteral( + IntegerLiteralKind::Decimal, + ) + } + NumberLexingState::Float => LexerErrorKind::UnexpectedEndOfFloatLiteral, + }, + source_file: character_reader.get_source_file().to_owned(), + position: character_reader.get_position().clone(), + })? + }; + } + + let start = character_reader.get_position().clone(); + + let mut text = String::new(); + + // first char + text.push(get_next_char!()); + + while character_reader.has_next() { + let peeked_char = *character_reader.peek().unwrap(); + + match peeked_char { + // Ignore underscores + '_' => { + character_reader.next().unwrap(); + continue; + } + // Stop lexing where we are if we encounter any symbols + c if !c.is_ascii_alphanumeric() && c != '.' => { + break; + } + // Non digit and non `.` characters while lexing + c if !c.is_ascii_digit() && c != '.' => { + return Err(create_error!(match state { + NumberLexingState::Integer => { + LexerErrorKind::UnexpectedCharactersInIntegerLiteral( + IntegerLiteralKind::Decimal, + ) + } + NumberLexingState::Float => + LexerErrorKind::UnexpectedCharactersInFloatLiteral, + })); + } + // Beginning of float, or an error + '.' => { + match state { + NumberLexingState::Integer => state = NumberLexingState::Float, + NumberLexingState::Float => { + return Err(create_error!( + LexerErrorKind::UnexpectedExtraPeriodInFloatLiteral + )) + } + } + text.push(get_next_char!()); + } + // Any ascii digit 0-9 + c if c.is_ascii_digit() => { + text.push(get_next_char!()); + } + // All other character types have already been matched + _ => unreachable!(), + } + + // TODO: add support for type specifiers like `u32` and `i32` + } + + // If we are in float mode, check if there were chars after the `.` + if let NumberLexingState::Float = state { + if text.ends_with('.') { + return Err(create_error!(LexerErrorKind::UnexpectedEndOfFloatLiteral)); + } + } + + let end = character_reader.get_position().clone(); + + Ok(Token { + kind: match state { + NumberLexingState::Integer => { + TokenKind::IntegerLiteral(IntegerLiteralKind::Decimal) + } + NumberLexingState::Float => TokenKind::FloatLiteral, + }, + text, + span: Span::new(start, end), + }) + } + + /// Advance the lexer while the next character matches the predicate, and return the resulting string + fn read_while(&self, predicate: fn(char) -> bool) -> String { + let character_reader = unsafe { &mut *self.character_reader.get() }; + + let mut res = String::new(); + + while character_reader.has_next() { + let c = character_reader.peek().unwrap(); + + if !predicate(*c) { + return res; + } + + res.push(character_reader.next().unwrap()); + } + + res + } + + /// Advance the lexer while the next character matches the predicate, and discard the matched chars + fn ignore_while(&self, predicate: fn(char) -> bool) { + let character_reader = unsafe { &mut *self.character_reader.get() }; + + while character_reader.has_next() { + let c = character_reader.peek().unwrap(); + + if !predicate(*c) { + return; + } + + character_reader.next(); + } + } + + /// Requires character to be available + fn read_single(&self, kind: TokenKind) -> Token { + let character_reader = unsafe { &mut *self.character_reader.get() }; + + let start = character_reader.get_position().clone(); + let c = character_reader.next().unwrap(); + let end = character_reader.get_position().clone(); + + Token { + kind, + text: c.into(), + span: Span::new(start, end), + } + } + + /// Peeks the character reader several chars forward to look for a char sequence + fn lookahead_has(&self, string: &str, n: usize) -> bool { + let character_reader = unsafe { &mut *self.character_reader.get() }; + + let chars = string.chars(); + + for (i, c) in chars.enumerate() { + let Some(peeked) = character_reader.peek_nth(n + i) else { + return false; + }; + + if *peeked != c { + return false; + } + } + + true + } + + /// Reads a fixed number of chars from the character reader and returns the resulting token + /// + /// Requires that the character reader be checked in advance to contain the correct sequence + fn read_multi(&self, string: &str, kind: TokenKind) -> Token { + let character_reader = unsafe { &mut *self.character_reader.get() }; + + let mut text = String::new(); + let start = character_reader.get_position().clone(); + + for expected_char in string.chars() { + if !character_reader.has_next() { + unreachable!("Not enough chars in character_reader (should be checked in advance)") + } + + let c = character_reader.peek().unwrap(); + + if *c != expected_char { + unreachable!( + "Char from character_reader did not match (should be checked in advance)" + ) + } + + text.push(character_reader.next().unwrap()); + } + + let end = character_reader.get_position().clone(); + + Token { + kind, + text, + span: Span::new(start, end), + } + } + + /// Looks ahead to see if there is an `=` following the given prefix + fn lookahead_has_equals(&self, prefix: impl Into, n: usize) -> bool { + let mut c: String = prefix.into(); + c.push('='); + + self.lookahead_has(&c, n) + } + + /// Reads a fixed number of chars with an `=` suffixed to the given prefix from the character reader and returns the resulting token + /// + /// Requires that the character reader be checked in advance to contain the correct sequence + fn read_multi_equals(&self, prefix: impl Into, kind: TokenKind) -> Token { + let mut c: String = prefix.into(); + c.push('='); + + self.read_multi(&c, kind) + } + + fn read_single_operator(&self, c: char, single: TokenKind, equals: TokenKind) -> Token { + if self.lookahead_has_equals(c, 0) { + self.read_multi_equals(c, equals) + } else { + self.read_single(single) + } + } + + fn read_double_operator(&self, c: char, normal: TokenKind, equals: TokenKind) -> Token { + if self.lookahead_has_equals(c, 1) { + self.read_multi_equals(c.to_string().repeat(2), equals) + } else { + self.read_multi(&c.to_string().repeat(2), normal) + } + } + + fn read_next_token(&self) -> Result, LexerError> { + let character_reader = unsafe { &mut *self.character_reader.get() }; + + while character_reader.has_next() { + let start = character_reader.get_position().clone(); + + // SAFETY: always checking if more characters are available before unwrapping + let first_char = *character_reader.peek().unwrap(); + + let token = Ok(Some(match first_char { + // New lines are significant + '\n' => self.read_single(TokenKind::Newline), + + // Whitespace + a if a.is_ascii_whitespace() => { + self.ignore_while(|c| c.is_ascii_whitespace() && c != '\n'); + continue; + } + + // Chop Comments + '/' if character_reader.peek_nth(1).is_some_and(|c| *c == '/') => { + self.ignore_while(|c| c != '\n'); + continue; + } + + // Words + a if a.is_ascii_alphabetic() => { + let word = self.read_while(|c| c.is_ascii_alphanumeric() || c == '_'); + + Token { + kind: match KeywordKind::try_from(&word) { + Ok(kw_kind) => TokenKind::Keyword(kw_kind), + Err(_) => TokenKind::Identifier, + }, + text: word, + span: Span::new(start, character_reader.get_position().clone()), + } + } + + // Literals + '"' => self.read_wrapped_escapable('"', TokenKind::StringLiteral)?, + '\'' => self.read_wrapped_escapable('\'', TokenKind::CharLiteral)?, + a if a.is_ascii_digit() => self.read_number_literal()?, + + // Grouping Symbols + c @ ('(' | ')' | '[' | ']' | '{' | '}') => { + self.read_single(TokenKind::grouping_symbol_from(c)) + } + + // Arrows (`->`) + '-' if character_reader.peek_nth(1).is_some_and(|c| *c == '>') => { + self.read_multi("->", TokenKind::Arrow) + } + + // Bang Coalescing (`!.`) + '!' if character_reader.peek_nth(1).is_some_and(|c| *c == '.') => { + self.read_multi("!.", TokenKind::BangCoalescing) + } + + // Double Colon (`::`) + ':' if character_reader.peek_nth(1).is_some_and(|c| *c == ':') => { + self.read_multi("::", TokenKind::DoubleColon) + } + + // Double operators (must come first because of lookahead clash) + c @ ('*' | '&' | '|' | '>' | '<') + if character_reader.peek_nth(1).is_some_and(|x| *x == c) => + { + self.read_double_operator( + c, + TokenKind::double_operator_from(c), + TokenKind::double_equals_operator_from(c), + ) + } + + // Single Operators + c @ ('=' | '<' | '>' | '+' | '-' | '*' | '/' | '%' | '~' | '!' | '&' | '|' + | '^') => self.read_single_operator( + c, + TokenKind::single_operator_from(c), + TokenKind::single_equals_operator_from(c), + ), + + // Non-Operator symbols + c @ ('?' | ',' | ';' | ':' | '.' | '\\' | '_' | '@' | '#' | '$') => { + self.read_single(TokenKind::other_symbol_from(c)) + } + + // Other + c => { + return Err(LexerError { + kind: LexerErrorKind::UnexpectedChar(c), + source_file: character_reader.get_source_file().clone(), + position: character_reader.get_position().clone(), + }) + } + })); + + return token; + } + + // If got to the end of the cursor without finding any more tokens, + // then we will never return more tokens + Ok(None) + } + + fn get_next_cached_or_read(&self) -> Result, LexerError> { + let peek_forward = unsafe { &mut *self.peek_forward.get() }; + + // Check if more tokens have already been precomputed for us + if !peek_forward.is_empty() { + // Always returns `Some` + return Ok(peek_forward.pop_front()); + } + + self.read_next_token() + } +} + +/// Lexer public interface +impl Lexer { + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Result, LexerError> { + self.get_next_cached_or_read() + } + + pub fn peek(&self) -> Result, LexerError> { + let peek_forward = unsafe { &mut *self.peek_forward.get() }; + + // Check if more tokens have already been precomputed for us + if let Some(token) = peek_forward.front() { + // Always returns `Some` + return Ok(Some(token)); + } + + let peek_forward = unsafe { &mut *self.peek_forward.get() }; + + // If there are more tokens + if let Some(token) = self.get_next_cached_or_read()? { + // Store the token for later usage + peek_forward.push_back(token); + + // Return a reference to the token + Ok(peek_forward.front()) + } else { + // Otherwise, return None since there are no tokens to peek + Ok(None) + } + } + + pub fn peek_nth(&self, n: usize) -> Result, LexerError> { + let peek_forward = unsafe { &mut *self.peek_forward.get() }; + + // Check if `n` tokens have already been precomputed for us + if peek_forward.len() > n { + // Always returns `Some` + let peek = peek_forward; + return Ok(peek.get(n)); + } + + // Otherwise, pre-compute the next `n` tokens from the amount we've already computed + for _ in peek_forward.len()..=n { + // Get the next token or return early if none more are found + let Some(token) = self.read_next_token()? else { + return Ok(None); + }; + + // Store the token for later usage + peek_forward.push_back(token); + } + + // Always returns `Some` because we would not have completed the loop otherwise. + Ok(peek_forward.get(n)) + } + + pub fn has_next(&self) -> Result { + Ok(self.peek()?.is_some()) + } + + pub fn get_position(&self) -> &Position { + let character_reader = unsafe { &*self.character_reader.get() }; + + character_reader.get_position() + } + + pub fn get_source_file(&self) -> &SourceFile { + let character_reader = unsafe { &*self.character_reader.get() }; + + character_reader.get_source_file() + } +} diff --git a/compiler/src/frontend/lexer/test.rs b/compiler/src/frontend/lexer/test.rs new file mode 100644 index 0000000..c535b6d --- /dev/null +++ b/compiler/src/frontend/lexer/test.rs @@ -0,0 +1,516 @@ +#![cfg(test)] + +use crate::{ + frontend::lexer::{IntegerLiteralKind::*, KeywordKind::*, Token as LT, TokenKind::*}, + frontend::reader::FileReader, +}; + +use super::{Lexer, LexerError, LexerErrorKind::*}; + +fn compare_input_to_expected(input: &str, expected_tokens: Vec) { + // Collect tokens from lexer + let reader = FileReader::from(input.to_owned()); + let mut lexer = Lexer::new(reader); + + let mut lexer_tokens = Vec::new(); + + while lexer.has_next().unwrap() { + lexer_tokens.push(lexer.next().unwrap().unwrap()) + } + + assert_eq!( + lexer_tokens, expected_tokens, + "Lexer tokens did not match expected" + ) +} + +fn lex_input(input: &str) -> Result, LexerError> { + // Collect tokens from lexer + let reader = FileReader::from(input.to_owned()); + let mut lexer = Lexer::new(reader); + + let mut lexer_tokens = Vec::new(); + + while lexer.has_next()? { + lexer_tokens.push(lexer.next()?.unwrap()) + } + + Ok(lexer_tokens) +} + +#[test] +fn basic_example() { + compare_input_to_expected( + r#"fn main() { + leak node = Node() + + println() + }"#, + vec![ + LT::from((Keyword(Fn), "fn")), + LT::from((Identifier, "main")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((OpenCurlyBracket, "{")), + LT::from((Newline, "\n")), + LT::from((Keyword(Leak), "leak")), + LT::from((Identifier, "node")), + LT::from((Equals, "=")), + LT::from((Identifier, "Node")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((Newline, "\n")), + LT::from((Newline, "\n")), + LT::from((Identifier, "println")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((Newline, "\n")), + LT::from((CloseCurlyBracket, "}")), + ], + ) +} + +#[test] +fn removes_comments() { + compare_input_to_expected( + r#"// this is a comment + fn main() { // this is a comment + leak node = Node() + // this is a comment + println() + // this is a comment + }// this is a comment"#, + vec![ + LT::from((Newline, "\n")), + LT::from((Keyword(Fn), "fn")), + LT::from((Identifier, "main")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((OpenCurlyBracket, "{")), + LT::from((Newline, "\n")), + LT::from((Keyword(Leak), "leak")), + LT::from((Identifier, "node")), + LT::from((Equals, "=")), + LT::from((Identifier, "Node")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((Newline, "\n")), + LT::from((Newline, "\n")), + LT::from((Identifier, "println")), + LT::from((OpenParen, "(")), + LT::from((CloseParen, ")")), + LT::from((Newline, "\n")), + LT::from((Newline, "\n")), + LT::from((CloseCurlyBracket, "}")), + ], + ) +} + +#[test] +fn basic_single_operators() { + compare_input_to_expected( + r#"= == < <= > >= + += - -= * *= / /= % %= ~ ~= ^ ^= | |= & &= ! !="#, + vec![ + LT::from((Equals, "=")), + LT::from((DoubleEquals, "==")), + LT::from((LessThan, "<")), + LT::from((LessThanOrEqual, "<=")), + LT::from((GreaterThan, ">")), + LT::from((GreaterThanOrEqual, ">=")), + LT::from((Plus, "+")), + LT::from((PlusEquals, "+=")), + LT::from((Minus, "-")), + LT::from((MinusEquals, "-=")), + LT::from((Asterisk, "*")), + LT::from((MultiplyEquals, "*=")), + LT::from((Divide, "/")), + LT::from((DivideEquals, "/=")), + LT::from((Modulo, "%")), + LT::from((ModuloEquals, "%=")), + LT::from((BitwiseNot, "~")), + LT::from((BitwiseNotEquals, "~=")), + LT::from((BitwiseXor, "^")), + LT::from((BitwiseXorEquals, "^=")), + LT::from((BitwiseOr, "|")), + LT::from((BitwiseOrEquals, "|=")), + LT::from((BitwiseAnd, "&")), + LT::from((BitwiseAndEquals, "&=")), + LT::from((LogicalNot, "!")), + LT::from((LogicalNotEquals, "!=")), + ], + ) +} + +#[test] +fn basic_double_operators() { + compare_input_to_expected( + r#"** **= << <<= >> >>= || ||= && &&="#, + vec![ + LT::from((Exponentiation, "**")), + LT::from((ExponentiationEquals, "**=")), + LT::from((LeftShift, "<<")), + LT::from((LeftShiftEquals, "<<=")), + LT::from((RightShift, ">>")), + LT::from((RightShiftEquals, ">>=")), + LT::from((LogicalOr, "||")), + LT::from((LogicalOrEquals, "||=")), + LT::from((LogicalAnd, "&&")), + LT::from((LogicalAndEquals, "&&=")), + ], + ) +} + +#[test] +fn double_non_operators() { + compare_input_to_expected( + r#"-> ->=-> - >"#, + vec![ + LT::from((Arrow, "->")), + LT::from((Arrow, "->")), + LT::from((Equals, "=")), + LT::from((Arrow, "->")), + LT::from((Minus, "-")), + LT::from((GreaterThan, ">")), + ], + ) +} + +#[test] +fn simple_string() { + compare_input_to_expected( + r#" "your mom 1""your mom 2" "your mom 3" "#, + vec![ + LT::from((StringLiteral, r#""your mom 1""#)), + LT::from((StringLiteral, r#""your mom 2""#)), + LT::from((StringLiteral, r#""your mom 3""#)), + ], + ) +} + +#[test] +fn string_quote_escapes() { + compare_input_to_expected( + r#" "your mom \"1\"" "your mom 2" "#, + vec![ + LT::from((StringLiteral, r#""your mom \"1\"""#)), + LT::from((StringLiteral, r#""your mom 2""#)), + ], + ) +} + +#[test] +fn unclosed_string() { + assert_eq!( + lex_input(r#" "this is a string that doesn't have a closing double quote"#), + Err(LexerError::from(UnclosedWrappedLiteral(StringLiteral))) + ) +} + +#[test] +fn simple_chars() { + compare_input_to_expected( + r" 'a''b' 'c' ", + vec![ + LT::from((CharLiteral, r"'a'")), + LT::from((CharLiteral, r"'b'")), + LT::from((CharLiteral, r"'c'")), + ], + ) +} + +#[test] +fn char_escapes() { + compare_input_to_expected( + r" 'a''b' '\'' ", + vec![ + LT::from((CharLiteral, r"'a'")), + LT::from((CharLiteral, r"'b'")), + LT::from((CharLiteral, r"'\''")), + ], + ) +} + +#[test] +fn unclosed_char() { + assert_eq!( + lex_input(r#" 'a"#), + Err(LexerError::from(UnclosedWrappedLiteral(CharLiteral))) + ) +} + +#[test] +fn basic_hex_literal() { + compare_input_to_expected( + "0xFFFF 0x123456789ABCDEF 0x01234567", + vec![ + LT::from((IntegerLiteral(Hexadecimal), "0xFFFF")), + LT::from((IntegerLiteral(Hexadecimal), "0x123456789ABCDEF")), + LT::from((IntegerLiteral(Hexadecimal), "0x01234567")), + ], + ) +} + +#[test] +fn underscores_in_hex_literal() { + compare_input_to_expected( + "0x__FF__F_F 0x_1_2_3456_789AB_CDE_F_ 0x_01_23_45_67", + vec![ + LT::from((IntegerLiteral(Hexadecimal), "0xFFFF")), + LT::from((IntegerLiteral(Hexadecimal), "0x123456789ABCDEF")), + LT::from((IntegerLiteral(Hexadecimal), "0x01234567")), + ], + ) +} + +#[test] +fn unexpected_end_of_hex() { + assert_eq!( + lex_input(r"0x"), + Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Hexadecimal))) + ) +} + +#[test] +fn illegal_hex_chars() { + assert_eq!( + lex_input(r"0xasdfgh"), + Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( + Hexadecimal + ))) + ) +} + +#[test] +fn hex_literal_on_boundary() { + compare_input_to_expected( + "(0x42069)", + vec![ + LT::from((OpenParen, "(")), + LT::from((IntegerLiteral(Hexadecimal), "0x42069")), + LT::from((CloseParen, ")")), + ], + ) +} + +#[test] +fn basic_bin_literal() { + compare_input_to_expected( + "0b00010011 0b111010100001 0b0", + vec![ + LT::from((IntegerLiteral(Binary), "0b00010011")), + LT::from((IntegerLiteral(Binary), "0b111010100001")), + LT::from((IntegerLiteral(Binary), "0b0")), + ], + ) +} + +#[test] +fn underscores_in_bin_literal() { + compare_input_to_expected( + "0b_00_0_100_11 0b1_1_101_01000_01_ 0b_0_", + vec![ + LT::from((IntegerLiteral(Binary), "0b00010011")), + LT::from((IntegerLiteral(Binary), "0b111010100001")), + LT::from((IntegerLiteral(Binary), "0b0")), + ], + ) +} + +#[test] +fn unexpected_end_of_bin() { + assert_eq!( + lex_input(r"0b"), + Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Binary))) + ) +} + +#[test] +fn illegal_bin_chars() { + assert_eq!( + lex_input(r"0b101a"), + Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( + Binary + ))) + ) +} + +#[test] +fn bin_literal_on_boundary() { + compare_input_to_expected( + "(0b01000101)", + vec![ + LT::from((OpenParen, "(")), + LT::from((IntegerLiteral(Binary), "0b01000101")), + LT::from((CloseParen, ")")), + ], + ) +} + +#[test] +fn basic_oct_literal() { + compare_input_to_expected( + "0o01234567 0o161343 0o00000001", + vec![ + LT::from((IntegerLiteral(Octal), "0o01234567")), + LT::from((IntegerLiteral(Octal), "0o161343")), + LT::from((IntegerLiteral(Octal), "0o00000001")), + ], + ) +} + +#[test] +fn underscores_in_oct_literal() { + compare_input_to_expected( + "0o01_234_56_7 0o_16134_3 0o000_00001_", + vec![ + LT::from((IntegerLiteral(Octal), "0o01234567")), + LT::from((IntegerLiteral(Octal), "0o161343")), + LT::from((IntegerLiteral(Octal), "0o00000001")), + ], + ) +} + +#[test] +fn unexpected_end_of_oct() { + assert_eq!( + lex_input(r"0o"), + Err(LexerError::from(UnexpectedEndOfIntegerLiteral(Octal))) + ) +} + +#[test] +fn illegal_oct_chars() { + assert_eq!( + lex_input(r"0o1234567890abcdef"), + Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( + Octal + ))) + ) +} + +#[test] +fn oct_literal_on_boundary() { + compare_input_to_expected( + "(0o420)", + vec![ + LT::from((OpenParen, "(")), + LT::from((IntegerLiteral(Octal), "0o420")), + LT::from((CloseParen, ")")), + ], + ) +} + +#[test] +fn basic_dec_literal() { + compare_input_to_expected( + "123456789 1 0 2", + vec![ + LT::from((IntegerLiteral(Decimal), "123456789")), + LT::from((IntegerLiteral(Decimal), "1")), + LT::from((IntegerLiteral(Decimal), "0")), + LT::from((IntegerLiteral(Decimal), "2")), + ], + ) +} + +#[test] +fn underscores_in_dec_literal() { + compare_input_to_expected( + "1234_5_6789 1_ 0 2_2", + vec![ + LT::from((IntegerLiteral(Decimal), "123456789")), + LT::from((IntegerLiteral(Decimal), "1")), + LT::from((IntegerLiteral(Decimal), "0")), + LT::from((IntegerLiteral(Decimal), "22")), + ], + ) +} + +#[test] +fn illegal_dec_chars() { + assert_eq!( + lex_input(r"0123456789abcdef"), + Err(LexerError::from(UnexpectedCharactersInIntegerLiteral( + Decimal + ))) + ) +} + +#[test] +fn dec_literal_on_boundary() { + compare_input_to_expected( + "(69)", + vec![ + LT::from((OpenParen, "(")), + LT::from((IntegerLiteral(Decimal), "69")), + LT::from((CloseParen, ")")), + ], + ) +} + +#[test] +fn basic_float_literal() { + compare_input_to_expected( + "0.0 0.1 1.0 420.69", + vec![ + LT::from((FloatLiteral, "0.0")), + LT::from((FloatLiteral, "0.1")), + LT::from((FloatLiteral, "1.0")), + LT::from((FloatLiteral, "420.69")), + ], + ) +} + +#[test] +fn underscores_in_float_literal() { + compare_input_to_expected( + "0_.0 0._1 1.0 1337_420.69", + vec![ + LT::from((FloatLiteral, "0.0")), + LT::from((FloatLiteral, "0.1")), + LT::from((FloatLiteral, "1.0")), + LT::from((FloatLiteral, "1337420.69")), + ], + ) +} + +#[test] +fn illegal_float_chars() { + assert_eq!( + lex_input(r"420.a69"), + Err(LexerError::from(UnexpectedCharactersInFloatLiteral)) + ); + + assert_eq!( + lex_input(r"420.6s9"), + Err(LexerError::from(UnexpectedCharactersInFloatLiteral)) + ); +} + +#[test] +fn float_literal_on_boundary() { + compare_input_to_expected( + "(420.69)", + vec![ + LT::from((OpenParen, "(")), + LT::from((FloatLiteral, "420.69")), + LT::from((CloseParen, ")")), + ], + ) +} + +#[test] +fn float_double_period() { + assert_eq!( + lex_input(r"420.69.1337"), + Err(LexerError::from(UnexpectedExtraPeriodInFloatLiteral)) + ); +} + +#[test] +fn float_end_with_period() { + assert_eq!( + lex_input(r"420."), + Err(LexerError::from(UnexpectedEndOfFloatLiteral)) + ); +} \ No newline at end of file diff --git a/compiler/src/frontend/lexer/token.rs b/compiler/src/frontend/lexer/token.rs new file mode 100644 index 0000000..7ba411e --- /dev/null +++ b/compiler/src/frontend/lexer/token.rs @@ -0,0 +1,318 @@ +use std::fmt::{Debug, Display}; + +use crate::frontend::position::{Position, Span}; + +#[allow(dead_code)] +#[cfg_attr(not(test), derive(Debug))] +#[derive(Clone)] +pub struct Token { + pub kind: TokenKind, + pub text: String, + pub span: Span, +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum TokenKind { + // Significant Whitespace + Newline, + + // Words + Keyword(KeywordKind), // leak + Identifier, // YourMom + + // Grouping + OpenParen, // ( + CloseParen, // ) + OpenBracket, // [ + CloseBracket, // ] + OpenCurlyBracket, // { + CloseCurlyBracket, // } + + // Literals + StringLiteral, // "your mom" + CharLiteral, // 'd' + IntegerLiteral(IntegerLiteralKind), // 69 + FloatLiteral, // 420.69 + + // Single Operators + Equals, // = + DoubleEquals, // == + LessThan, // < + LessThanOrEqual, // <= + GreaterThan, // > + GreaterThanOrEqual, // >= + Plus, // + + PlusEquals, // += + Minus, // - + MinusEquals, // -= + Asterisk, // * + MultiplyEquals, // *= + Divide, // / + DivideEquals, // /= + Modulo, // % + ModuloEquals, // %= + BitwiseNot, // ~ + BitwiseNotEquals, // ~= + BitwiseXor, // ^ + BitwiseXorEquals, // ^= + BitwiseOr, // | + BitwiseOrEquals, // |= + BitwiseAnd, // & + BitwiseAndEquals, // &= + LogicalNot, // ! + LogicalNotEquals, // != + + // Double Operators + Exponentiation, // ** + ExponentiationEquals, // **= + LeftShift, // << + LeftShiftEquals, // <<= + RightShift, // >> + RightShiftEquals, // >>= + LogicalOr, // || + LogicalOrEquals, // ||= + LogicalAnd, // && + LogicalAndEquals, // &&= + + // Non-Operator symbols + Arrow, // -> + QuestionMark, // ? + Comma, // , + Semicolon, // ; + Colon, // : + DoubleColon, // :: + Period, // . + BangCoalescing, // !. + BackSlash, // \ + Underscore, // _ + Asperand, // @ + Hash, // # + DollarSign, // $ +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum IntegerLiteralKind { + Decimal, + Hexadecimal, + Binary, + Octal, +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +pub enum KeywordKind { + Fn, + Struct, + Leak, + Hold, + Perm, + If, + Else, + While, + For, + Yeet, +} + +impl From<(TokenKind, T)> for Token +where + T: Into + Sized, +{ + fn from((kind, text): (TokenKind, T)) -> Self { + Self { + kind, + text: text.into(), + span: Span::from(Position::new()), + } + } +} + +#[cfg(test)] +impl Debug for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Token") + .field("kind", &self.kind) + .field("text", &self.text) + .finish() + } +} + +impl Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?} => {:?}", self.kind, self.text) + } +} + +impl PartialEq for Token { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind && self.text == other.text + } +} + +impl TokenKind { + pub fn is_assignment_operator(&self) -> bool { + matches!( + self, + Self::Equals + | Self::PlusEquals + | Self::MinusEquals + | Self::MultiplyEquals + | Self::DivideEquals + | Self::ModuloEquals + | Self::BitwiseNotEquals + | Self::BitwiseXorEquals + | Self::BitwiseOrEquals + | Self::BitwiseAndEquals + | Self::LogicalNotEquals + | Self::ExponentiationEquals + | Self::LeftShiftEquals + | Self::RightShiftEquals + | Self::LogicalOrEquals + | Self::LogicalAndEquals + ) + } + + pub fn is_unary_operator(&self) -> bool { + matches!(self, Self::BitwiseNot | Self::LogicalNot | Self::Asterisk) + } + + pub fn is_binary_operator(&self) -> bool { + matches!( + self, + Self::DoubleEquals + | Self::LessThan + | Self::LessThanOrEqual + | Self::GreaterThan + | Self::GreaterThanOrEqual + | Self::Plus + | Self::Minus + | Self::Asterisk + | Self::Divide + | Self::Modulo + | Self::BitwiseXor + | Self::BitwiseOr + | Self::BitwiseAnd + | Self::Exponentiation + | Self::LeftShift + | Self::RightShift + | Self::LogicalOr + | Self::LogicalAnd + ) + } + + pub fn is_literal(&self) -> bool { + matches!( + self, + Self::CharLiteral | Self::StringLiteral | Self::FloatLiteral | Self::IntegerLiteral(_) + ) + } + + pub fn grouping_symbol_from(c: char) -> TokenKind { + match c { + '(' => Self::OpenParen, + ')' => Self::CloseParen, + '[' => Self::OpenBracket, + ']' => Self::CloseBracket, + '{' => Self::OpenCurlyBracket, + '}' => Self::CloseCurlyBracket, + x => unreachable!("Illegal non-grouping symbol `{}`", x), + } + } + + pub fn single_operator_from(c: char) -> TokenKind { + match c { + '=' => Self::Equals, + '<' => Self::LessThan, + '>' => Self::GreaterThan, + '!' => Self::LogicalNot, + '+' => Self::Plus, + '-' => Self::Minus, + '*' => Self::Asterisk, + '/' => Self::Divide, + '%' => Self::Modulo, + '~' => Self::BitwiseNot, + '^' => Self::BitwiseXor, + '|' => Self::BitwiseOr, + '&' => Self::BitwiseAnd, + x => unreachable!("Illegal single non-operator `{}`", x), + } + } + + pub fn double_operator_from(c: char) -> TokenKind { + match c { + '*' => Self::Exponentiation, + '<' => Self::LeftShift, + '>' => Self::RightShift, + '&' => Self::LogicalAnd, + '|' => Self::LogicalOr, + x => unreachable!("Illegal double non-operator `{}`", x), + } + } + + pub fn single_equals_operator_from(c: char) -> TokenKind { + match c { + '=' => Self::DoubleEquals, + '<' => Self::LessThanOrEqual, + '>' => Self::GreaterThanOrEqual, + '!' => Self::LogicalNotEquals, + '+' => Self::PlusEquals, + '-' => Self::MinusEquals, + '*' => Self::MultiplyEquals, + '/' => Self::DivideEquals, + '%' => Self::ModuloEquals, + '~' => Self::BitwiseNotEquals, + '^' => Self::BitwiseXorEquals, + '|' => Self::BitwiseOrEquals, + '&' => Self::BitwiseAndEquals, + x => unreachable!("Illegal single non-equals-operator `{}`", x), + } + } + + pub fn double_equals_operator_from(c: char) -> TokenKind { + match c { + '*' => Self::ExponentiationEquals, + '<' => Self::LeftShiftEquals, + '>' => Self::RightShiftEquals, + '&' => Self::LogicalAndEquals, + '|' => Self::LogicalOrEquals, + x => unreachable!("Illegal double non-equals-operator `{}`", x), + } + } + + pub fn other_symbol_from(c: impl Into) -> TokenKind { + match c.into().as_str() { + "->" => Self::Arrow, + "?" => Self::QuestionMark, + "!." => Self::BangCoalescing, + "," => Self::Comma, + ";" => Self::Semicolon, + ":" => Self::Colon, + "::" => Self::DoubleColon, + "." => Self::Period, + "\\" => Self::BackSlash, + "_" => Self::Underscore, + "@" => Self::Asperand, + "#" => Self::Hash, + "$" => Self::DollarSign, + x => unreachable!("Illegal non-other-symbol `{}`", x), + } + } +} + +impl TryFrom<&String> for KeywordKind { + type Error = (); + + fn try_from(value: &String) -> Result { + Ok(match value.as_str() { + "fn" => Self::Fn, + "struct" => Self::Struct, + "leak" => Self::Leak, + "hold" => Self::Hold, + "perm" => Self::Perm, + "if" => Self::If, + "else" => Self::Else, + "while" => Self::While, + "for" => Self::For, + "yeet" => Self::Yeet, + _ => return Err(()), + }) + } +} \ No newline at end of file diff --git a/compiler/src/frontend/mod.rs b/compiler/src/frontend/mod.rs index 5c4ac64..e97be17 100644 --- a/compiler/src/frontend/mod.rs +++ b/compiler/src/frontend/mod.rs @@ -1,15 +1,11 @@ use std::path::PathBuf; use crate::{ - common::error::LeekCompilerError, - frontend::{ - lexer::LeekLexer, - parser::{LeekParser, Parser}, - reader::FileReader, - }, + common::error::CompilerError, + frontend::{lexer::Lexer, parser::Parser, reader::FileReader}, }; -use self::ast::LeekAst; +use self::ast::Ast; pub mod ast; pub mod lexer; @@ -17,26 +13,26 @@ pub mod parser; pub mod position; pub mod reader; -pub fn parse_file(path: PathBuf) -> Result { +pub fn parse_file(path: PathBuf) -> Result { let reader = FileReader::new(path)?; - let lexer = LeekLexer::new(reader); - let parse_tree = LeekParser::parse(lexer)?; + let lexer = Lexer::new(reader); + let parse_tree = Parser::parse(lexer)?; println!("{}", &parse_tree.root); - let ast = LeekAst::build_from(parse_tree); + let ast = Ast::build_from(parse_tree); Ok(ast) } -pub fn parse_string(source: String) -> Result { +pub fn parse_string(source: String) -> Result { let reader = FileReader::from(source); - let lexer = LeekLexer::new(reader); - let parse_tree = LeekParser::parse(lexer)?; + let lexer = Lexer::new(reader); + let parse_tree = Parser::parse(lexer)?; println!("{}", &parse_tree.root); - let ast = LeekAst::build_from(parse_tree); + let ast = Ast::build_from(parse_tree); Ok(ast) } diff --git a/compiler/src/frontend/parser.rs b/compiler/src/frontend/parser.rs index 1cb2ccd..54e1965 100644 --- a/compiler/src/frontend/parser.rs +++ b/compiler/src/frontend/parser.rs @@ -2,13 +2,14 @@ use core::panic; use std::fmt::Display; use crate::{ - common::error::LeekCompilerError, - frontend::lexer::{IntegerLiteralKind, KeywordKind, LeekToken, LeekTokenKind, Lexer}, - frontend::position::{SourceFile, Span}, + common::error::CompilerError, + frontend::lexer::{ + token::{IntegerLiteralKind, KeywordKind, Token, TokenKind}, + Lexer, + }, + frontend::position::{highlight_span, SourceFile, Span}, }; -use super::position::highlight_span; - #[derive(Debug)] pub struct ParseTree { pub root: ParseTreeNode, @@ -23,7 +24,7 @@ impl PartialEq for ParseTree { #[derive(Debug, PartialEq, Clone)] pub enum ParseTreeNode { - Terminal(LeekToken), + Terminal(Token), NonTerminal(ParseTreeNodeNonTerminal), } @@ -51,7 +52,7 @@ impl ParseTreeNode { } } - pub fn terminal_token(&self) -> &LeekToken { + pub fn terminal_token(&self) -> &Token { if let ParseTreeNode::Terminal(token) = self { token } else { @@ -74,6 +75,7 @@ macro_rules! terminal { }; } +#[allow(unused_macros)] macro_rules! non_terminal { ($kind:expr, $children:expr) => { ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -135,17 +137,17 @@ impl Display for ParserError { f, "Unexpected token {:?}. Expected one of: {:?}", found, expected - )?, + ), ParserErrorKind::UnexpectedKeyword { expected, found } => writeln!( f, "Unexpected keyword {:?}. Expected one of: {:?}", found, expected - )?, - ParserErrorKind::UnexpectedEndOfInput => writeln!(f, "Unexpected end of input.")?, + ), + ParserErrorKind::UnexpectedEndOfInput => writeln!(f, "Unexpected end of input."), ParserErrorKind::IndexIntoNonIdentifier => { - writeln!(f, "Cannot access field of non-struct object.")? + writeln!(f, "Cannot access field of non-struct object.") } - } + }?; highlight_span(f, &self.source_file, self.span.clone())?; @@ -156,8 +158,8 @@ impl Display for ParserError { #[derive(Debug)] pub enum ParserErrorKind { UnexpectedToken { - expected: Vec, - found: LeekTokenKind, + expected: Vec, + found: TokenKind, }, UnexpectedKeyword { expected: Vec, @@ -167,32 +169,19 @@ pub enum ParserErrorKind { IndexIntoNonIdentifier, } -pub trait Parser { - /// Takes in a lexer and returns the root of a parse tree - fn parse(lexer: impl Lexer + 'static) -> Result; -} - -pub struct LeekParser { - lexer: Box, +pub struct Parser { + lexer: Lexer, } -impl Parser for LeekParser { - fn parse(lexer: impl Lexer + 'static) -> Result { - let mut parser = LeekParser::new(lexer); +impl Parser { + pub fn parse(lexer: Lexer) -> Result { + let mut parser = Parser { lexer }; parser.parse_from_lexer() } -} - -impl LeekParser { - fn new(lexer: impl Lexer + 'static) -> Self { - Self { - lexer: Box::new(lexer), - } - } /// Peeks the next token or returns an error if there are none left - fn peek_expect(&self) -> Result<&LeekToken, LeekCompilerError> { + fn peek_expect(&self) -> Result<&Token, CompilerError> { self.lexer.peek()?.ok_or_else(|| { ParserError { kind: ParserErrorKind::UnexpectedEndOfInput, @@ -204,14 +193,15 @@ impl LeekParser { } /// Grabs the next token and asserts that it is the provided type - fn peek_expect_is(&self, kind: LeekTokenKind) -> Result { + fn peek_expect_is(&self, kind: TokenKind) -> Result { let token = self.peek_expect()?; Ok(token.kind == kind) } /// Peeks the nth token or returns an error if there are none left - fn peek_nth_expect(&self, n: usize) -> Result<&LeekToken, LeekCompilerError> { + #[allow(unused)] + fn peek_nth_expect(&self, n: usize) -> Result<&Token, CompilerError> { self.lexer.peek_nth(n)?.ok_or_else(|| { ParserError { kind: ParserErrorKind::UnexpectedEndOfInput, @@ -223,10 +213,7 @@ impl LeekParser { } /// Peeks the next token and asserts that it is one of the provided types - fn peek_expect_is_of( - &self, - kinds: Vec, - ) -> Result<&LeekToken, LeekCompilerError> { + fn peek_expect_is_of(&self, kinds: Vec) -> Result<&Token, CompilerError> { let token = self.peek_expect()?; if !kinds.contains(&token.kind) { @@ -245,10 +232,7 @@ impl LeekParser { } /// Searches the next token ignoring new lines - fn peek_nth_ignore_whitespace( - &self, - n: usize, - ) -> Result, LeekCompilerError> { + fn peek_nth_ignore_whitespace(&self, n: usize) -> Result, CompilerError> { let mut peek_index = 0; let mut non_nl_tokens = 0; @@ -258,7 +242,7 @@ impl LeekParser { }; match peeked.kind { - LeekTokenKind::Newline => { + TokenKind::Newline => { peek_index += 1; continue; } @@ -277,17 +261,17 @@ impl LeekParser { } /// Peeks the nth token or returns an error if there are none left - fn peek_nth_ignore_whitespace_expect(&self, n: usize) -> Result<&LeekToken, LeekCompilerError> { + fn peek_nth_ignore_whitespace_expect(&self, n: usize) -> Result<&Token, CompilerError> { self.peek_nth_ignore_whitespace(n)? .ok_or_else(|| self.create_error(ParserErrorKind::UnexpectedEndOfInput)) } /// Ignores tokens while they are new lines - fn bleed_whitespace(&mut self) -> Result<(), LeekCompilerError> { + fn bleed_whitespace(&mut self) -> Result<(), CompilerError> { while self .lexer .peek()? - .is_some_and(|t| t.kind == LeekTokenKind::Newline) + .is_some_and(|t| t.kind == TokenKind::Newline) { self.lexer.next()?; } @@ -296,14 +280,14 @@ impl LeekParser { } /// Grabs the next token or throws an error if none were found - fn next_expect(&mut self) -> Result { + fn next_expect(&mut self) -> Result { self.lexer .next()? .ok_or_else(|| self.create_error(ParserErrorKind::UnexpectedEndOfInput)) } /// Grabs the next token and asserts that it is the provided type - fn next_expect_is(&mut self, kind: LeekTokenKind) -> Result { + fn next_expect_is(&mut self, kind: TokenKind) -> Result { let token = self.next_expect()?; if token.kind != kind { @@ -320,10 +304,7 @@ impl LeekParser { } /// Gets the next token and asserts that it is one of the provided types - fn next_expect_is_of( - &mut self, - kinds: Vec, - ) -> Result { + fn next_expect_is_of(&mut self, kinds: Vec) -> Result { let token = self.next_expect()?; if !kinds.contains(&token.kind) { @@ -340,7 +321,7 @@ impl LeekParser { } /// Creates the associated error variant from the lexer's current position - fn create_error(&self, kind: ParserErrorKind) -> LeekCompilerError { + fn create_error(&self, kind: ParserErrorKind) -> CompilerError { ParserError { kind, source_file: self.lexer.get_source_file().clone(), @@ -350,7 +331,7 @@ impl LeekParser { } /// Creates the associated error variant from a span - fn create_error_with_span(&self, kind: ParserErrorKind, span: Span) -> LeekCompilerError { + fn create_error_with_span(&self, kind: ParserErrorKind, span: Span) -> CompilerError { ParserError { kind, source_file: self.lexer.get_source_file().clone(), @@ -368,22 +349,20 @@ impl LeekParser { /// | StaticVariableDeclaration /// )+ /// - fn parse_program_part(&mut self) -> Result { + fn parse_program_part(&mut self) -> Result { let peeked_token = self.peek_expect()?; match peeked_token.kind { // FunctionDefinition or FunctionDeclaration - LeekTokenKind::Keyword(KeywordKind::Fn) => { - self.parse_function_declaration_or_definition() - } + TokenKind::Keyword(KeywordKind::Fn) => self.parse_function_declaration_or_definition(), // StructDefinition - LeekTokenKind::Keyword(KeywordKind::Struct) => self.parse_struct_definition(), + TokenKind::Keyword(KeywordKind::Struct) => self.parse_struct_definition(), // ConstantVariableDeclaration - LeekTokenKind::Keyword(KeywordKind::Perm) => self.parse_constant_variable_declaration(), + TokenKind::Keyword(KeywordKind::Perm) => self.parse_constant_variable_declaration(), // StaticVariableDeclaration - LeekTokenKind::Keyword(KeywordKind::Hold) => self.parse_static_variable_declaration(), + TokenKind::Keyword(KeywordKind::Hold) => self.parse_static_variable_declaration(), // Unexpected keyword - LeekTokenKind::Keyword(kw) => Err(self.create_error_with_span( + TokenKind::Keyword(kw) => Err(self.create_error_with_span( ParserErrorKind::UnexpectedKeyword { expected: vec![ KeywordKind::Fn, @@ -399,10 +378,10 @@ impl LeekParser { _ => Err(self.create_error_with_span( ParserErrorKind::UnexpectedToken { expected: vec![ - LeekTokenKind::Keyword(KeywordKind::Fn), - LeekTokenKind::Keyword(KeywordKind::Struct), - LeekTokenKind::Keyword(KeywordKind::Perm), - LeekTokenKind::Keyword(KeywordKind::Hold), + TokenKind::Keyword(KeywordKind::Fn), + TokenKind::Keyword(KeywordKind::Struct), + TokenKind::Keyword(KeywordKind::Perm), + TokenKind::Keyword(KeywordKind::Hold), ], found: peeked_token.kind, }, @@ -417,13 +396,11 @@ impl LeekParser { /// FunctionDeclaration :: /// `fn` QualifiedIdentifier FunctionParameters FunctionReturnType? Newline /// - fn parse_function_declaration_or_definition( - &mut self, - ) -> Result { + fn parse_function_declaration_or_definition(&mut self) -> Result { let mut children = Vec::new(); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Fn))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Fn))? )); self.bleed_whitespace()?; @@ -434,7 +411,7 @@ impl LeekParser { if self .peek_nth_ignore_whitespace(0)? - .is_some_and(|token| token.kind == LeekTokenKind::Arrow) + .is_some_and(|token| token.kind == TokenKind::Arrow) { self.bleed_whitespace()?; children.push(self.parse_return_type()?); @@ -451,9 +428,9 @@ impl LeekParser { /// FunctionReturnType :: /// `->` Type - fn parse_return_type(&mut self) -> Result { + fn parse_return_type(&mut self) -> Result { let children = vec![ - terminal!(self.next_expect_is(LeekTokenKind::Arrow)?), + terminal!(self.next_expect_is(TokenKind::Arrow)?), self.parse_type()?, ]; @@ -467,20 +444,20 @@ impl LeekParser { /// `(` /// (TypeAssociation `,`)* TypeAssociation /// `)` - fn parse_function_parameters(&mut self) -> Result { + fn parse_function_parameters(&mut self) -> Result { let mut children = Vec::new(); - children.push(terminal!(self.next_expect_is(LeekTokenKind::OpenParen)?)); + children.push(terminal!(self.next_expect_is(TokenKind::OpenParen)?)); self.bleed_whitespace()?; match self.peek_expect()?.kind { - LeekTokenKind::CloseParen => {} + TokenKind::CloseParen => {} _ => { children.push(self.parse_type_association()?); self.bleed_whitespace()?; - while self.peek_expect_is(LeekTokenKind::Comma)? { - children.push(terminal!(self.next_expect_is(LeekTokenKind::Comma)?)); + while self.peek_expect_is(TokenKind::Comma)? { + children.push(terminal!(self.next_expect_is(TokenKind::Comma)?)); self.bleed_whitespace()?; children.push(self.parse_type_association()?); self.bleed_whitespace()?; @@ -489,7 +466,7 @@ impl LeekParser { } self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::CloseParen)?)); + children.push(terminal!(self.next_expect_is(TokenKind::CloseParen)?)); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::FunctionParameters, @@ -501,12 +478,10 @@ impl LeekParser { /// `{` /// (Block | Statement)* /// `}` - fn parse_block(&mut self) -> Result { + fn parse_block(&mut self) -> Result { let mut children = Vec::new(); - children.push(terminal!( - self.next_expect_is(LeekTokenKind::OpenCurlyBracket)? - )); + children.push(terminal!(self.next_expect_is(TokenKind::OpenCurlyBracket)?)); self.bleed_whitespace()?; while self.lexer.has_next()? { @@ -514,21 +489,19 @@ impl LeekParser { match token.kind { // Ignore preceding newlines - LeekTokenKind::Newline => { + TokenKind::Newline => { self.lexer.next()?; } // Allow recursive blocks - LeekTokenKind::OpenCurlyBracket => children.push(self.parse_block()?), + TokenKind::OpenCurlyBracket => children.push(self.parse_block()?), // Break the loop if a closing bracket is found - LeekTokenKind::CloseCurlyBracket => break, + TokenKind::CloseCurlyBracket => break, _ => children.push(self.parse_statement()?), } } - children.push(terminal!( - self.next_expect_is(LeekTokenKind::CloseCurlyBracket)? - )); + children.push(terminal!(self.next_expect_is(TokenKind::CloseCurlyBracket)?)); self.bleed_whitespace()?; Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -544,22 +517,22 @@ impl LeekParser { /// | (QualifiedIdentifier assignment Expression) /// | (FunctionCallExpression) /// ) - fn parse_statement(&mut self) -> Result { + fn parse_statement(&mut self) -> Result { let mut children = Vec::new(); match self.peek_expect()?.kind { - LeekTokenKind::Keyword(KeywordKind::Yeet) => { + TokenKind::Keyword(KeywordKind::Yeet) => { children.push(self.parse_yeet_statement()?); } - LeekTokenKind::Keyword(KeywordKind::Leak) => { + TokenKind::Keyword(KeywordKind::Leak) => { children.push(self.parse_local_variable_declaration()?); } - k @ LeekTokenKind::Identifier => { + k @ TokenKind::Identifier => { let identifier = self.parse_qualified_identifier()?; // Could be assignment or function call match self.peek_nth_ignore_whitespace_expect(0)?.kind { - LeekTokenKind::OpenParen => { + TokenKind::OpenParen => { children.push(self.parse_function_call_expression(identifier)?) } k if k.is_assignment_operator() => { @@ -568,23 +541,23 @@ impl LeekParser { _ => { return Err(self.create_error(ParserErrorKind::UnexpectedToken { expected: vec![ - LeekTokenKind::OpenParen, - LeekTokenKind::Equals, - LeekTokenKind::PlusEquals, - LeekTokenKind::MinusEquals, - LeekTokenKind::MultiplyEquals, - LeekTokenKind::DivideEquals, - LeekTokenKind::ModuloEquals, - LeekTokenKind::BitwiseNotEquals, - LeekTokenKind::BitwiseXorEquals, - LeekTokenKind::BitwiseOrEquals, - LeekTokenKind::BitwiseAndEquals, - LeekTokenKind::LogicalNotEquals, - LeekTokenKind::ExponentiationEquals, - LeekTokenKind::LeftShiftEquals, - LeekTokenKind::RightShiftEquals, - LeekTokenKind::LogicalOrEquals, - LeekTokenKind::LogicalAndEquals, + TokenKind::OpenParen, + TokenKind::Equals, + TokenKind::PlusEquals, + TokenKind::MinusEquals, + TokenKind::MultiplyEquals, + TokenKind::DivideEquals, + TokenKind::ModuloEquals, + TokenKind::BitwiseNotEquals, + TokenKind::BitwiseXorEquals, + TokenKind::BitwiseOrEquals, + TokenKind::BitwiseAndEquals, + TokenKind::LogicalNotEquals, + TokenKind::ExponentiationEquals, + TokenKind::LeftShiftEquals, + TokenKind::RightShiftEquals, + TokenKind::LogicalOrEquals, + TokenKind::LogicalAndEquals, ], found: k, })); @@ -594,9 +567,9 @@ impl LeekParser { k => { return Err(self.create_error(ParserErrorKind::UnexpectedToken { expected: vec![ - LeekTokenKind::Keyword(KeywordKind::Yeet), - LeekTokenKind::Keyword(KeywordKind::Leak), - LeekTokenKind::Identifier, + TokenKind::Keyword(KeywordKind::Yeet), + TokenKind::Keyword(KeywordKind::Leak), + TokenKind::Identifier, ], found: k, })); @@ -604,14 +577,11 @@ impl LeekParser { } match self - .peek_expect_is_of(vec![ - LeekTokenKind::Newline, - LeekTokenKind::CloseCurlyBracket, - ])? + .peek_expect_is_of(vec![TokenKind::Newline, TokenKind::CloseCurlyBracket])? .kind { - LeekTokenKind::Newline => children.push(terminal!(self.next_expect()?)), - LeekTokenKind::CloseCurlyBracket => {} + TokenKind::Newline => children.push(terminal!(self.next_expect()?)), + TokenKind::CloseCurlyBracket => {} _ => unreachable!(), } @@ -621,11 +591,11 @@ impl LeekParser { })) } - fn parse_yeet_statement(&mut self) -> Result { + fn parse_yeet_statement(&mut self) -> Result { let mut children = Vec::with_capacity(2); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Yeet))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Yeet))? )); self.bleed_whitespace()?; @@ -637,24 +607,24 @@ impl LeekParser { })) } - fn parse_local_variable_declaration(&mut self) -> Result { + fn parse_local_variable_declaration(&mut self) -> Result { let mut children = Vec::new(); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Leak))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Leak))? )); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); self.bleed_whitespace()?; // Parse explicit type match self.peek_expect()?.kind { // No type def found - LeekTokenKind::Equals => {} + TokenKind::Equals => {} // Found type def - LeekTokenKind::Colon => { - children.push(terminal!(self.next_expect_is(LeekTokenKind::Colon)?)); + TokenKind::Colon => { + children.push(terminal!(self.next_expect_is(TokenKind::Colon)?)); self.bleed_whitespace()?; todo!("parse explicit type in leak statement") @@ -662,7 +632,7 @@ impl LeekParser { k => { return Err(self.create_error_with_span( ParserErrorKind::UnexpectedToken { - expected: vec![LeekTokenKind::Colon, LeekTokenKind::Equals], + expected: vec![TokenKind::Colon, TokenKind::Equals], found: k, }, self.peek_expect()?.span.clone(), @@ -670,7 +640,7 @@ impl LeekParser { } } - children.push(terminal!(self.next_expect_is(LeekTokenKind::Equals)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Equals)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); @@ -684,8 +654,8 @@ impl LeekParser { fn parse_variable_assignment( &mut self, identifier: ParseTreeNode, - operator: LeekTokenKind, - ) -> Result { + operator: TokenKind, + ) -> Result { let mut children = Vec::new(); children.push(identifier); @@ -710,22 +680,20 @@ impl LeekParser { /// | StructInitialization /// | StructFieldAccess /// | StructMethodCall - fn parse_expression(&mut self) -> Result { + fn parse_expression(&mut self) -> Result { let mut node = match self.peek_expect()?.kind { - LeekTokenKind::OpenParen => self.parse_atom()?, - LeekTokenKind::CharLiteral - | LeekTokenKind::StringLiteral - | LeekTokenKind::IntegerLiteral(_) - | LeekTokenKind::FloatLiteral => self.parse_atom()?, + TokenKind::OpenParen => self.parse_atom()?, + TokenKind::CharLiteral + | TokenKind::StringLiteral + | TokenKind::IntegerLiteral(_) + | TokenKind::FloatLiteral => self.parse_atom()?, k if k.is_unary_operator() => self.parse_unary_expression()?, - LeekTokenKind::Identifier => { + TokenKind::Identifier => { let identifier = self.parse_qualified_identifier()?; match self.peek_nth_ignore_whitespace_expect(0)?.kind { - LeekTokenKind::OpenParen => self.parse_function_call_expression(identifier)?, - LeekTokenKind::OpenCurlyBracket => { - self.parse_struct_initialization(identifier)? - } + TokenKind::OpenParen => self.parse_function_call_expression(identifier)?, + TokenKind::OpenCurlyBracket => self.parse_struct_initialization(identifier)?, _ => self.parse_atom_from_identifier(identifier)?, } } @@ -733,15 +701,15 @@ impl LeekParser { return Err(self.create_error_with_span( ParserErrorKind::UnexpectedToken { expected: vec![ - LeekTokenKind::OpenParen, - LeekTokenKind::CharLiteral, - LeekTokenKind::StringLiteral, - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Binary), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Octal), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Hexadecimal), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Decimal), - LeekTokenKind::FloatLiteral, - LeekTokenKind::Identifier, + TokenKind::OpenParen, + TokenKind::CharLiteral, + TokenKind::StringLiteral, + TokenKind::IntegerLiteral(IntegerLiteralKind::Binary), + TokenKind::IntegerLiteral(IntegerLiteralKind::Octal), + TokenKind::IntegerLiteral(IntegerLiteralKind::Hexadecimal), + TokenKind::IntegerLiteral(IntegerLiteralKind::Decimal), + TokenKind::FloatLiteral, + TokenKind::Identifier, ], found: k, }, @@ -750,7 +718,7 @@ impl LeekParser { } }; - while self.peek_nth_ignore_whitespace_expect(0)?.kind == LeekTokenKind::Period { + while self.peek_nth_ignore_whitespace_expect(0)?.kind == TokenKind::Period { // Check to see if it is an indexable object match node.non_terminal().kind { ParseTreeNonTerminalKind::QualifiedIdentifier @@ -775,12 +743,11 @@ impl LeekParser { let ParseTreeNonTerminalKind::QualifiedIdentifier = child.kind else { return Err(self.create_error(ParserErrorKind::IndexIntoNonIdentifier)); - }; }; node = match self.peek_nth_ignore_whitespace_expect(2)?.kind { - LeekTokenKind::OpenParen => self.parse_struct_method_call( + TokenKind::OpenParen => self.parse_struct_method_call( ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::Expression, children: vec![node], @@ -821,22 +788,22 @@ impl LeekParser { fn parse_function_call_expression( &mut self, identifier: ParseTreeNode, - ) -> Result { + ) -> Result { let mut children = Vec::new(); children.push(identifier); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::OpenParen)?)); + children.push(terminal!(self.next_expect_is(TokenKind::OpenParen)?)); self.bleed_whitespace()?; match self.peek_expect()?.kind { - LeekTokenKind::CloseParen => {} + TokenKind::CloseParen => {} _ => children.push(self.parse_function_arguments()?), } self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::CloseParen)?)); + children.push(terminal!(self.next_expect_is(TokenKind::CloseParen)?)); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::FunctionCallExpression, @@ -848,14 +815,14 @@ impl LeekParser { /// ( /// (Expression `,`)* Expression /// ) - fn parse_function_arguments(&mut self) -> Result { + fn parse_function_arguments(&mut self) -> Result { let mut children = Vec::new(); children.push(self.parse_expression()?); self.bleed_whitespace()?; - while self.peek_expect_is(LeekTokenKind::Comma)? { - children.push(terminal!(self.next_expect_is(LeekTokenKind::Comma)?)); + while self.peek_expect_is(TokenKind::Comma)? { + children.push(terminal!(self.next_expect_is(TokenKind::Comma)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); self.bleed_whitespace()?; @@ -876,37 +843,30 @@ impl LeekParser { fn parse_struct_initialization( &mut self, identifier: ParseTreeNode, - ) -> Result { + ) -> Result { let mut children = Vec::new(); children.push(identifier); self.bleed_whitespace()?; - children.push(terminal!( - self.next_expect_is(LeekTokenKind::OpenCurlyBracket)? - )); + children.push(terminal!(self.next_expect_is(TokenKind::OpenCurlyBracket)?)); self.bleed_whitespace()?; - while !self.peek_expect_is(LeekTokenKind::CloseCurlyBracket)? { - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + while !self.peek_expect_is(TokenKind::CloseCurlyBracket)? { + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Colon)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Colon)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); - self.peek_expect_is_of(vec![ - LeekTokenKind::Newline, - LeekTokenKind::CloseCurlyBracket, - ])?; + self.peek_expect_is_of(vec![TokenKind::Newline, TokenKind::CloseCurlyBracket])?; self.bleed_whitespace()?; } - children.push(terminal!( - self.next_expect_is(LeekTokenKind::CloseCurlyBracket)? - )); + children.push(terminal!(self.next_expect_is(TokenKind::CloseCurlyBracket)?)); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::StructInitialization, @@ -919,7 +879,7 @@ impl LeekParser { fn parse_binary_expression( &mut self, lhs: ParseTreeNode, - ) -> Result { + ) -> Result { let mut children = Vec::new(); // TODO: Parse operator precedence (use a stack) @@ -930,24 +890,24 @@ impl LeekParser { // Binary operator children.push(terminal!(self.next_expect_is_of(vec![ - LeekTokenKind::DoubleEquals, - LeekTokenKind::LessThan, - LeekTokenKind::LessThanOrEqual, - LeekTokenKind::GreaterThan, - LeekTokenKind::GreaterThanOrEqual, - LeekTokenKind::Plus, - LeekTokenKind::Minus, - LeekTokenKind::Asterisk, - LeekTokenKind::Divide, - LeekTokenKind::Modulo, - LeekTokenKind::BitwiseXor, - LeekTokenKind::BitwiseOr, - LeekTokenKind::BitwiseAnd, - LeekTokenKind::Exponentiation, - LeekTokenKind::LeftShift, - LeekTokenKind::RightShift, - LeekTokenKind::LogicalOr, - LeekTokenKind::LogicalAnd, + TokenKind::DoubleEquals, + TokenKind::LessThan, + TokenKind::LessThanOrEqual, + TokenKind::GreaterThan, + TokenKind::GreaterThanOrEqual, + TokenKind::Plus, + TokenKind::Minus, + TokenKind::Asterisk, + TokenKind::Divide, + TokenKind::Modulo, + TokenKind::BitwiseXor, + TokenKind::BitwiseOr, + TokenKind::BitwiseAnd, + TokenKind::Exponentiation, + TokenKind::LeftShift, + TokenKind::RightShift, + TokenKind::LogicalOr, + TokenKind::LogicalAnd, ])?)); self.bleed_whitespace()?; @@ -965,7 +925,7 @@ impl LeekParser { fn parse_struct_field_access( &mut self, lhs: ParseTreeNode, - ) -> Result { + ) -> Result { let mut children = Vec::new(); // Left hand expression @@ -973,11 +933,11 @@ impl LeekParser { self.bleed_whitespace()?; // Dot operator - children.push(terminal!(self.next_expect_is(LeekTokenKind::Period)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Period)?)); self.bleed_whitespace()?; // Field - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::StructFieldAccess, @@ -990,7 +950,7 @@ impl LeekParser { fn parse_struct_method_call( &mut self, lhs: ParseTreeNode, - ) -> Result { + ) -> Result { let mut children = Vec::new(); // Left hand expression @@ -998,11 +958,11 @@ impl LeekParser { self.bleed_whitespace()?; // Dot operator - children.push(terminal!(self.next_expect_is(LeekTokenKind::Period)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Period)?)); self.bleed_whitespace()?; // Method - let identifier = terminal!(self.next_expect_is(LeekTokenKind::Identifier)?); + let identifier = terminal!(self.next_expect_is(TokenKind::Identifier)?); children.push(self.parse_function_call_expression(identifier)?); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1013,14 +973,14 @@ impl LeekParser { /// UnaryExpression :: /// unary_operator Expression - fn parse_unary_expression(&mut self) -> Result { + fn parse_unary_expression(&mut self) -> Result { let mut children = Vec::new(); // Unary operator children.push(terminal!(self.next_expect_is_of(vec![ - LeekTokenKind::BitwiseNot, - LeekTokenKind::LogicalNot, - LeekTokenKind::Asterisk + TokenKind::BitwiseNot, + TokenKind::LogicalNot, + TokenKind::Asterisk ])?)); self.bleed_whitespace()?; @@ -1039,38 +999,38 @@ impl LeekParser { /// | ( /// `(` Expression `)` /// ) - fn parse_atom(&mut self) -> Result { + fn parse_atom(&mut self) -> Result { let mut children = Vec::new(); match self.peek_expect()?.kind { - LeekTokenKind::Identifier => { + TokenKind::Identifier => { children.push(self.parse_qualified_identifier()?); } k if k.is_literal() => { children.push(terminal!(self.next_expect()?)); } - LeekTokenKind::OpenParen => { - children.push(terminal!(self.next_expect_is(LeekTokenKind::OpenParen)?)); + TokenKind::OpenParen => { + children.push(terminal!(self.next_expect_is(TokenKind::OpenParen)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::CloseParen)?)); + children.push(terminal!(self.next_expect_is(TokenKind::CloseParen)?)); } k => { return Err(self.create_error_with_span( ParserErrorKind::UnexpectedToken { expected: vec![ - LeekTokenKind::Identifier, - LeekTokenKind::OpenParen, - LeekTokenKind::CharLiteral, - LeekTokenKind::StringLiteral, - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Binary), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Octal), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Hexadecimal), - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Decimal), - LeekTokenKind::FloatLiteral, + TokenKind::Identifier, + TokenKind::OpenParen, + TokenKind::CharLiteral, + TokenKind::StringLiteral, + TokenKind::IntegerLiteral(IntegerLiteralKind::Binary), + TokenKind::IntegerLiteral(IntegerLiteralKind::Octal), + TokenKind::IntegerLiteral(IntegerLiteralKind::Hexadecimal), + TokenKind::IntegerLiteral(IntegerLiteralKind::Decimal), + TokenKind::FloatLiteral, ], found: k, }, @@ -1088,7 +1048,7 @@ impl LeekParser { fn parse_atom_from_identifier( &mut self, node: ParseTreeNode, - ) -> Result { + ) -> Result { Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::Atom, children: vec![node], @@ -1097,25 +1057,25 @@ impl LeekParser { /// StructDefinition :: /// `struct` identifier StructDefinitionBody? - fn parse_struct_definition(&mut self) -> Result { + fn parse_struct_definition(&mut self) -> Result { let mut children = Vec::new(); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Struct))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Struct))? )); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); if self .peek_nth_ignore_whitespace(0)? - .is_some_and(|token| token.kind == LeekTokenKind::OpenCurlyBracket) + .is_some_and(|token| token.kind == TokenKind::OpenCurlyBracket) { self.bleed_whitespace()?; children.push(self.parse_struct_definition_body()?) } else if self.lexer.has_next()? { // If open bracket does not follow, must be None or newline - children.push(terminal!(self.next_expect_is(LeekTokenKind::Newline)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Newline)?)); } Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1128,22 +1088,20 @@ impl LeekParser { /// `{` /// (TypeAssociation `\n`)* TypeAssociation /// `}` - fn parse_struct_definition_body(&mut self) -> Result { + fn parse_struct_definition_body(&mut self) -> Result { let mut children = Vec::new(); - children.push(terminal!( - self.next_expect_is(LeekTokenKind::OpenCurlyBracket)? - )); + children.push(terminal!(self.next_expect_is(TokenKind::OpenCurlyBracket)?)); self.bleed_whitespace()?; - if self.peek_nth_ignore_whitespace_expect(0)?.kind != LeekTokenKind::CloseCurlyBracket { + if self.peek_nth_ignore_whitespace_expect(0)?.kind != TokenKind::CloseCurlyBracket { // Non `}`, so parse at last one type association children.push(self.parse_type_association()?); - while self.peek_expect_is(LeekTokenKind::Newline)? { + while self.peek_expect_is(TokenKind::Newline)? { self.bleed_whitespace()?; - if self.peek_expect_is(LeekTokenKind::CloseCurlyBracket)? { + if self.peek_expect_is(TokenKind::CloseCurlyBracket)? { break; } @@ -1151,9 +1109,7 @@ impl LeekParser { } } - children.push(terminal!( - self.next_expect_is(LeekTokenKind::CloseCurlyBracket)? - )); + children.push(terminal!(self.next_expect_is(TokenKind::CloseCurlyBracket)?)); Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { kind: ParseTreeNonTerminalKind::StructDefinitionBody, @@ -1163,13 +1119,13 @@ impl LeekParser { /// TypeAssociation :: /// (identifier `:` Type) - fn parse_type_association(&mut self) -> Result { + fn parse_type_association(&mut self) -> Result { let mut children = Vec::new(); - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Colon)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Colon)?)); self.bleed_whitespace()?; children.push(self.parse_type()?); @@ -1182,7 +1138,7 @@ impl LeekParser { /// Type :: /// QualifiedIdentifier - fn parse_type(&mut self) -> Result { + fn parse_type(&mut self) -> Result { let children = vec![self.parse_qualified_identifier()?]; Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1193,20 +1149,20 @@ impl LeekParser { /// QualifiedIdentifier :: /// identifier (`::` identifier)* - fn parse_qualified_identifier(&mut self) -> Result { + fn parse_qualified_identifier(&mut self) -> Result { let mut children = Vec::new(); - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); while self .peek_nth_ignore_whitespace(0)? - .is_some_and(|token| token.kind == LeekTokenKind::DoubleColon) + .is_some_and(|token| token.kind == TokenKind::DoubleColon) { self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::DoubleColon)?)); + children.push(terminal!(self.next_expect_is(TokenKind::DoubleColon)?)); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); } Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1217,30 +1173,30 @@ impl LeekParser { /// ConstantVariableDeclaration :: /// `perm` identifier `:` Type `=` Expression - fn parse_constant_variable_declaration(&mut self) -> Result { + fn parse_constant_variable_declaration(&mut self) -> Result { let mut children = Vec::new(); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Perm))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Perm))? )); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Colon)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Colon)?)); self.bleed_whitespace()?; children.push(self.parse_type()?); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Equals)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Equals)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); if self.lexer.has_next()? { - children.push(terminal!(self.next_expect_is(LeekTokenKind::Newline)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Newline)?)); } Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1251,30 +1207,30 @@ impl LeekParser { /// StaticVariableDeclaration :: /// `hold` identifier `:` Type `=` Expression - fn parse_static_variable_declaration(&mut self) -> Result { + fn parse_static_variable_declaration(&mut self) -> Result { let mut children = Vec::new(); children.push(terminal!( - self.next_expect_is(LeekTokenKind::Keyword(KeywordKind::Hold))? + self.next_expect_is(TokenKind::Keyword(KeywordKind::Hold))? )); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Identifier)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Identifier)?)); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Colon)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Colon)?)); self.bleed_whitespace()?; children.push(self.parse_type()?); self.bleed_whitespace()?; - children.push(terminal!(self.next_expect_is(LeekTokenKind::Equals)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Equals)?)); self.bleed_whitespace()?; children.push(self.parse_expression()?); if self.lexer.has_next()? { - children.push(terminal!(self.next_expect_is(LeekTokenKind::Newline)?)); + children.push(terminal!(self.next_expect_is(TokenKind::Newline)?)); } Ok(ParseTreeNode::NonTerminal(ParseTreeNodeNonTerminal { @@ -1284,7 +1240,7 @@ impl LeekParser { } /// Internal method to parse all the tokens from the internal lexer - fn parse_from_lexer(&mut self) -> Result { + fn parse_from_lexer(&mut self) -> Result { let mut children = Vec::new(); self.bleed_whitespace()?; @@ -1312,20 +1268,21 @@ mod test { use ansi_term::Color; use crate::{ - frontend::lexer::{IntegerLiteralKind, KeywordKind, LeekLexer, LeekToken, LeekTokenKind}, - frontend::reader::FileReader, + frontend::lexer::Lexer, + frontend::{ + lexer::token::{IntegerLiteralKind, KeywordKind, Token, TokenKind}, + reader::FileReader, + }, }; - use super::{ - LeekParser, ParseTreeNode, ParseTreeNodeNonTerminal, ParseTreeNonTerminalKind, Parser, - }; + use super::{ParseTreeNode, ParseTreeNodeNonTerminal, ParseTreeNonTerminalKind, Parser}; fn compare_input_to_expected(input: &str, expected_tree: ParseTreeNode) { // Collect tokens from lexer let reader = FileReader::from(input.to_owned()); - let lexer = LeekLexer::new(reader); + let lexer = Lexer::new(reader); let parse_tree = - LeekParser::parse(lexer).unwrap_or_else(|e| panic!("Could not parse input: \n{e}")); + Parser::parse(lexer).unwrap_or_else(|e| panic!("Could not parse input: \n{e}")); if parse_tree.root == expected_tree { return; @@ -1350,7 +1307,7 @@ mod test { macro_rules! terminal_from { ($kind:expr, $text:literal) => { - ParseTreeNode::Terminal(LeekToken::from(($kind, $text))) + ParseTreeNode::Terminal(Token::from(($kind, $text))) }; } @@ -1378,139 +1335,139 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::ConstantVariableDeclaration, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Perm), "perm"), - terminal_from!(LeekTokenKind::Identifier, "PI"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Keyword(KeywordKind::Perm), "perm"), + terminal_from!(TokenKind::Identifier, "PI"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "f32"), + terminal_from!(TokenKind::Identifier, "f32"), ] )] ), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::FloatLiteral, + TokenKind::FloatLiteral, "3.1415926535" ),] ),] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( ParseTreeNonTerminalKind::ConstantVariableDeclaration, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Perm), "perm"), - terminal_from!(LeekTokenKind::Identifier, "E"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Keyword(KeywordKind::Perm), "perm"), + terminal_from!(TokenKind::Identifier, "E"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "f32"), + terminal_from!(TokenKind::Identifier, "f32"), ] )] ), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( ParseTreeNonTerminalKind::Atom, - vec![terminal_from!(LeekTokenKind::FloatLiteral, "2.178"),] + vec![terminal_from!(TokenKind::FloatLiteral, "2.178"),] ),] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( ParseTreeNonTerminalKind::ConstantVariableDeclaration, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Perm), "perm"), - terminal_from!(LeekTokenKind::Identifier, "THREE"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Keyword(KeywordKind::Perm), "perm"), + terminal_from!(TokenKind::Identifier, "THREE"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "u8"), + terminal_from!(TokenKind::Identifier, "u8"), ] )] ), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::IntegerLiteral( + TokenKind::IntegerLiteral( IntegerLiteralKind::Hexadecimal ), "0x03" ),] ),] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( ParseTreeNonTerminalKind::StaticVariableDeclaration, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Hold), "hold"), - terminal_from!(LeekTokenKind::Identifier, "state"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Keyword(KeywordKind::Hold), "hold"), + terminal_from!(TokenKind::Identifier, "state"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "u8"), + terminal_from!(TokenKind::Identifier, "u8"), ] )] ), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::IntegerLiteral(IntegerLiteralKind::Binary), + TokenKind::IntegerLiteral(IntegerLiteralKind::Binary), "0b0001" ),] ),] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( ParseTreeNonTerminalKind::FunctionDefinition, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Fn), "fn"), + terminal_from!(TokenKind::Keyword(KeywordKind::Fn), "fn"), non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "main"), + terminal_from!(TokenKind::Identifier, "main"), ] ), non_terminal!( ParseTreeNonTerminalKind::FunctionParameters, vec![ - terminal_from!(LeekTokenKind::OpenParen, "("), - terminal_from!(LeekTokenKind::CloseParen, ")"), + terminal_from!(TokenKind::OpenParen, "("), + terminal_from!(TokenKind::CloseParen, ")"), ] ), non_terminal!( ParseTreeNonTerminalKind::Block, vec![ - terminal_from!(LeekTokenKind::OpenCurlyBracket, "{"), + terminal_from!(TokenKind::OpenCurlyBracket, "{"), non_terminal!( ParseTreeNonTerminalKind::Statement, vec![ @@ -1518,17 +1475,17 @@ mod test { ParseTreeNonTerminalKind::LocalVariableDeclaration, vec![ terminal_from!( - LeekTokenKind::Keyword(KeywordKind::Leak), + TokenKind::Keyword(KeywordKind::Leak), "leak" ), - terminal_from!(LeekTokenKind::Identifier, "a"), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Identifier, "a"), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::IntegerLiteral( + TokenKind::IntegerLiteral( IntegerLiteralKind::Decimal ), "1" @@ -1537,7 +1494,7 @@ mod test { ), ] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( @@ -1547,17 +1504,17 @@ mod test { ParseTreeNonTerminalKind::LocalVariableDeclaration, vec![ terminal_from!( - LeekTokenKind::Keyword(KeywordKind::Leak), + TokenKind::Keyword(KeywordKind::Leak), "leak" ), - terminal_from!(LeekTokenKind::Identifier, "b"), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Identifier, "b"), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::IntegerLiteral( + TokenKind::IntegerLiteral( IntegerLiteralKind::Decimal ), "2" @@ -1566,7 +1523,7 @@ mod test { ), ] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( @@ -1576,11 +1533,11 @@ mod test { ParseTreeNonTerminalKind::LocalVariableDeclaration, vec![ terminal_from!( - LeekTokenKind::Keyword(KeywordKind::Leak), + TokenKind::Keyword(KeywordKind::Leak), "leak" ), - terminal_from!(LeekTokenKind::Identifier, "node"), - terminal_from!(LeekTokenKind::Equals, "="), + terminal_from!(TokenKind::Identifier, "node"), + terminal_from!(TokenKind::Equals, "="), non_terminal!( ParseTreeNonTerminalKind::Expression, vec![non_terminal!( @@ -1589,10 +1546,10 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "Node"), + terminal_from!(TokenKind::Identifier, "Node"), ] ), - terminal_from!(LeekTokenKind::OpenParen, "("), + terminal_from!(TokenKind::OpenParen, "("), non_terminal!( ParseTreeNonTerminalKind::FunctionArguments, vec![non_terminal!( @@ -1600,19 +1557,19 @@ mod test { vec![non_terminal!( ParseTreeNonTerminalKind::Atom, vec![terminal_from!( - LeekTokenKind::StringLiteral, + TokenKind::StringLiteral, "\"text\"" ),] ),] ),] ), - terminal_from!(LeekTokenKind::CloseParen, ")"), + terminal_from!(TokenKind::CloseParen, ")"), ] ),] ), ] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( @@ -1624,10 +1581,10 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "println"), + terminal_from!(TokenKind::Identifier, "println"), ] ), - terminal_from!(LeekTokenKind::OpenParen, "("), + terminal_from!(TokenKind::OpenParen, "("), non_terminal!( ParseTreeNonTerminalKind::FunctionArguments, vec![non_terminal!( @@ -1638,20 +1595,20 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![ - terminal_from!(LeekTokenKind::Identifier, "a"), + terminal_from!(TokenKind::Identifier, "a"), ] ) ] ),] ),] ), - terminal_from!(LeekTokenKind::CloseParen, ")"), + terminal_from!(TokenKind::CloseParen, ")"), ] ), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Newline, "\n"), ] ), - terminal_from!(LeekTokenKind::CloseCurlyBracket, "}"), + terminal_from!(TokenKind::CloseCurlyBracket, "}"), ] ), ] @@ -1683,21 +1640,21 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::StructDefinition, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Struct), "struct"), - terminal_from!(LeekTokenKind::Identifier, "EmptyStruct"), - terminal_from!(LeekTokenKind::Newline, "\n"), + terminal_from!(TokenKind::Keyword(KeywordKind::Struct), "struct"), + terminal_from!(TokenKind::Identifier, "EmptyStruct"), + terminal_from!(TokenKind::Newline, "\n"), ] ), non_terminal!( ParseTreeNonTerminalKind::StructDefinition, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Struct), "struct"), - terminal_from!(LeekTokenKind::Identifier, "SomeStruct"), + terminal_from!(TokenKind::Keyword(KeywordKind::Struct), "struct"), + terminal_from!(TokenKind::Identifier, "SomeStruct"), non_terminal!( ParseTreeNonTerminalKind::StructDefinitionBody, vec![ - terminal_from!(LeekTokenKind::OpenCurlyBracket, "{"), - terminal_from!(LeekTokenKind::CloseCurlyBracket, "}"), + terminal_from!(TokenKind::OpenCurlyBracket, "{"), + terminal_from!(TokenKind::CloseCurlyBracket, "}"), ] ) ] @@ -1705,23 +1662,23 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::StructDefinition, vec![ - terminal_from!(LeekTokenKind::Keyword(KeywordKind::Struct), "struct"), - terminal_from!(LeekTokenKind::Identifier, "BinaryTreeNode"), + terminal_from!(TokenKind::Keyword(KeywordKind::Struct), "struct"), + terminal_from!(TokenKind::Identifier, "BinaryTreeNode"), non_terminal!( ParseTreeNonTerminalKind::StructDefinitionBody, vec![ - terminal_from!(LeekTokenKind::OpenCurlyBracket, "{"), + terminal_from!(TokenKind::OpenCurlyBracket, "{"), non_terminal!( ParseTreeNonTerminalKind::TypeAssociation, vec![ - terminal_from!(LeekTokenKind::Identifier, "left"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Identifier, "left"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![terminal_from!( - LeekTokenKind::Identifier, + TokenKind::Identifier, "BinaryTreeNode" ),] )] @@ -1731,14 +1688,14 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::TypeAssociation, vec![ - terminal_from!(LeekTokenKind::Identifier, "right"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Identifier, "right"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![terminal_from!( - LeekTokenKind::Identifier, + TokenKind::Identifier, "BinaryTreeNode" ),] )] @@ -1748,21 +1705,21 @@ mod test { non_terminal!( ParseTreeNonTerminalKind::TypeAssociation, vec![ - terminal_from!(LeekTokenKind::Identifier, "data"), - terminal_from!(LeekTokenKind::Colon, ":"), + terminal_from!(TokenKind::Identifier, "data"), + terminal_from!(TokenKind::Colon, ":"), non_terminal!( ParseTreeNonTerminalKind::Type, vec![non_terminal!( ParseTreeNonTerminalKind::QualifiedIdentifier, vec![terminal_from!( - LeekTokenKind::Identifier, + TokenKind::Identifier, "i32" ),] )] ), ] ), - terminal_from!(LeekTokenKind::CloseCurlyBracket, "}"), + terminal_from!(TokenKind::CloseCurlyBracket, "}"), ] ) ] diff --git a/compiler/src/frontend/position.rs b/compiler/src/frontend/position.rs index 79d3d3f..b3ed7fb 100644 --- a/compiler/src/frontend/position.rs +++ b/compiler/src/frontend/position.rs @@ -60,6 +60,18 @@ impl Span { Self { start, end } } + pub fn from_position(pos: &Position) -> Self { + let end = Position { + row: pos.row, + col: pos.col + 1, + }; + + Self { + start: pos.clone(), + end, + } + } + pub fn start(&self) -> &Position { &self.start } diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 1110f0a..8689e6c 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -3,12 +3,12 @@ use std::path::PathBuf; use clap::Parser; use leek::{ backend::codegen::CodeGenTarget, - common::config::{BuildMode, EmitMode, LeekCompilerConfig, OptimizationLevel}, + common::config::{BuildMode, CompilerConfig, EmitMode, OptimizationLevel}, }; #[derive(Parser, Debug)] #[command(author, version, about = "A bootstrap compiler for the Leek language", long_about = None)] -struct LeekCompilerArgs { +struct CompilerArgs { #[arg(required = true)] input_files: Vec, #[arg(short, long, value_enum, value_name = "EMIT_MODE", default_value_t = EmitMode::default(), help = "Specifies what kind of output to generate")] @@ -27,9 +27,9 @@ struct LeekCompilerArgs { opt_level: OptimizationLevel, } -impl From for LeekCompilerConfig { - fn from(args: LeekCompilerArgs) -> Self { - LeekCompilerConfig { +impl From for CompilerConfig { + fn from(args: CompilerArgs) -> Self { + CompilerConfig { opt_level: args.opt_level, build_mode: if args.release { BuildMode::Release @@ -46,10 +46,10 @@ impl From for LeekCompilerConfig { fn main() { // Get the command line arguments - let args = LeekCompilerArgs::parse(); + let args = CompilerArgs::parse(); // Convert to the global config struct - let config: LeekCompilerConfig = args.into(); + let config: CompilerConfig = args.into(); for file in &config.input_files { let ast = leek::frontend::parse_file(file.into()).unwrap_or_else(|e| e.report());