From 8498934a0b3e30804efd6daa9b45d86db99ad74d Mon Sep 17 00:00:00 2001 From: Oriol Linan Date: Thu, 5 Dec 2024 23:26:07 +0100 Subject: [PATCH] wip: redoing `parser` --- lib/Ast/Parser.hs | 117 ++++++++++++++++++++----------------------- lib/Ast/Tokenizer.hs | 41 --------------- lib/Ast/Types.hs | 10 ++-- 3 files changed, 60 insertions(+), 108 deletions(-) delete mode 100644 lib/Ast/Tokenizer.hs diff --git a/lib/Ast/Parser.hs b/lib/Ast/Parser.hs index 1cdf207..9cdc407 100644 --- a/lib/Ast/Parser.hs +++ b/lib/Ast/Parser.hs @@ -1,83 +1,76 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE TypeFamilies #-} - module Ast.Parser (parse) where -import qualified Ast.Tokenizer as T -import Ast.Types (AST (..), Expr) -import Control.Applicative (Alternative ((<|>))) -import Data.Foldable (Foldable (toList)) +import Ast.Types (AST (..), Expr (..), Literal (..), Operation (..)) +import Control.Applicative (Alternative (..)) import qualified Data.Void as V import qualified Text.Megaparsec as M -import qualified Text.Megaparsec.State as MST -import qualified Text.Megaparsec.Stream as MS - -newtype TokenStream = TokenStream [T.Token] - deriving (Show, Eq, Ord) +import qualified Text.Megaparsec.Char as MC +import qualified Text.Megaparsec.Char.Lexer as ML -instance MS.Stream TokenStream where - type Token TokenStream = T.Token - type Tokens TokenStream = [T.Token] +type Parser = M.Parsec V.Void String - tokenToChunk _ t = [t] +parse :: String -> Either String AST +parse input = case M.parse parseProgram "" input of + Left err -> Left (M.errorBundlePretty err) + Right tokens -> Right tokens - tokensToChunk _ ts = ts +parseProgram :: Parser AST +parseProgram = do + ast <- parseAst + M.eof + return ast - chunkToTokens _ chunk = chunk +parseAst :: Parser AST +parseAst = + lexeme $ + let parsers = [parseDefine, parseLambda, parseIf, parseCall, parseOp, parseVar, parseLit] + triedParsers = map M.try (init parsers) ++ [last parsers] + in M.choice triedParsers - chunkLength _ = length +parseLit :: Parser AST +parseLit = AST . (: []) . Lit <$> M.choice [parseInt, parseBool] - chunkEmpty _ = null +parseInt :: Parser Literal +parseInt = LInt <$> ML.signed sc ML.decimal - take1_ (TokenStream []) = Nothing - take1_ (TokenStream (x : xs)) = Just (x, TokenStream xs) +parseBool :: Parser Literal +parseBool = LBool True <$ MC.string "#t" <|> LBool False <$ MC.string "#f" - takeN_ n (TokenStream s) - | null s = Nothing - | n <= 0 = Just ([], TokenStream s) - | otherwise = Just (take n s, TokenStream $ drop n s) +parseVar :: Parser AST +parseVar = AST . (: []) . Var <$> some (MC.alphaNumChar <|> MC.symbolChar <|> M.oneOf "+-*_") - takeWhile_ f (TokenStream s) = - let (h, t) = span f s - in (h, TokenStream t) +parseDefine :: Parser AST +parseDefine = fail "" -instance MS.VisualStream TokenStream where - showTokens _ ts = unwords $ map show $ toList ts +parseCall :: Parser AST +parseCall = fail "" -instance MS.TraversableStream TokenStream where - reachOffset o pst = - let (TokenStream s) = MST.pstateInput pst - oldOffset = MST.pstateOffset pst - diff = o - oldOffset - (_, rest) = splitAt diff s - newPosState = - pst - { MST.pstateInput = TokenStream rest, - MST.pstateOffset = o - } - shownStream = show (take 10 rest) - in (Just shownStream, newPosState) +parseLambda :: Parser AST +parseLambda = fail "" -type Parser = M.Parsec V.Void TokenStream +parseIf :: Parser AST +parseIf = do + _ <- MC.string "if" <* sc + e1 <- parseAst <* sc + e2 <- parseAst <* sc + e3 <- parseAst + return $ AST . (: []) $ If e1 e2 e3 -parse :: String -> Either String AST -parse input = case T.tokenize input of - Left err -> Left err - Right tokens -> case M.parse parseProgram "" (TokenStream tokens) of - Left err -> Left (M.errorBundlePretty err) - Right ast -> Right ast +parseOp :: Parser AST +parseOp = do + op <- parseOpeartor <* sc + e1 <- parseAst <* sc + e2 <- parseAst + return $ AST . (: []) $ Op op e1 e2 -parseProgram :: Parser AST -parseProgram = do - exprs <- M.many parseExpr - M.eof - return $ AST exprs +parseOpeartor :: Parser Operation +parseOpeartor = M.choice $ (\(o, c) -> c <$ MC.string o) <$> ops -parseExpr :: Parser Expr -parseExpr = parseAtom <|> parseList +ops :: [(String, Operation)] +ops = [("+", Add), ("-", Sub), ("*", Mult), ("div", Div), (">", Gt), ("<", Lt), (">=", Gte), ("<=", Lte), ("==", Equal), ("&&", And), ("||", Or)] -parseAtom :: Parser Expr -parseAtom = fail "not defined" +sc :: Parser () +sc = ML.space MC.space1 empty empty -parseList :: Parser Expr -parseList = fail "not defined" +lexeme :: Parser a -> Parser a +lexeme = ML.lexeme sc diff --git a/lib/Ast/Tokenizer.hs b/lib/Ast/Tokenizer.hs deleted file mode 100644 index d3ca6bc..0000000 --- a/lib/Ast/Tokenizer.hs +++ /dev/null @@ -1,41 +0,0 @@ -module Ast.Tokenizer (Token (..), Separator (..), tokenize) where - -import Ast.Types (Literal (..)) -import Control.Applicative (Alternative (some), (<|>)) -import qualified Data.Void as V -import GHC.Base (Alternative (empty)) -import qualified Text.Megaparsec as M -import qualified Text.Megaparsec.Char as MC -import qualified Text.Megaparsec.Char.Lexer as ML - -type Parser = M.Parsec V.Void String - -data Separator = OpenParen | CloseParen deriving (Show, Eq, Ord) - -data Token = TLiteral Literal | TSeparator Separator deriving (Show, Eq, Ord) - -tokenize :: String -> Either String [Token] -tokenize input = case M.parse (some token <* M.eof) "" input of - Left err -> Left (M.errorBundlePretty err) - Right tokens -> Right tokens - -token :: Parser Token -token = lexeme $ M.choice [separator, boolean, number, symbol] - -sc :: Parser () -sc = ML.space MC.space1 empty empty - -lexeme :: Parser a -> Parser a -lexeme = ML.lexeme sc - -separator :: Parser Token -separator = TSeparator OpenParen <$ MC.char '(' <|> TSeparator CloseParen <$ MC.char ')' - -boolean :: Parser Token -boolean = TLiteral (LBool True) <$ MC.string "#t" <|> TLiteral (LBool False) <$ MC.string "#f" - -number :: Parser Token -number = M.try $ TLiteral . LInt <$> ML.signed (pure ()) ML.decimal - -symbol :: Parser Token -symbol = TLiteral . LSymbol <$> some (MC.alphaNumChar <|> MC.symbolChar <|> M.oneOf "+-*_") diff --git a/lib/Ast/Types.hs b/lib/Ast/Types.hs index 04f28f1..ac5f435 100644 --- a/lib/Ast/Types.hs +++ b/lib/Ast/Types.hs @@ -17,11 +17,11 @@ data Literal data Expr = Lit Literal | Var String - | Define String Expr - | Call Expr [Expr] - | Lambda [String] Expr - | If Expr Expr Expr - | Op Operation Expr Expr + | Define String AST + | Call AST [AST] + | Lambda [String] AST + | If AST AST AST + | Op Operation AST AST deriving (Show, Eq) -- | Operations supported by the language, such as addition, subtraction, etc.