-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #13 from EpitechPromo2027/7-parse-token
Parse token
- Loading branch information
Showing
3 changed files
with
70 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
module Ast.Tokenizer (Token (..), Literal (..), Separator (..), tokenize) where | ||
|
||
import Control.Applicative (Alternative (some), (<|>)) | ||
import qualified Data.Void as V | ||
import GHC.Base (Alternative (empty)) | ||
import qualified Text.Megaparsec as M | ||
import qualified Text.Megaparsec.Char as MC | ||
import qualified Text.Megaparsec.Char.Lexer as ML | ||
|
||
-- | Parser type used throughout the tokenizer: a Megaparsec parser over
-- 'String' input with no custom error component ('V.Void').
type Parser = M.Parsec V.Void String
|
||
-- | Structural delimiters recognised by the tokenizer.
data Separator
  = -- | An opening parenthesis, @(@.
    OpenParen
  | -- | A closing parenthesis, @)@.
    CloseParen
  deriving (Show, Eq)
|
||
-- | Atomic values produced by the tokenizer.
data Literal
  = -- | An integer literal.
    LNumber Int
  | -- | A boolean literal.
    LBoolean Bool
  | -- | A symbol, stored as the text that was read.
    LSymbol String
  deriving (Show, Eq)
|
||
-- | A single lexical unit: either a literal value or a separator.
data Token
  = -- | A number, boolean or symbol.
    TLiteral Literal
  | -- | A parenthesis.
    TSeparator Separator
  deriving (Show, Eq)
|
||
-- | Split source text into a flat list of 'Token's.
--
-- Whitespace in front of the first token is skipped explicitly here, because
-- 'lexeme' only consumes the whitespace that /follows/ a token; without this
-- step, input such as @"  42"@ was rejected. The whole input must be
-- consumed, and at least one token is required, so empty or whitespace-only
-- input yields 'Left'.
--
-- Returns 'Right' with the tokens in source order, or 'Left' with the
-- pretty-printed Megaparsec error bundle.
tokenize :: String -> Either String [Token]
tokenize input =
  either (Left . M.errorBundlePretty) Right $
    M.parse (sc *> some token <* M.eof) "" input
|
||
-- | Parse one token and discard the whitespace that follows it.
--
-- Alternatives are tried in order: separator, boolean, number, symbol.
token :: Parser Token
token = lexeme (separator <|> boolean <|> number <|> symbol)
|
||
-- | Space consumer: skips zero or more runs of whitespace.
--
-- Both comment parsers are 'empty', so no line or block comment syntax is
-- recognised.
sc :: Parser ()
sc = ML.space whitespace noLineComment noBlockComment
  where
    whitespace :: Parser ()
    whitespace = MC.space1

    noLineComment :: Parser ()
    noLineComment = empty

    noBlockComment :: Parser ()
    noBlockComment = empty
|
||
-- | Run the given parser, then consume any trailing whitespace with 'sc',
-- keeping the parser's result.
lexeme :: Parser a -> Parser a
lexeme parser = parser <* sc
|
||
-- | Parse a single parenthesis into the matching 'Separator' token.
separator :: Parser Token
separator =
  TSeparator
    <$> M.choice
      [ OpenParen <$ MC.char '('
      , CloseParen <$ MC.char ')'
      ]
|
||
-- | Parse the boolean literals @#t@ (true) and @#f@ (false).
boolean :: Parser Token
boolean =
  TLiteral . LBoolean
    <$> M.choice
      [ True <$ MC.string "#t"
      , False <$ MC.string "#f"
      ]
|
||
-- | Parse an optionally signed decimal integer.
--
-- No whitespace is allowed between the sign and the digits. The parser is
-- wrapped in 'M.try' so that a lone sign (for example the @+@ in @(+ 1 2)@)
-- backtracks and can still be read as a symbol.
number :: Parser Token
number = M.try (TLiteral . LNumber <$> signedInt)
  where
    signedInt :: Parser Int
    signedInt = ML.signed noGap ML.decimal

    -- Nothing may separate the sign from the digits.
    noGap :: Parser ()
    noGap = pure ()
|
||
-- | Parse a symbol: one or more identifier characters.
--
-- Accepted characters are alphanumerics, Unicode symbol characters
-- (which already cover operators such as @<@, @=@ and @>@), and the ASCII
-- punctuation listed in @extraChars@. The characters @/@, @?@ and @!@ are
-- punctuation rather than symbols, so they have to be listed explicitly;
-- otherwise identifiers such as @/@, @eq?@ or @set!@ cannot be tokenized.
symbol :: Parser Token
symbol = TLiteral . LSymbol <$> some symbolCharacter
  where
    symbolCharacter :: Parser Char
    symbolCharacter =
      MC.alphaNumChar <|> MC.symbolChar <|> M.oneOf extraChars

    extraChars :: String
    extraChars = "+-*/_?!"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
module Ast.TokenizerSpec (spec) where | ||
|
||
import Ast.Tokenizer (Literal (..), Separator (..), Token (..), tokenize) | ||
import Test.Hspec (Spec, describe, it, shouldBe) | ||
|
||
-- | Example-based checks for 'tokenize': one case per token kind plus a
-- small mixed expression.
spec :: Spec
spec = describe "Ast.Tokenizer" $ do
  it "parses a simple number" $
    tokenize "42" `shouldBe` Right [num 42]

  it "parses a boolean" $
    tokenize "#t" `shouldBe` Right [bool True]

  it "parses symbols" $
    tokenize "foo" `shouldBe` Right [sym "foo"]

  it "parses parentheses" $
    tokenize "()" `shouldBe` Right [open, close]

  it "handles mixed input" $
    tokenize "(+ 1 2)"
      `shouldBe` Right [open, sym "+", num 1, num 2, close]
  where
    -- Shorthand constructors to keep the expected token lists readable.
    num = TLiteral . LNumber
    bool = TLiteral . LBoolean
    sym = TLiteral . LSymbol
    open = TSeparator OpenParen
    close = TSeparator CloseParen