Skip to content

Commit

Permalink
Merge pull request #13 from EpitechPromo2027/7-parse-token
Browse files Browse the repository at this point in the history
Parse token
  • Loading branch information
oriollinan authored Dec 2, 2024
2 parents c1a5f9b + 7d588e0 commit 72460ca
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 1 deletion.
6 changes: 5 additions & 1 deletion glados.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ library
import: warnings
exposed-modules:
Ast.Types
Ast.Tokenizer
Codegen.Codegen
Misc

Expand All @@ -31,6 +32,7 @@ library
containers >=0.6.7 && <0.7,
llvm-hs-pretty >=0.9.0 && <0.10,
llvm-hs-pure >=9.0.0 && <9.1,
megaparsec >= 9.7.0

hs-source-dirs: lib
default-language: Haskell2010
Expand All @@ -51,7 +53,9 @@ test-suite glados-test
import: warnings
default-language: Haskell2010
type: exitcode-stdio-1.0
other-modules: Misc.MiscSpec
other-modules:
Misc.MiscSpec,
Ast.TokenizerSpec
hs-source-dirs: test
main-is: Spec.hs
build-depends:
Expand Down
42 changes: 42 additions & 0 deletions lib/Ast/Tokenizer.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
module Ast.Tokenizer (Token (..), Literal (..), Separator (..), tokenize) where

import Control.Applicative (Alternative (some), (<|>))
import qualified Data.Void as V
import GHC.Base (Alternative (empty))
import qualified Text.Megaparsec as M
import qualified Text.Megaparsec.Char as MC
import qualified Text.Megaparsec.Char.Lexer as ML

-- | Parser type used throughout this module: it reads a 'String' and has no
-- custom error component ('V.Void').
type Parser = M.Parsec V.Void String

-- | Punctuation that delimits groups of tokens.
data Separator
  = OpenParen -- ^ The character @(@.
  | CloseParen -- ^ The character @)@.
  deriving (Show, Eq)

-- | Atomic values recognised by the tokenizer.
data Literal
  = LNumber Int -- ^ An integer literal.
  | LBoolean Bool -- ^ A boolean literal.
  | LSymbol String -- ^ A symbol, stored as the text that was read.
  deriving (Show, Eq)

-- | A single lexical unit: either a 'Literal' or a 'Separator'.
data Token
  = TLiteral Literal -- ^ Wraps a literal value.
  | TSeparator Separator -- ^ Wraps a separator.
  deriving (Show, Eq)

-- | Split source text into a list of tokens.
--
-- Whitespace before the first token is skipped explicitly: 'lexeme' only
-- consumes whitespace /after/ a token, so without the leading 'sc' an input
-- such as @" 42"@ would be rejected.
--
-- At least one token is required and the whole input must be consumed.
-- On failure the result is @Left@ with the rendered megaparsec error bundle.
tokenize :: String -> Either String [Token]
tokenize input =
  either (Left . M.errorBundlePretty) Right $
    M.parse (sc *> some token <* M.eof) "" input

-- | Parse one token and then skip any whitespace that follows it.
--
-- Alternatives are tried left to right. 'number' comes before 'symbol'
-- because 'symbol' also accepts digits.
token :: Parser Token
token = lexeme (separator <|> boolean <|> number <|> symbol)

-- | Space consumer: skips whitespace between tokens.
--
-- Both comment parsers handed to 'ML.space' are 'empty', so no comment
-- syntax is recognised.
sc :: Parser ()
sc = ML.space whitespace noLineComment noBlockComment
  where
    whitespace = MC.space1
    noLineComment = empty
    noBlockComment = empty

-- | Run the given parser, then discard trailing whitespace with 'sc'.
-- The result of the wrapped parser is returned unchanged.
lexeme :: Parser a -> Parser a
lexeme p = p <* sc

-- | Parse @(@ or @)@ into the corresponding 'TSeparator' token.
separator :: Parser Token
separator = TSeparator <$> (openParen <|> closeParen)
  where
    openParen = OpenParen <$ MC.char '('
    closeParen = CloseParen <$ MC.char ')'

-- | Parse the boolean literals @#t@ (true) and @#f@ (false).
boolean :: Parser Token
boolean = TLiteral . LBoolean <$> (true <|> false)
  where
    true = True <$ MC.string "#t"
    false = False <$ MC.string "#f"

-- | Parse a decimal integer with an optional leading sign.
--
-- The space consumer given to 'ML.signed' is @pure ()@, so no whitespace is
-- accepted between the sign and the digits. The parser is wrapped in 'M.try'
-- so that a sign with no digits after it is not consumed and later
-- alternatives can still see it.
number :: Parser Token
number = M.try (toToken <$> signedInt)
  where
    toToken = TLiteral . LNumber
    signedInt = ML.signed (pure ()) ML.decimal

-- | Parse a symbol: one or more characters, each accepted by
-- 'MC.alphaNumChar' or 'MC.symbolChar', or one of @+@, @-@, @*@, @_@.
symbol :: Parser Token
symbol = TLiteral . LSymbol <$> some symbolCharacter
  where
    symbolCharacter =
      M.choice
        [ MC.alphaNumChar
        , MC.symbolChar
        , M.oneOf "+-*_"
        ]
23 changes: 23 additions & 0 deletions test/Ast/TokenizerSpec.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
module Ast.TokenizerSpec (spec) where

import Ast.Tokenizer (Literal (..), Separator (..), Token (..), tokenize)
import Test.Hspec (Spec, describe, it, shouldBe)

-- | Example-based checks for 'tokenize'.
spec :: Spec
spec =
  describe "Ast.Tokenizer" $ do
    it "parses a simple number" $
      tokenize "42" `shouldBe` Right [num 42]

    it "parses a boolean" $
      tokenize "#t" `shouldBe` Right [TLiteral (LBoolean True)]

    it "parses symbols" $
      tokenize "foo" `shouldBe` Right [sym "foo"]

    it "parses parentheses" $
      tokenize "()" `shouldBe` Right [open, close]

    it "handles mixed input" $
      tokenize "(+ 1 2)"
        `shouldBe` Right [open, sym "+", num 1, num 2, close]
  where
    -- Shorthand constructors that keep the expected token lists readable.
    num = TLiteral . LNumber
    sym = TLiteral . LSymbol
    open = TSeparator OpenParen
    close = TSeparator CloseParen

0 comments on commit 72460ca

Please sign in to comment.