Skip to content

Commit

Permalink
add: parse Assembly
Browse files Browse the repository at this point in the history
  • Loading branch information
oriollinan committed Jan 14, 2025
1 parent 231cf2a commit ff76ce8
Show file tree
Hide file tree
Showing 9 changed files with 371 additions and 237 deletions.
2 changes: 2 additions & 0 deletions glados.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ library
import: warnings
exposed-modules:
Ast.Parser
Ast.Parser.Asm
Ast.Parser.Expr
Ast.Parser.Import
Ast.Parser.Literal
Expand Down Expand Up @@ -69,6 +70,7 @@ test-suite glados-test
default-language: Haskell2010
type: exitcode-stdio-1.0
other-modules:
Ast.Parser.AsmSpec
Ast.Parser.LiteralSpec
Ast.Parser.TypeDefinitionSpec
Ast.Parser.TypeSpec
Expand Down
33 changes: 33 additions & 0 deletions lib/Ast/Parser/Asm.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
module Ast.Parser.Asm where

import qualified Ast.Parser.Utils as PU
import qualified Ast.Types as AT
import qualified Data.Maybe as DM
import qualified Text.Megaparsec as M
import qualified Text.Megaparsec.Char as MC

parseAsm :: PU.Parser AT.Expr -> PU.Parser AT.AsmExpr
parseAsm ap = M.between (PU.symbol "{") (PU.symbol "}") $ do
code <- PU.symbol "code ->" *> PU.lexeme anyString
constraints <- PU.symbol "constraints ->" *> PU.lexeme parseAsmConstraint
args <- PU.symbol "args ->" *> M.between (PU.symbol "(") (PU.symbol ")") (M.many ap)
sideEffects <- PU.symbol "side_effects ->" *> PU.lexeme PU.parseBool
alignStack <- PU.symbol "align_stack ->" *> PU.lexeme PU.parseBool
return $ AT.AsmExpr code constraints args sideEffects alignStack

parseAsmConstraint :: PU.Parser AT.AsmConstraint
parseAsmConstraint = M.between (MC.char '"') (MC.char '"') $ do
output <- M.optional parseConstraintOutput
inputs <- M.optional $ sep *> M.sepBy parseConstraintInput sep
return $ AT.AsmConstraint (DM.fromMaybe "" output) $ DM.fromMaybe [] inputs
where
sep = MC.string ","

parseConstraintInput :: PU.Parser String
parseConstraintInput = M.choice [MC.string "r", MC.string "m"]

parseConstraintOutput :: PU.Parser String
parseConstraintOutput = MC.char '=' *> M.choice [MC.string "r", MC.string "m"]

anyString :: PU.Parser String
anyString = M.between (MC.char '\"') (MC.char '\"') $ M.many PU.parseStringChar
9 changes: 9 additions & 0 deletions lib/Ast/Parser/Expr.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module Ast.Parser.Expr where

import qualified Ast.Parser.Asm as PA
import qualified Ast.Parser.Literal as PL
import qualified Ast.Parser.State as PS
import qualified Ast.Parser.Type as PT
Expand Down Expand Up @@ -93,6 +94,7 @@ parseTerm =
parseBlock id,
parseCast,
parseDefer,
parseAssembly,
M.try parseFunction,
M.try parseForeignFunction,
M.try parseDeclaration,
Expand Down Expand Up @@ -154,6 +156,7 @@ implicitReturn e@(AT.UnaryOp {}) = AT.Return (SU.getLoc e) $ Just e
implicitReturn e@(AT.StructAccess {}) = AT.Return (SU.getLoc e) $ Just e
implicitReturn e@(AT.ArrayAccess {}) = AT.Return (SU.getLoc e) $ Just e
implicitReturn e@(AT.Cast {}) = AT.Return (SU.getLoc e) $ Just e
implicitReturn e@(AT.Assembly {}) = AT.Return (SU.getLoc e) $ Just e

parseForeignFunction :: PU.Parser AT.Expr
parseForeignFunction = do
Expand Down Expand Up @@ -271,3 +274,9 @@ parseDefer = do

parseParenExpr :: PU.Parser AT.Expr
parseParenExpr = M.between (PU.symbol "(") (PU.symbol ")") parseExpr

parseAssembly :: PU.Parser AT.Expr
parseAssembly = do
srcLoc <- PU.parseSrcLoc
type' <- PU.symbol "__asm__" *> PT.parseType
AT.Assembly srcLoc type' <$> PA.parseAsm parseExpr
41 changes: 2 additions & 39 deletions lib/Ast/Parser/Literal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import qualified Ast.Parser.State as PS
import qualified Ast.Parser.Utils as PU
import qualified Ast.Types as AT
import qualified Control.Monad.State as S
import qualified Data.Char as C
import qualified Text.Megaparsec as M
import qualified Text.Megaparsec.Char as MC
import qualified Text.Megaparsec.Char.Lexer as ML
Expand Down Expand Up @@ -44,7 +43,7 @@ parseFloat =
-- | Parses a boolean literal (`true` or `false`).
-- Returns a `Literal` of type `LBool`.
parseBool :: PU.Parser AT.Literal
parseBool = AT.LBool True <$ MC.string trueSymbol M.<|> AT.LBool False <$ MC.string falseSymbol
parseBool = AT.LBool <$> PU.parseBool

-- | Parses a character literal (e.g., 'a').
-- Returns a `Literal` of type `LChar`.
Expand All @@ -63,48 +62,12 @@ parseArray =
where
parseStringArray =
AT.LArray . map AT.LChar
<$> M.between (MC.char '\"') (MC.char '\"') (M.many parseStringChar)
<$> M.between (MC.char '\"') (MC.char '\"') (M.many PU.parseStringChar)

parseLiteralArray =
AT.LArray
<$> M.between (PU.symbol "[") (PU.symbol "]") (M.sepBy parseLiteral PU.sc)

parseStringChar =
M.choice
[ parseEscapeSequence,
M.noneOf ['"', '\\']
]

parseEscapeSequence =
MC.char '\\'
>> M.choice
[ '\a' <$ MC.char 'a',
'\b' <$ MC.char 'b',
'\f' <$ MC.char 'f',
'\n' <$ MC.char 'n',
'\r' <$ MC.char 'r',
'\t' <$ MC.char 't',
'\v' <$ MC.char 'v',
'\\' <$ MC.char '\\',
'\"' <$ MC.char '"',
'\'' <$ MC.char '\'',
'\0' <$ MC.char '0',
parseHexEscape,
parseOctalEscape
]

parseHexEscape = do
_ <- MC.char 'x'
digits <- M.count 2 hexDigit
return $ C.chr $ read ("0x" ++ digits)

parseOctalEscape = do
digits <- M.count 3 octalDigit
return $ C.chr $ read ("0o" ++ digits)

hexDigit = M.oneOf $ ['0' .. '9'] ++ ['a' .. 'f'] ++ ['A' .. 'F']
octalDigit = M.oneOf ['0' .. '7']

-- | Parses a `null` literal.
-- Returns a `Literal` of type `LNull`.
parseNull :: PU.Parser AT.Literal
Expand Down
72 changes: 72 additions & 0 deletions lib/Ast/Parser/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import qualified Ast.Parser.State as PS
import qualified Ast.Types as AT
import qualified Control.Monad.Combinators.Expr as CE
import qualified Control.Monad.State as S
import qualified Data.Char as C
import qualified Text.Megaparsec as M
import qualified Text.Megaparsec.Char as MC
import qualified Text.Megaparsec.Char.Lexer as ML
Expand Down Expand Up @@ -63,3 +64,74 @@ postfix name f = CE.Postfix (f <$> (parseSrcLoc <* symbol name))
-- | Helper functions to define operators
binary :: String -> (AT.SrcLoc -> AT.Expr -> AT.Expr -> AT.Expr) -> CE.Operator Parser AT.Expr
binary name f = CE.InfixL (f <$> (parseSrcLoc <* symbol name))

parseBool :: Parser Bool
parseBool = True <$ MC.string "true" M.<|> False <$ MC.string "false"

parseStringChar :: Parser Char
parseStringChar =
M.choice
[ parseEscapeSequence,
M.noneOf ['"', '\\']
]

parseEscapeSequence :: Parser Char
parseEscapeSequence =
MC.char '\\'
>> M.choice
[ '\a' <$ MC.char 'a',
'\b' <$ MC.char 'b',
'\f' <$ MC.char 'f',
'\n' <$ MC.char 'n',
'\r' <$ MC.char 'r',
'\t' <$ MC.char 't',
'\v' <$ MC.char 'v',
'\\' <$ MC.char '\\',
'\"' <$ MC.char '"',
'\'' <$ MC.char '\'',
'\0' <$ MC.char '0',
parseHexEscape,
parseOctalEscape
]

parseHexEscape :: Parser Char
parseHexEscape = do
_ <- MC.char 'x'
digits <- M.count 2 hexDigit
return $ C.chr $ read ("0x" ++ digits)

parseOctalEscape :: Parser Char
parseOctalEscape = do
digits <- M.count 3 octalDigit
return $ C.chr $ read ("0o" ++ digits)

hexDigit :: Parser Char
hexDigit = M.oneOf $ ['0' .. '9'] ++ ['a' .. 'f'] ++ ['A' .. 'F']

octalDigit :: Parser Char
octalDigit = M.oneOf ['0' .. '7']

normalizeLoc :: AT.SrcLoc
normalizeLoc = AT.SrcLoc "" 0 0

normalizeExpr :: AT.Expr -> AT.Expr
normalizeExpr (AT.Lit _ lit) = AT.Lit normalizeLoc lit
normalizeExpr (AT.Var _ name t) = AT.Var normalizeLoc name t
normalizeExpr (AT.Function _ name t params body) = AT.Function normalizeLoc name t params (normalizeExpr body)
normalizeExpr (AT.Declaration _ name t initVal) = AT.Declaration normalizeLoc name t (fmap normalizeExpr initVal)
normalizeExpr (AT.Assignment _ target value) = AT.Assignment normalizeLoc (normalizeExpr target) (normalizeExpr value)
normalizeExpr (AT.Call _ func args) = AT.Call normalizeLoc (normalizeExpr func) (map normalizeExpr args)
normalizeExpr (AT.If _ cond thenBranch elseBranch) = AT.If normalizeLoc (normalizeExpr cond) (normalizeExpr thenBranch) (fmap normalizeExpr elseBranch)
normalizeExpr (AT.Block exprs) = AT.Block (map normalizeExpr exprs)
normalizeExpr (AT.Return _ value) = AT.Return normalizeLoc (fmap normalizeExpr value)
normalizeExpr (AT.Op _ op e1 e2) = AT.Op normalizeLoc op (normalizeExpr e1) (normalizeExpr e2)
normalizeExpr (AT.UnaryOp _ op e) = AT.UnaryOp normalizeLoc op (normalizeExpr e)
normalizeExpr (AT.For _ i c s b) = AT.For normalizeLoc (normalizeExpr i) (normalizeExpr c) (normalizeExpr s) (normalizeExpr b)
normalizeExpr (AT.While _ c b) = AT.While normalizeLoc (normalizeExpr c) (normalizeExpr b)
normalizeExpr (AT.Continue _) = AT.Continue normalizeLoc
normalizeExpr (AT.Break _) = AT.Break normalizeLoc
normalizeExpr (AT.StructAccess _ e1 e2) = AT.StructAccess normalizeLoc (normalizeExpr e1) (normalizeExpr e2)
normalizeExpr (AT.ArrayAccess _ e1 e2) = AT.ArrayAccess normalizeLoc (normalizeExpr e1) (normalizeExpr e2)
normalizeExpr (AT.Cast _ t e) = AT.Cast normalizeLoc t (normalizeExpr e)
normalizeExpr (AT.ForeignFunction _ n t) = AT.ForeignFunction normalizeLoc n t
normalizeExpr (AT.Assembly _ t a) = AT.Assembly normalizeLoc t a
22 changes: 22 additions & 0 deletions lib/Ast/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,23 @@ data Type
| TUnknown
deriving (Show, Eq, Ord)

-- | Assembly constraint type
data AsmConstraint = AsmConstraint
{ outputConstraint :: String,
inputConstraints :: [String]
}
deriving (Show, Eq, Ord)

-- | Assembly expression type
data AsmExpr = AsmExpr
{ asmCode :: String,
asmConstraints :: AsmConstraint,
asmArgs :: [Expr],
asmSideEffects :: Bool,
asmAlignStack :: Bool
}
deriving (Show, Eq, Ord)

-- | Enhanced expression nodes
-- | StructAccess: For accessing struct fields
-- | ArrayAccess: For array indexing
Expand Down Expand Up @@ -113,6 +130,11 @@ data Expr
| StructAccess SrcLoc Expr Expr
| ArrayAccess SrcLoc Expr Expr
| Cast SrcLoc Type Expr
| Assembly
{ asmLoc :: SrcLoc,
asmReturnType :: Type,
asmExpr :: AsmExpr
}
deriving (Show, Eq, Ord)

-- | Enhanced operations including bitwise operations
Expand Down
1 change: 1 addition & 0 deletions lib/Shared/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ getLoc expr = case expr of
AT.Function loc _ _ _ _ -> loc
AT.ForeignFunction loc _ _ -> loc
AT.Block exprs -> getLoc $ head exprs
AT.Assembly loc _ _ -> loc
44 changes: 44 additions & 0 deletions test/Ast/Parser/AsmSpec.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
module Ast.Parser.AsmSpec (spec) where

import qualified Ast.Parser.Asm as PA
import qualified Ast.Parser.Expr as PE
import qualified Ast.Parser.State as PS
import qualified Ast.Parser.Utils as PU
import qualified Ast.Types as AT
import qualified Control.Monad.State as S
import Test.Hspec
import qualified Text.Megaparsec as M

spec :: Spec
spec = do
let parse input = do
(result, _) <- S.runStateT (M.runParserT (PA.parseAsm PE.parseExpr) "" input) PS.parserState
return result
let normalizeAsm asm = asm {AT.asmArgs = map PU.normalizeExpr $ AT.asmArgs asm}

describe "parseAsm" $ do
it "parses a simple assembly expression" $ do
let input = "{ code -> \"nop\" constraints -> \"\" args -> () side_effects -> false align_stack -> false }"
result <- parse input
let expected = Right $ AT.AsmExpr "nop" (AT.AsmConstraint "" []) [] False False
result `shouldBe` expected

it "parses a move expression" $ do
let input = "{ code -> \"mov $0, 42\" constraints -> \"=r\" args -> () side_effects -> false align_stack -> false }"
result <- parse input
let expected = Right $ AT.AsmExpr "mov $0, 42" (AT.AsmConstraint "r" []) [] False False
result `shouldBe` expected

it "parses an add expression" $ do
let input = "{ code -> \"add $0, $1\" constraints -> \"=r,r\" args -> (a b) side_effects -> false align_stack -> false }"
result <- parse input
let normalizedResult = normalizeAsm <$> result
let expected = Right $ AT.AsmExpr "add $0, $1" (AT.AsmConstraint "r" ["r"]) [AT.Var PU.normalizeLoc "a" AT.TUnknown, AT.Var PU.normalizeLoc "b" AT.TUnknown] False False
normalizedResult `shouldBe` expected

it "parses an complex expression" $ do
let input = "{ code -> \"mov $0, 0x1b; call printf\" constraints -> \"\" args -> (\"\x1b[H\") side_effects -> true align_stack -> false }"
result <- parse input
let normalizedResult = normalizeAsm <$> result
let expected = Right $ AT.AsmExpr "mov $0, 0x1b; call printf" (AT.AsmConstraint "" []) [AT.Lit PU.normalizeLoc $ AT.LArray [AT.LChar '\x1b', AT.LChar '[', AT.LChar 'H']] True False
normalizedResult `shouldBe` expected
Loading

0 comments on commit ff76ce8

Please # to comment.