-
Notifications
You must be signed in to change notification settings - Fork 14
/
dockerfile_parser.pest
159 lines (130 loc) · 5.26 KB
/
dockerfile_parser.pest
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// (C) Copyright 2019 Hewlett Packard Enterprise Development LP
// partially derived from the TOML example:
// https://github.com/pest-parser/pest/blob/master/grammars/src/grammars/toml.pest
dockerfile = { SOI ~ meta_step ~ (NEWLINE ~ meta_step)* ~ EOI }
meta_step = _{ ws* ~ (step | comment)? ~ ws* }
step = _{
(
from |
run |
arg |
label |
copy |
entrypoint |
cmd |
env |
// todos:
// add | workdir | user
// things that we probably won't bother supporting
// expose | volume | onbuild | stopsignal | healthcheck | shell
// deprecated: maintainer
// catchall for unsupported directives
misc
)
}
// insignificant whitespace, not repeated
ws = _{ " " | "\t" }
comment = @{ "#" ~ (!NEWLINE ~ ANY)* }
comment_line = _{ ws* ~ comment ~ NEWLINE? }
empty_line = _{ ws* ~ NEWLINE }
double_quoted_string = @{ "\"" ~ inner ~ "\"" }
inner = @{ (!("\"" | "\\" | "\u{0000}" | "\u{001F}") ~ ANY)* ~ (escape ~ inner)? }
escape = @{ "\\" ~ ("b" | "t" | "n" | "f" | "r" | "\"" | "\\" | "'" | unicode | NEWLINE)? }
unicode = @{ "u" ~ ASCII_HEX_DIGIT{4} | "U" ~ ASCII_HEX_DIGIT{8} }
single_quoted_string = @{ "'" ~ single_quoted_inner ~ "'" }
single_quoted_inner = @{ (!("'" | "\\" | "\u{0000}" | "\u{001F}") ~ ANY)* ~ (escape ~ single_quoted_inner)? }
string = ${ single_quoted_string | double_quoted_string }
// a line continuation, allowing an instruction to continue onto a new line
line_continuation = _{ "\\" ~ ws* ~ NEWLINE }
// whitespace that may appear between instruction arguments
// this allows instructions to expand past a newline if escaped
arg_ws = _{ (ws | line_continuation ~ (comment_line | empty_line)*)+ }
// like arg_ws, but where whitespace is optional
arg_ws_maybe = _{ (ws | line_continuation ~ (comment_line | empty_line)*)* }
// continues consuming input beyond a newline, if the newline is preceeded by an
// escape (\)
// these tokens need to be preserved in the final tree so they can be handled
// appropraitely; pest's ignore rules aren't sufficient for our needs
any_content = @{
(
!NEWLINE ~
!line_continuation ~
ANY
)+
}
any_breakable = ${
(
// can be any comment string (no line continuation required)
comment_line ~ any_breakable?
) | (
// ... OR some piece of content, requiring a continuation EXCEPT on the
// final line
any_content ~ (line_continuation ~ any_breakable)?
)
}
// consumes any character until the end of the line
any_eol = _{ (!NEWLINE ~ ANY)* }
// consumes all characters until the next whitespace
any_whitespace = _{ (!(NEWLINE | EOI | arg_ws) ~ ANY)+ }
// consumes identifier characters until the next whitespace
identifier_whitespace = _{ (!ws ~ (ASCII_ALPHANUMERIC | "_" | "-"))+ }
// consumes until whitespace or = (for key in key=value pairs)
any_equals = _{ (!(NEWLINE | ws | "=") ~ ANY)+ }
// parses ["foo", "bar", "baz"] with excessive escaping
string_array = _{
(
"[" ~
arg_ws_maybe ~ string ~
(arg_ws_maybe ~ "," ~ arg_ws_maybe ~ string)* ~
","? ~
arg_ws_maybe ~ "]"
) | "[" ~ arg_ws_maybe ~ "]"
}
from_flag_name = @{ ASCII_ALPHA+ }
from_flag_value = @{ any_whitespace }
from_flag = { "--" ~ from_flag_name ~ "=" ~ from_flag_value }
from_image = @{ (ASCII_ALPHANUMERIC | "_" | "-" | "." | ":" | "/" | "$" | "{" | "}" | "@")+ }
from_alias = { identifier_whitespace }
from_alias_outer = _{ arg_ws ~ ^"as" ~ arg_ws ~ from_alias }
from = { ^"from" ~ (arg_ws ~ from_flag)* ~ arg_ws ~ from_image ~ from_alias_outer? }
arg_name = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
arg_value = ${ any_whitespace }
arg_quoted_value = ${ string }
arg = { ^"arg" ~ arg_ws ~ arg_name ~ ("=" ~ (arg_quoted_value | arg_value))? }
label_name = ${ any_equals }
label_quoted_name = ${ string }
label_value = ${ any_whitespace }
label_quoted_value = ${ string }
label_pair = {
(label_quoted_name | label_name) ~ "=" ~ (label_quoted_value | label_value)
}
label_single_name = { any_equals }
label_single_quoted_name = { string }
label_single = { arg_ws ~ (label_single_quoted_name | label_single_name) ~ arg_ws ~ (label_quoted_value | label_value) }
label = { ^"label" ~ (label_single | (arg_ws ~ label_pair?)+) }
run_shell = @{ any_breakable }
run_exec = { string_array }
run = { ^"run" ~ arg_ws ~ (run_exec | run_shell) }
entrypoint_shell = @{ any_breakable }
entrypoint_exec = { string_array }
entrypoint = { ^"entrypoint" ~ arg_ws ~ (entrypoint_exec | entrypoint_shell) }
cmd_shell = @{ any_breakable }
cmd_exec = { string_array }
cmd = { ^"cmd" ~ arg_ws ~ (cmd_exec | cmd_shell) }
copy_flag_name = @{ ASCII_ALPHA+ }
copy_flag_value = @{ any_whitespace }
copy_flag = { "--" ~ copy_flag_name ~ "=" ~ copy_flag_value }
copy_pathspec = @{ any_whitespace }
copy = { ^"copy" ~ (arg_ws ~ copy_flag)* ~ (arg_ws ~ copy_pathspec){2,} }
env_name = ${ (ASCII_ALPHANUMERIC | "_")+ }
env_pair_value = ${ any_whitespace }
env_pair_quoted_value = ${ string }
env_pair = @{ env_name ~ "=" ~ (env_pair_quoted_value | env_pair_value) }
env_pairs = { (arg_ws ~ env_pair?)+ }
env_single_quoted_value = ${ string }
env_single_value = @{ any_breakable }
env_single = { arg_ws ~ env_name ~ arg_ws ~ (env_single_quoted_value | env_single_value) }
env = { ^"env" ~ (env_single | env_pairs) }
misc_instruction = @{ ASCII_ALPHA+ }
misc_arguments = @{ any_breakable }
misc = { misc_instruction ~ misc_arguments }