-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgcc-options-parser.sh.mako
451 lines (383 loc) · 12.3 KB
/
gcc-options-parser.sh.mako
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
<%
# mako template for gcc-options-parser.bash
# The code in this block is plain Python. It collects the names of the GCC
# options that consume the following argument; the template text after the
# block inserts them into a bash "case" pattern.
# config
# Directory that contains the "gcc" folder (the GCC source tree).
# *.opt files are searched recursively below gcc_src_dir + "/gcc".
gcc_src_dir = "./gcc"
import re, sys, os, glob
def separate_options_of_gcc_opt_file(gcc_opt_file):
    """
    Return only the "Separate" options of one GCC .opt file.

    A "Separate" option consumes the next command-line argument.

    The file is read as records separated by blank lines. Lines whose first
    character is ";" are skipped. Inside a record, line 1 is the option name
    (without the leading "-"), line 2 is a space-separated property list such
    as "Joined Separate Var(x)" and line 3, when present, is the help text.
    Records whose first line is "Variable" or "###" and records that consist
    of a single line are ignored.

    :param gcc_opt_file: path of the .opt file to read
    :return: list of option names, each prefixed with "-", in file order
        (duplicates are not removed here)
    """
    print(f"gcc_opt_file = {gcc_opt_file}")

    # Pass 1: group the non-comment lines into blank-line separated records.
    records = []
    record = []
    with open(gcc_opt_file, "r") as opt_file:
        for line in opt_file:
            line = line.strip()
            if line == "":
                if record:
                    records.append(record)
                record = []
                continue
            if line[0] == ";":
                continue
            record.append(line)
    if record:
        # the last record is not necessarily followed by a blank line
        records.append(record)

    # Pass 2: parse every option record.
    opts = []
    for record in records:
        # TODO parse sections? ###\nDriver etc
        # ignore: Variable\nint var_name
        # ignore: Mask(SVINTO)
        if record[0] == "Variable" or record[0] == "###" or len(record) < 2:
            continue

        # "name=..." and "name,..." forms: keep only the bare name
        # TODO better
        # probably can ignore these (unary options)
        name = "-" + record[0].split("=")[0].split(",")[0]

        description = record[2] if len(record) > 2 else None

        # "Prop" -> True, "Prop(arg)" -> "arg"
        opts_opts = {}
        for prop, arg in re.findall(r"([^() ]+)(?:\(([^()]+)\))?", record[1]):
            opts_opts[prop] = arg if arg else True

        if "Var" in opts_opts:
            variable_name = opts_opts["Var"]
        else:
            variable_name = name.replace("-", "_")

        # JoinedOrMissing: -time=timeval or time= (empty value)
        takes_value = (
            "Joined" in opts_opts
            or "Separate" in opts_opts
            or "JoinedOrMissing" in opts_opts
        )

        # a dash plus a single character is a short option
        is_short = len(name) == 2
        opts.append({
            "variable_name": variable_name,  # TODO use this
            "short_name": name if is_short else None,
            "long_name": None if is_short else name,
            "takes_value": takes_value,
            "help_text": description,
            "opts_opts": opts_opts,
        })

    # TODO remove short/long name parsing, not used
    separate_options = []
    for opt in opts:
        if "Separate" not in opt["opts_opts"]:
            continue
        if opt["short_name"]:
            separate_options.append(opt["short_name"])
        if opt["long_name"]:
            separate_options.append(opt["long_name"])
    return separate_options
# Collect the "Separate" options of every *.opt file below the GCC source tree.
# The file list is sorted so that the debug output has a stable order.
separate_options = []
for gcc_opt_file in sorted(glob.glob(gcc_src_dir+"/gcc/**/*.opt", recursive=True)):
    separate_options += separate_options_of_gcc_opt_file(gcc_opt_file)
# remove special cases: -o and -x have their own branches in the generated "case"
separate_options = [s for s in separate_options if s != "-o" and s != "-x"]
if not separate_options:
    # An empty list would be rendered as a bare ")" case pattern, which is a
    # bash syntax error. Fail at render time with a readable message instead.
    raise RuntimeError(
        "no Separate options found in *.opt files below " + gcc_src_dir + "/gcc"
    )
# filter unique, then sort by length ascending.
# Ties are broken alphabetically: set() iteration order depends on string hash
# randomisation, so sorting by length alone gives a different generated script
# on every run.
separate_options = sorted(set(separate_options), key=lambda s: (len(s), s))
%>\
#! /usr/bin/env bash
# NOTE this file was generated by gcc-options-parser.sh.mako
#
# Usage: SCRIPT GCC_PATH [GCC_ARGUMENT...]
# The first argument is the compiler to call, all following arguments are the
# original gcc command line. The script builds one preprocessor call per
# C/C++ input file and one final call that uses the preprocessed files.
# TODO remove preprocessor args from the final compiler call -> -D -I ...
# Both settings below are used as commands ("$dry_run || ...",
# "$remove_linemarkers && ..."), so they must be "true" or "false".
remove_linemarkers=true # add -P to preprocessor call. this breaks debug info
dry_run=true # dont call gcc, just print args
gccPath="$1"
shift
args=("$@")
# used for all input files
# NOTE(review): constArgs is not referenced anywhere else in the visible script.
constArgs=()
# indexes (into args) of the arguments that are copied to every gcc call
globalArgIdxList=()
# default values
# inPathList: input files in command-line order
# inLangList: language of each input file, same order as inPathList
# NOTE(review): inPathIdxList (index in args of each input file) is appended to
# by the parse loop but is not initialised here like the other lists.
inPathList=()
inLangList=() # TODO use these in the last compile/assemble/link step only for the unprocessed files
# current "-x language" value. "none" means: derive the language from the file extension
inLang=none<%doc>
default language depends on the compiler.
g++ -> default language is c++, agnostic of file extension.
-> g++ will parse *.c files as c++ files.
see https://github.com/mozilla/sccache/issues/748
</%doc>
#oPath=a.out
# output path given with -o. empty means: no -o argument was seen
oPath=
<%doc>
see "-o file" in https://gcc.gnu.org/onlinedocs/gcc/Overall-Options.html
> If -o is not specified, the default is to put
> an executable file in a.out,
> the object file for source.suffix in source.o,
> its assembler file in source.s,
> a precompiled header file in source.suffix.gch,
> and all preprocessed C source on standard output.
</%doc>
<%doc>
C/C++ file extensions
based on gcc/cp/lang-specs.h
docs https://gcc.gnu.org/onlinedocs/gcc/Overall-Options.html#Options-Controlling-the-Kind-of-Output
gcc is case-sensitive here
this is an "inverse regex pattern"
the leading + trailing spaces are required
</%doc>
# space-separated list of C/C++ file extensions.
# an extension is looked up by matching " ext " (with both spaces) against this string
cLangExtPatt=" c h C H cc hh cpp hpp cxx hxx c++ h++ CPP HPP cp hp tcc "
# input file extension -> extension of the temporary file written by the preprocess step
declare -A tmpExtOfInExt
tmpExtOfInExt=( [c]=i [h]=gch [C]=ii [H]=gch [cc]=ii [hh]=gch [cpp]=ii [hpp]=gch [cxx]=ii [hxx]=gch [c++]=ii [h++]=gch [CPP]=ii [HPP]=gch [cp]=ii [hp]=gch [tcc]=ii )
# explicit "-x language" value -> extension of the temporary file.
# languages that are missing here are not preprocessed
declare -A tmpExtOfInLang
tmpExtOfInLang=( [c]=i [c-header]=gch [cpp-output]=o [c++]=ii [c++-header]=gch [c++-system-header]=gch [c++-user-header]=gch [c++-cpp-output]=o )
# TODO verify: cpp-output = C PreProcessor output? -> tmpExt is o?
# TODO verify: c++-cpp-output = C++ PreProcessor output? -> tmpExt is o?
# when should gcc stop?
# each flag is set to 1 by the parse loop when -E / -S / -c is on the command line
<%doc> https://gcc.gnu.org/onlinedocs/gcc/Overall-Options.html </%doc>
stopE=
stopS=
stopC=
# Walk over all arguments once and classify each one:
# output path, language, stop flag, global option (with or without value),
# @file (expanded in place) or input file.
# The "case" branches are tried in order, so the specific patterns must stay
# above the generic "-*" pattern.
${"for ((i = 0; i < ${#args[@]}; i++ ))"}
do
${"a=${args[$i]}"}
case "$a" in
-o*)
# output path: joined form (-oPATH) or separate form (-o PATH, consumes the next argument).
# a second -o is an error
# NOTE(review): the error message is written to stdout, not stderr
[ -n "$oPath" ] && { echo "error: can have only one output. old: $oPath. new: $a"; exit 1; }
if [ "$a" != "-o" ]; then ${"oPath=${a:2}"}; else : $((i++)); ${"oPath=${args[$i]}"}; fi
#echo "o: $oPath"
;;
-x*)
# language for the following input files: joined form (-xLANG) or separate form (-x LANG)
if [ "$a" != "-x" ]; then ${"inLang=${a:2}"}; else : $((i++)); ${"inLang=${args[$i]}"}; fi
#echo "f: $inLang"
;;
# stop flags are only remembered here, they are not added to globalArgIdxList
-E) stopE=1;;
-S) stopS=1;;
-c) stopC=1;;
# dropped: the script appends its own -frandom-seed to the gcc calls
-frandom-seed=*);; # ignore
# generated pattern: all "Separate" options found in the gcc *.opt files
${"|".join(separate_options)})<%doc>
note: -c -E -S ... are missing, cos they are "unary" options = dont consume the next argument.
we parse only some of these unary options, the rest is passed through to gcc.
</%doc>
# remember the index of the option and the index of its value
globalArgIdxList+=($i)
: $((i++))
globalArgIdxList+=($i)
# b is only used by the commented-out debug output below
${"b=${args[$i]}"}
#echo "2: $a $b"
;;
-*)
# any other option: passed through without a value
#echo "1: $a"
globalArgIdxList+=($i)
;;
@*)<%doc>
@file is a "Joined only" option. see https://gcc.gnu.org/onlinedocs/gcc/Overall-Options.htm
</%doc>
# read the arguments from the file, splice them into args in place of the
# @file argument, then step back so that the first spliced argument is parsed next
${ 'argsFile="${a:1}"' }
# NOTE(review): the error message is written to stdout, not stderr
[ ! -e "$argsFile" ] && { echo "error parsing option $a: no such file"; exit 1; }
eval "fileArgs=( $(cat "$argsFile") )" # WARNING eval is unsafe
${ 'args=( "${args[@]:0:$i}" "${fileArgs[@]}" "${args[@]:$((i + 1))}" )' }<%doc> replace the @file argument </%doc>
# NOTE(review): argsLen is not read anywhere in the visible script,
# the loop condition reads the length of args directly
${ 'argsLen=${#args[@]}' }<%doc> update length </%doc>
: $((i--))<%doc> re-parse the replaced argument </%doc>
;;
*)
# everything else is an input file. it must exist
inPathList+=("$a")
if [ ! -e "$a" ];
then
# NOTE(review): the error message is written to stdout, not stderr
echo "error: missing input file: $a"
exit 1
fi
inPathIdxList+=("$i")
# no explicit -x language: classify by file extension.
# ext equal to the whole argument means that the path contains no dot
if [ "$inLang" = "none" ]
then<%doc> parse language from file extension </%doc>
${ 'ext="${a##*.}"' }
if [ "$ext" = "$a" ]; then inLangList+=("_ld")<%doc>default: linker script. but "ld" is not supported by the "-x language" option</%doc>
elif [[ "$cLangExtPatt" = *" $ext "* ]]; then inLangList+=("_cfam")<%doc>C family = C or C++</%doc>
else inLangList+=("_not_cfam")<%doc>here we only care for "cfam or not cfam"</%doc>
fi
else
inLangList+=("$inLang")
fi
${ '#echo "i: $a [format: ${inLangList[ -1]}]"' }
;;
esac
done
${"""
# -E or -S was given: do not split the command line.
# print the original call, run it unless dry_run is true, then exit.
# "exit" without argument uses the exit status of the last executed command
if [[ $stopE || $stopS ]];
then
echo "dont preprocess <- stopE=$stopE stopS=$stopS stopC=$stopC"
echo "$gccPath" "${args[@]}"
$dry_run || "$gccPath" "${args[@]}"
exit
fi
"""}
# split the gcc command line -> one call per source file
# NOT. TODO run gcc calls in parallel # not: already done by cmake
# debug output, disabled by "if false":
# prints all arguments, then only the arguments listed in globalArgIdxList
if false; then
echo original args
${ 'for (( i=0; i<${#args[@]}; i++ ))' }
do
${ 'echo "arg $i: ${args[$i]}"' }
done
echo global args
${ 'for i in ${globalArgIdxList[@]}' }
do
${ 'echo "arg $i: ${args[$i]}"' }
done
fi
# Preprocess step: one gcc call per input file.
# For every input file i this loop appends exactly one entry to tmpPathList and
# tmpLangList:
#   - C/C++ input: the path of the preprocessed temporary file, language "_cfam_prep"
#   - other input: the unchanged input path and its language
# tmpPathIdxList holds the index (into args) of every input file.
tmpPathIdxList=()
tmpPathList=()
tmpLangList=()
# TODO preprocess only C/C++ sources
echo preprocess args:
${ 'for (( i=0; i<${#inPathList[@]}; i++ ))' }
do
  ${ 'inPathIdx=${inPathIdxList[$i]}' }
  ${ 'inPath=${inPathList[$i]}' }
  ${ 'inLang=${inLangList[$i]}' }
  tmpPathIdxList+=("$inPathIdx")
  if [[ "$inLang" != "_ld" && "$inLang" != "_not_cfam" ]]
  then
    #echo "arg $inPathIdx -> input $i: path $inPath + lang = $inLang"
    # the input file, preceded by "-x language" when the language was set
    # explicitly on the command line. The parse loop does not keep "-x" in
    # globalArgIdxList, so it has to be added again here, no matter where the
    # input file ends up in the argument list.
    inPathArgs=()
    if [ "$inLang" != "_cfam" ]; then
      inPathArgs+=(-x "$inLang")
    fi
    inPathArgs+=("$inPath")
    # copy the global arguments and put the input file at its original position
    inArgs=()
    doneInPath=
    ${ 'for idx in "${globalArgIdxList[@]}"; do' }
      if [[ ! $doneInPath && $idx -gt $inPathIdx ]]; then
        # insert input-path argument at original index
        ${ 'inArgs+=("${inPathArgs[@]}")' }
        doneInPath=1
      fi
      ${ 'inArgs+=("${args[$idx]}")' }
    done
    # insert input-path argument at end
    if [[ ! $doneInPath ]]; then
      ${ 'inArgs+=("${inPathArgs[@]}")' }
    fi
    # extension of the temporary file: from the file extension ...
    ${ 'inExt=${inPath##*.}' }
    ${ 'tmpExt=${tmpExtOfInExt[$inExt]}' }
    #echo "tmpExt = $tmpExt from ext $inExt"
    # if language was set ...
    if [ "$inLang" != "_cfam" ]; then
      # ... the explicit language wins over the file extension
      ${ 'tmpExt=${tmpExtOfInLang[$inLang]}' }
      #echo "tmpExt = $tmpExt from lang $inLang"
      if [ -z "$tmpExt" ]; then
        # not a cfam language -> dont preprocess
        #echo "dont preprocess input $i: path $inPath + lang = $inLang"
        tmpPathList+=("$inPath")
        tmpLangList+=("$inLang")
        continue
      fi
    fi
    #tmpName=$(echo "$inPath" | tr / _)
    tmpName="$(basename "$inPath")"
    ${ 'tmpName=${tmpName%.*}' }
    ${ '[ ${#tmpName} -gt 200 ] && tmpName=${tmpName: -200} # max 255 chars' }
    #tmpPath="/tmp/$tmpName.$tmpExt"
    #tmpPath="$(mktemp "/tmp/$tmpName-XXXXX.$tmpExt")" # must not be random! reproducible builds.
    tmpPath="/tmp/$(nix-hash --base32 "$inPath")-$tmpName.$tmpExt"
    # nix-hash -> 26 chars
    # 255 - 1 - 26 = 228
    tmpPathList+=("$tmpPath")
    tmpLangList+=("_cfam_prep") # prep = preprocessed
    inArgs+=("-o" "$tmpPath")
    inArgs+=("-E") # stop after preprocess
    $remove_linemarkers && inArgs+=("-P") # remove linemarkers<%doc>
remove linemarkers = remove input file paths from preprocessor output.
linemarkers are needed for debugging https://github.com/mozilla/sccache/issues/1026
</%doc>#
    inArgs+=("-frandom-seed=$tmpPath")<%doc>
bazel: -frandom-seed=%{output_file}
FIXME should be constant. input/output paths can be variable -> make paths relative to CCACHE_BASEDIR
</%doc>
${"""
echo "$gccPath" "${inArgs[@]}"
$dry_run || "$gccPath" "${inArgs[@]}"
"""}
    # TODO run gcc
    # TODO run gcc in background, wait for all to finish
    # TODO patch all temp files in one sed call
    # TODO
  else
    #echo "dont preprocess input $i: path $inPath + lang = $inLang"
    tmpPathList+=("$inPath")
    tmpLangList+=("$inLang")
  fi
done
${"""
# array_indexof without echo
# Usage: array_contains ELEMENT... VALUE
# Return 0 when VALUE (the last argument) is equal to one of the ELEMENT
# arguments before it, else return 1. Prints nothing.
# With fewer than two arguments there is nothing to search: return 1.
function array_contains() {
  [ $# -lt 2 ] && return 1
  # The last argument is the value, all arguments before it are the elements.
  # The elements are taken by slicing the arguments. An unquoted
  # "unset a[-1]" is a glob pattern: a file named "a1" or "a-" in the working
  # directory would keep the value inside the searched list.
  local v="${!#}"
  local e
  for e in "${@:1:$#-1}"; do
    if [ "$e" = "$v" ]; then
      return 0 # stop after first match
    fi
  done
  return 1
}
# https://stackoverflow.com/a/70793702/10440128
# Usage: array_indexof ELEMENT... VALUE
# Print the zero-based index of the first ELEMENT that is equal to VALUE
# (the last argument) and return 0.
# Print nothing and return 1 when no ELEMENT matches or when fewer than two
# arguments are given.
function array_indexof() {
  [ $# -lt 2 ] && return 1
  # The last argument is the value, all arguments before it are the elements.
  # The elements are taken by slicing the arguments. An unquoted
  # "unset a[-1]" is a glob pattern: a file named "a1" or "a-" in the working
  # directory would keep the value inside the searched list.
  local v="${!#}"
  local a=("${@:1:$#-1}")
  local i
  for i in "${!a[@]}"; do
    if [ "${a[$i]}" = "$v" ]; then
      echo "$i"
      return 0 # stop after first match
    fi
  done
  return 1
}
"""}
# Build the argument list of the final gcc call.
# The arguments keep their original order:
#   - an argument listed in globalArgIdxList is copied
#   - an input file is replaced by its entry in tmpPathList
#   - everything else (-o, -x, -E, -S, -c, -frandom-seed=...) is left out here
inArgs=()
${ 'iMax=${#args[@]}' }
for (( i=0; i<$iMax; i++ )); do
  ${ 'a="${args[$i]}"' }
  #echo "i = $i + a = $a" # debug
  ${ 'if array_contains "${globalArgIdxList[@]}" "$i"' }
  then
    ${ 'inArgs+=("$a")' }
  else
    # empty tmpIdx: argument number i is not an input file
    ${ 'tmpIdx=$(array_indexof "${inPathIdxList[@]}" "$i")' }
    # the path is quoted: it can contain spaces or glob characters
    ${ 'if [ -n "$tmpIdx" ]; then inArgs+=("${tmpPathList[$tmpIdx]}"); fi' }
  fi
done
${"""
# Finish the argument list of the final gcc call and run it.
# The parse loop did not keep -o and -c as global arguments,
# so they are added again here when they were on the command line.
# TODO add output arg?
# TODO add -frandom-seed=xxx arg?
if [ -n "$oPath" ]; then
inArgs+=("-o" "$oPath")
fi
if [[ $stopC ]]; then
inArgs+=("-c")
fi
# fix sort order
export LC_ALL=C
export LANG=C
# TODO maybe avoid hashing file paths, instead, hash the file contents
# the seed is derived from the sorted list of paths in tmpPathList, one path per line
# NOTE(review): presumably nix-hash hashes the text read from /dev/stdin - confirm
randomSeed=$(printf '%s\n' "${tmpPathList[@]}" | sort | nix-hash --base32 /dev/stdin)
inArgs+=("-frandom-seed=$randomSeed")
# print the final call, run it only when dry_run is false
echo final args:
echo "$gccPath" "${inArgs[@]}"
$dry_run || "$gccPath" "${inArgs[@]}"
"""}