Skip to content

Commit 58f7719

Browse files
committed
fix: Encode source locations
1 parent 7106e38 commit 58f7719

File tree

6 files changed

+1497
-442
lines changed

6 files changed

+1497
-442
lines changed

Diff for: packages/cxx-gen-ast/src/gen_ast_encoder_cc.ts

+78-33
Original file line numberDiff line numberDiff line change
@@ -104,54 +104,45 @@ export function gen_ast_encoder_cc({
104104
});
105105
} else if (m.kind === "node" && by_base.has(m.type)) {
106106
const className = makeClassName(m.type);
107-
emit(
108-
`const auto [${m.name}, ${m.name}Type] = accept${className}(ast->${m.name});`
109-
);
107+
emit(`const auto [${m.name}, ${m.name}Type] =`);
108+
emit(` accept${className}(ast->${m.name});`);
110109
finalizers.push(() => {
111110
emit(` builder.add_${fieldName}(${m.name});`);
112-
emit(
113-
` builder.add_${fieldName}_type(static_cast<io::${className}>(${m.name}Type));`
114-
);
111+
emit(` builder.add_${fieldName}_type(`);
112+
emit(` static_cast<io::${className}>(${m.name}Type));`);
115113
});
116114
} else if (m.kind === "node-list" && !by_base.has(m.type)) {
117115
const className = makeClassName(m.type);
118-
emit(
119-
` std::vector<flatbuffers::Offset<io::${className}>> ${m.name}Offsets;`
120-
);
116+
emit(` std::vector<flatbuffers::Offset<io::${className}>>`);
117+
emit(` ${m.name}Offsets;`);
121118
emit(` for (auto it = ast->${m.name}; it; it = it->next) {`);
122119
emit(` if (!it->value) continue;`);
123120
emit(` ${m.name}Offsets.emplace_back(accept(it->value).o);`);
124121
emit(` }`);
125122
emit();
126-
emit(
127-
` auto ${m.name}OffsetsVector = fbb_.CreateVector(${m.name}Offsets);`
128-
);
123+
emit(` auto ${m.name}OffsetsVector = fbb_.CreateVector(`);
124+
emit(` ${m.name}Offsets);`);
129125
finalizers.push(() => {
130126
emit(` builder.add_${fieldName}(${m.name}OffsetsVector);`);
131127
});
132128
} else if (m.kind === "node-list" && by_base.has(m.type)) {
133129
const className = makeClassName(m.type);
134130
emit(` std::vector<flatbuffers::Offset<>> ${m.name}Offsets;`);
135-
emit(
136-
` std::vector<std::underlying_type_t<io::${className}>> ${m.name}Types;`
137-
);
131+
emit(` std::vector<std::underlying_type_t<io::${className}>>`);
132+
emit(` ${m.name}Types;`);
138133
emit();
139-
emit(` for (auto it = ast->${m.name}; it; it = it->next) {`);
140-
emit(` if (!it->value) continue;`);
141-
emit(
142-
` const auto [offset, type] = accept${className}(it->value);`
143-
);
144-
emit(` ${m.name}Offsets.push_back(offset);`);
145-
emit(` ${m.name}Types.push_back(type);`);
146-
emit(` }`);
147-
134+
emit(` for (auto it = ast->${m.name}; it; it = it->next) {`);
135+
emit(` if (!it->value) continue;`);
136+
emit(` const auto [offset, type] = accept${className}(`);
137+
emit(` it->value);`);
138+
emit(` ${m.name}Offsets.push_back(offset);`);
139+
emit(` ${m.name}Types.push_back(type);`);
140+
emit(` }`);
148141
emit();
149-
emit(
150-
` auto ${m.name}OffsetsVector = fbb_.CreateVector(${m.name}Offsets);`
151-
);
152-
emit(
153-
` auto ${m.name}TypesVector = fbb_.CreateVector(${m.name}Types);`
154-
);
142+
emit(` auto ${m.name}OffsetsVector = fbb_.CreateVector(`);
143+
emit(` ${m.name}Offsets);`);
144+
emit(` auto ${m.name}TypesVector = fbb_.CreateVector(`);
145+
emit(` ${m.name}Types);`);
155146

156147
finalizers.push(() => {
157148
emit(` builder.add_${fieldName}(${m.name}OffsetsVector);`);
@@ -178,9 +169,13 @@ export function gen_ast_encoder_cc({
178169
emitLiteral(m, "identifiers_", finalizers);
179170
} else if (m.kind === "attribute" && m.type === "TokenKind") {
180171
finalizers.push(() => {
181-
emit(
182-
` builder.add_${fieldName}(static_cast<std::uint32_t>(ast->${m.name}));`
183-
);
172+
emit(` builder.add_${fieldName}(`);
173+
emit(` static_cast<std::uint32_t>(ast->${m.name}));`);
174+
});
175+
} else if (m.kind == "token") {
176+
emit(` auto ${m.name} = encodeSourceLocation(ast->${m.name});`);
177+
finalizers.push(() => {
178+
emit(` builder.add_${fieldName}(${m.name}.o);`);
184179
});
185180
}
186181
});
@@ -207,6 +202,7 @@ export function gen_ast_encoder_cc({
207202
#include <cxx/literals.h>
208203
#include <cxx/names.h>
209204
#include <cxx/translation_unit.h>
205+
#include <cxx/private/format.h>
210206
211207
#include <algorithm>
212208
@@ -219,13 +215,17 @@ auto ASTEncoder::operator()(TranslationUnit* unit) -> std::span<const std::uint8
219215
Table<StringLiteral> stringLiterals;
220216
Table<IntegerLiteral> integerLiterals;
221217
Table<FloatLiteral> floatLiterals;
218+
SourceFiles sourceFiles;
219+
SourceLines sourceLines;
222220
223221
std::swap(unit_, unit);
224222
std::swap(identifiers_, identifiers);
225223
std::swap(charLiterals_, charLiterals);
226224
std::swap(stringLiterals_, stringLiterals);
227225
std::swap(integerLiterals_, integerLiterals);
228226
std::swap(floatLiterals_, floatLiterals);
227+
std::swap(sourceFiles_, sourceFiles);
228+
std::swap(sourceLines_, sourceLines);
229229
230230
auto [unitOffset, unitType] = acceptUnit(unit_->ast());
231231
@@ -242,6 +242,8 @@ auto ASTEncoder::operator()(TranslationUnit* unit) -> std::span<const std::uint8
242242
std::swap(stringLiterals_, stringLiterals);
243243
std::swap(integerLiterals_, integerLiterals);
244244
std::swap(floatLiterals_, floatLiterals);
245+
std::swap(sourceFiles_, sourceFiles);
246+
std::swap(sourceLines_, sourceLines);
245247
246248
fbb_.Finish(builder.Finish(), io::SerializedUnitIdentifier());
247249
@@ -257,6 +259,49 @@ auto ASTEncoder::accept(AST* ast) -> flatbuffers::Offset<> {
257259
return offset;
258260
}
259261
262+
auto ASTEncoder::encodeSourceLocation(const SourceLocation& loc)
263+
-> flatbuffers::Offset<> {
264+
if (!loc) {
265+
return {};
266+
}
267+
268+
std::string_view fileName;
269+
uint32_t line = 0, column = 0;
270+
unit_->getTokenStartPosition(loc, &line, &column, &fileName);
271+
272+
flatbuffers::Offset<io::SourceLine> sourceLineOffset;
273+
274+
auto key = std::tuple(fileName, line);
275+
276+
if (sourceLines_.contains(key)) {
277+
sourceLineOffset = sourceLines_.at(key).o;
278+
} else {
279+
flatbuffers::Offset<flatbuffers::String> fileNameOffset;
280+
281+
if (sourceFiles_.contains(fileName)) {
282+
fileNameOffset = sourceFiles_.at(fileName);
283+
} else {
284+
fileNameOffset = fbb_.CreateString(fileName);
285+
sourceFiles_.emplace(fileName, fileNameOffset.o);
286+
}
287+
288+
io::SourceLineBuilder sourceLineBuilder{fbb_};
289+
sourceLineBuilder.add_file_name(fileNameOffset);
290+
sourceLineBuilder.add_line(line);
291+
sourceLineOffset = sourceLineBuilder.Finish();
292+
sourceLines_.emplace(std::move(key), sourceLineOffset.o);
293+
}
294+
295+
io::SourceLocationBuilder sourceLocationBuilder{fbb_};
296+
sourceLocationBuilder.add_source_line(sourceLineOffset);
297+
sourceLocationBuilder.add_column(column);
298+
299+
auto offset = sourceLocationBuilder.Finish();
300+
301+
return offset.Union();
302+
}
303+
304+
260305
${code.join("\n")}
261306
262307
} // namespace cxx

Diff for: packages/cxx-gen-ast/src/gen_ast_encoder_h.ts

+15
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,22 @@ export function gen_ast_encoder_h({
4444
emit(` using Table = std::unordered_map<const T*,`);
4545
emit(` flatbuffers::Offset<flatbuffers::String>>;`);
4646
emit();
47+
emit(` using SourceFiles = std::unordered_map<`);
48+
emit(` std::string_view,`);
49+
emit(` flatbuffers::Offset<flatbuffers::String>>;`);
50+
emit();
51+
emit(` using SourceLines = std::map<`);
52+
emit(` std::tuple<std::string_view, std::uint32_t>,`);
53+
emit(` flatbuffers::Offset<flatbuffers::String>>;`);
54+
emit();
4755
emit(` TranslationUnit* unit_ = nullptr;`);
4856
emit(` Table<Identifier> identifiers_;`);
4957
emit(` Table<CharLiteral> charLiterals_;`);
5058
emit(` Table<StringLiteral> stringLiterals_;`);
5159
emit(` Table<IntegerLiteral> integerLiterals_;`);
5260
emit(` Table<FloatLiteral> floatLiterals_;`);
61+
emit(` SourceFiles sourceFiles_;`);
62+
emit(` SourceLines sourceLines_;`);
5363
emit(` flatbuffers::FlatBufferBuilder fbb_;`);
5464
emit(` flatbuffers::Offset<> offset_;`);
5565
emit(` std::uint32_t type_ = 0;`);
@@ -61,6 +71,9 @@ export function gen_ast_encoder_h({
6171
emit(` -> std::span<const std::uint8_t>;`);
6272

6373
emit(`private:`);
74+
emit(` auto encodeSourceLocation(const SourceLocation& loc)`);
75+
emit(` -> flatbuffers::Offset<>;`);
76+
emit();
6477
emit(` auto accept(AST* ast) -> flatbuffers::Offset<>;`);
6578
by_base.forEach((_nodes, base) => {
6679
if (base === "AST") return;
@@ -90,10 +103,12 @@ export function gen_ast_encoder_h({
90103
#include <tuple>
91104
#include <span>
92105
#include <unordered_map>
106+
#include <map>
93107
94108
namespace cxx {
95109
96110
class TranslationUnit;
111+
class SourceLocation;
97112
98113
${code.join("\n")}
99114

Diff for: packages/cxx-gen-ast/src/gen_ast_fbs.ts

+8-38
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,10 @@ export function gen_ast_fbs({ ast, output }: { ast: AST; output: string }) {
9696
case "node-list":
9797
break;
9898
case "token":
99-
emit(` ${fieldName}: Token;`);
99+
emit(` ${fieldName}: SourceLocation;`);
100100
break;
101101
case "token-list":
102-
emit(` ${fieldName}: [Token];`);
102+
emit(` ${fieldName}: [SourceLocation];`);
103103
break;
104104
case "attribute": {
105105
break;
@@ -116,35 +116,14 @@ export function gen_ast_fbs({ ast, output }: { ast: AST; output: string }) {
116116
const out = `${cpy_header}
117117
namespace cxx.io;
118118
119-
union TokenValue {
120-
Identifier,
121-
StringConstant,
122-
FloatConstant,
123-
IntConstant,
124-
}
125-
126-
table Identifier {
127-
name: string;
128-
}
129-
130-
table FloatConstant {
131-
value: float64;
132-
}
133-
134-
table IntConstant {
135-
value: int64;
136-
}
137-
138-
table StringConstant {
139-
value: string;
119+
// A line of a source file, identified by the file's name and a line number.
table SourceLine {
120+
file_name: string;
121+
line: uint32;
140122
}
141123
142-
table Token {
143-
kind: uint16;
144-
flags: uint16;
145-
offset: uint32;
146-
length: uint32;
147-
value: TokenValue;
124+
// A position in the source: a SourceLine plus a column on that line.
table SourceLocation {
125+
source_line: SourceLine;
126+
column: uint32;
148127
}
149128
150129
${code.join("\n")}
@@ -153,15 +132,6 @@ table SerializedUnit {
153132
version: uint32;
154133
unit: Unit;
155134
file_name: string;
156-
integer_literals: [string];
157-
float_literals: [string];
158-
char_literals: [string];
159-
string_literals: [string];
160-
comment_literals: [string];
161-
wide_string_literals: [string];
162-
utf8_string_literals: [string];
163-
utf16_string_literals: [string];
164-
utf32_string_literals: [string];
165135
}
166136
167137
root_type SerializedUnit;

0 commit comments

Comments
 (0)