Heap2Local: Optimize Arrays in addition to Structs #6478

Merged 55 commits on Apr 9, 2024
Changes from 47 commits
Commits
b9ca885
work
kripken Apr 4, 2024
b06f76e
work
kripken Apr 5, 2024
a75e5c1
work
kripken Apr 5, 2024
b86fcf3
builds
kripken Apr 5, 2024
3e728e6
todo
kripken Apr 5, 2024
5dfda8d
formt
kripken Apr 5, 2024
336dfcb
work
kripken Apr 5, 2024
fba8353
fix.comment
kripken Apr 5, 2024
dfe819d
typo
kripken Apr 5, 2024
517d02d
fix compiler warnings
kripken Apr 5, 2024
4608b29
exciting
kripken Apr 5, 2024
347ebe0
yolo
kripken Apr 5, 2024
90cabde
yolo
kripken Apr 5, 2024
eef7df5
prep
kripken Apr 5, 2024
86c98ad
work
kripken Apr 5, 2024
a51a7cb
test
kripken Apr 5, 2024
3b44e01
twork
kripken Apr 5, 2024
e21ce34
fix
kripken Apr 5, 2024
2962579
work
kripken Apr 5, 2024
1e01201
work
kripken Apr 5, 2024
6a39f6a
work
kripken Apr 5, 2024
faad72d
work
kripken Apr 5, 2024
d5383ad
work
kripken Apr 5, 2024
bfab495
work
kripken Apr 5, 2024
fd102dd
fix
kripken Apr 5, 2024
0dd4970
work
kripken Apr 5, 2024
4bd7a37
work
kripken Apr 5, 2024
bdb88d6
work
kripken Apr 5, 2024
3a195cf
work
kripken Apr 5, 2024
1b1cc3b
work
kripken Apr 5, 2024
996ab05
work
kripken Apr 5, 2024
7ebb094
test
kripken Apr 5, 2024
00616b8
fix
kripken Apr 5, 2024
e6480d6
format
kripken Apr 5, 2024
bffbdd8
fix
kripken Apr 5, 2024
1c9e1c5
commento
kripken Apr 5, 2024
d11f563
Merge remote-tracking branch 'origin/main' into heap2local.nfc.2
kripken Apr 6, 2024
db6780f
test
kripken Apr 6, 2024
e833122
test
kripken Apr 6, 2024
8a0431a
work
kripken Apr 8, 2024
c793aa8
test
kripken Apr 8, 2024
bfceeb8
work
kripken Apr 8, 2024
9b2be22
test?
kripken Apr 8, 2024
58253c7
fix
kripken Apr 8, 2024
08fe484
undo
kripken Apr 8, 2024
41c0765
nicer
kripken Apr 8, 2024
37de9a5
comment
kripken Apr 8, 2024
7a57798
Merge remote-tracking branch 'origin/main' into heap2local.nfc.2
kripken Apr 9, 2024
f1e9013
fix
kripken Apr 9, 2024
5c17188
clarify
kripken Apr 9, 2024
becbf61
feedback
kripken Apr 9, 2024
4e279b0
feedback: remove unused code
kripken Apr 9, 2024
568856c
feedback: fix
kripken Apr 9, 2024
83b3b0b
feedback: test
kripken Apr 9, 2024
1e02354
feedback: test
kripken Apr 9, 2024
319 changes: 298 additions & 21 deletions src/passes/Heap2Local.cpp
@@ -228,7 +228,7 @@ struct EscapeAnalyzer {
};

// Analyze an allocation to see if it escapes or not.
bool escapes(StructNew* allocation) {
bool escapes(Expression* allocation) {
// A queue of flows from children to parents. When something is in the queue
// here then it is assumed that it is ok for the allocation to be at the child
// (that is, we have already checked the child before placing it in the
@@ -330,7 +330,7 @@ struct EscapeAnalyzer {
return false;
}

ParentChildInteraction getParentChildInteraction(StructNew* allocation,
ParentChildInteraction getParentChildInteraction(Expression* allocation,
Expression* parent,
Expression* child) {
// If there is no parent then we are the body of the function, and that
@@ -340,7 +340,7 @@
}

struct Checker : public Visitor<Checker> {
StructNew* allocation;
Expression* allocation;
Expression* child;

// Assume escaping (or some other problem we cannot analyze) unless we are
@@ -415,8 +415,28 @@ struct EscapeAnalyzer {
escapes = false;
fullyConsumes = true;
}
void visitArraySet(ArraySet* curr) {
if (!curr->index->is<Const>()) {
// Array operations on nonconstant indexes do not escape in the normal
// sense, but they do escape from our being able to analyze them, so
// stop as soon as we see one.
return;
}

// TODO Array and I31 operations
// As StructGet.
if (curr->ref == child) {
escapes = false;
fullyConsumes = true;
}
}
void visitArrayGet(ArrayGet* curr) {
if (!curr->index->is<Const>()) {
return;
}
escapes = false;
fullyConsumes = true;
}
// TODO other GC operations
} checker;

checker.allocation = allocation;
@@ -776,6 +796,186 @@ struct Struct2Local : PostWalker<Struct2Local> {
}
};

// An optimizer that handles the rewriting to turn a nonescaping array
// allocation into a struct allocation. Struct2Local can then be run on that
// allocation.
// TODO: As with Struct2Local doing a single rewrite walk at the end (for all
// structs) would be more efficient, but more complex.
struct Array2Struct : PostWalker<Array2Struct> {
Expression* allocation;
EscapeAnalyzer& analyzer;
Function* func;
Builder builder;

Array2Struct(Expression* allocation,
EscapeAnalyzer& analyzer,
Function* func,
Module& wasm)
: allocation(allocation), analyzer(analyzer), func(func), builder(wasm) {

// Build the struct type we need: as many fields as the size of the array,
// all of the same type as the array's element.
auto arrayType = allocation->type.getHeapType();
auto element = arrayType.getArray().element;
if (auto* arrayNew = allocation->dynCast<ArrayNew>()) {
numFields = getIndex(arrayNew->size);
} else if (auto* arrayNewFixed = allocation->dynCast<ArrayNewFixed>()) {
numFields = arrayNewFixed->values.size();
Member: Maybe this part can be factored into a getSize helper?

Member Author: Good idea, done.

} else {
WASM_UNREACHABLE("bad allocation");
}
FieldList fields;
for (Index i = 0; i < numFields; i++) {
fields.push_back(element);
}
HeapType structType = Struct(fields);

// Generate a StructNew to replace the ArrayNew*.
if (auto* arrayNew = allocation->dynCast<ArrayNew>()) {
if (arrayNew->isWithDefault()) {
structNew = builder.makeStructNew(structType, {});
arrayNewReplacement = structNew;
} else {
// The ArrayNew is writing the same value to each slot of the array. To
// do the same for the struct, we store that value in a local and
// generate multiple local.gets of it.
auto local = builder.addVar(func, element.type);
auto* set = builder.makeLocalSet(local, arrayNew->init);
std::vector<Expression*> gets;
for (Index i = 0; i < numFields; i++) {
gets.push_back(builder.makeLocalGet(local, element.type));
}
structNew = builder.makeStructNew(structType, gets);
// The ArrayNew* will be replaced with a block containing the local.set
// and the structNew.
arrayNewReplacement = builder.makeSequence(set, structNew);
// The data flows through the new block we just added: inform the
// analysis of that by telling it to treat it as code that it reached
// (only code we reached during the tracing of the allocation through
// the function will be optimized in Struct2Local).
noteIsReached(arrayNewReplacement);
}
} else if (auto* arrayNewFixed = allocation->dynCast<ArrayNewFixed>()) {
// Simply use the same values as the array.
structNew = builder.makeStructNew(structType, arrayNewFixed->values);
arrayNewReplacement = structNew;
Member: Do we need to noteIsReached in this case as well? If not, why? If it's because of the noteCurrentIsReached calls below, then why do we need the noteIsReached above?

Member Author: Ah, good point, this was not consistent. It happened to work but it's confusing. I moved the noteIsReached to a shared location and applied it to both structNew and arrayNewReplacement uniformly, with a better (hopefully) explanation.

} else {
WASM_UNREACHABLE("bad allocation");
}

// Update types along the path reached by the allocation: whenever we see
// the array type, it should be the struct type. Note that we do this before
// the walk that is after us, because the walk may read these types and
// depend on them to be valid.
auto nullArray = Type(arrayType, Nullable);
auto nonNullArray = Type(arrayType, NonNullable);
auto nullStruct = Type(structType, Nullable);
auto nonNullStruct = Type(structType, NonNullable);
for (auto* reached : analyzer.reached) {
if (reached->type == nullArray) {
reached->type = nullStruct;
} else if (reached->type == nonNullArray) {
reached->type = nonNullStruct;
}
}
Member: What about supertypes of the array type?

Member Author: Good catch, the fuzzer noticed this overnight as well 😄 Fixed.


// Technically we should also fix up the types of locals as well, but after
// Struct2Local those locals will no longer be used anyhow (the locals hold
// allocations that are removed), so avoid that work (though it makes the
// IR temporarily invalid in between Array2Struct and Struct2Local).

// Replace the things we need to using the visit* methods.
walk(func->body);

if (refinalize) {
ReFinalize().walkFunctionInModule(func, &wasm);
}
}
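
For illustration, here is a rough sketch of the array.new rewrite performed in the constructor above, in WebAssembly text format. This is not output copied from the pass; the type names ($A, $S) and the local $init are hypothetical, and the surrounding declarations are assumed. An allocation that fills three slots with the same init value:

  ;; original allocation: size 3, every element initialized to 7
  (array.new $A
   (i32.const 7)   ;; init value
   (i32.const 3))  ;; size

conceptually becomes a block that stores the init value in a fresh local and then builds a three-field struct from repeated local.gets, which Struct2Local can then lower into locals:

  ;; replacement: local.set of the init value, then a struct.new
  (block (result (ref $S))
   (local.set $init (i32.const 7))
   (struct.new $S
    (local.get $init)
    (local.get $init)
    (local.get $init)))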

// In rare cases we may need to refinalize, as with Struct2Local.
bool refinalize = false;

// The number of slots in the array (which will become the number of fields in
// the struct).
Index numFields;

// The StructNew that replaces the ArrayNew*. The user of this class can then
// optimize that StructNew using Struct2Local.
StructNew* structNew;

// The replacement for the original ArrayNew*. Typically this is |structNew|,
// unless we have additional code we need alongside it.
Expression* arrayNewReplacement;

void visitArrayNew(ArrayNew* curr) {
if (curr == allocation) {
replaceCurrent(arrayNewReplacement);
noteCurrentIsReached();
}
}

void visitArrayNewFixed(ArrayNewFixed* curr) {
if (curr == allocation) {
replaceCurrent(arrayNewReplacement);
noteCurrentIsReached();
}
}

void visitArraySet(ArraySet* curr) {
if (!analyzer.reached.count(curr)) {
return;
}

// If this is an OOB array.set then we trap.
auto index = getIndex(curr->index);
if (index >= numFields) {
replaceCurrent(builder.makeBlock({builder.makeDrop(curr->ref),
builder.makeDrop(curr->value),
builder.makeUnreachable()}));
// We added an unreachable, and must propagate that type.
refinalize = true;
return;
}

// Convert the ArraySet into a StructSet.
replaceCurrent(builder.makeStructSet(index, curr->ref, curr->value));
noteCurrentIsReached();
}

void visitArrayGet(ArrayGet* curr) {
Member: Do we need to handle other array operations like ArrayCopy, ArrayFill, or the string allocation instructions?

Member Author: We have a TODO for those. This is safe atm as EscapeAnalyzer will assume the worst for anything it does not recognize, like those.

if (!analyzer.reached.count(curr)) {
return;
}

// If this is an OOB array.get then we trap.
auto index = getIndex(curr->index);
if (index >= numFields) {
replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref),
builder.makeUnreachable()));
// We added an unreachable, and must propagate that type.
refinalize = true;
return;
}

// Convert the ArrayGet into a StructGet.
replaceCurrent(
builder.makeStructGet(index, curr->ref, curr->type, curr->signed_));
noteCurrentIsReached();
}

// Get the value in an expression we know must contain a constant index.
Index getIndex(Expression* curr) {
return curr->cast<Const>()->value.getUnsigned();
}

// Inform the analyzer that the current expression (which we just replaced)
// has been reached in its analysis. We are replacing something it reached,
// and want it to consider it as its equivalent.
void noteCurrentIsReached() { noteIsReached(getCurrent()); }

void noteIsReached(Expression* curr) { analyzer.reached.insert(curr); }
};
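
To make the end-to-end flow concrete, here is a before/after sketch in WebAssembly text format. The names are illustrative and this is not taken from the pass or its test suite; the array type $A and struct type $S are assumed to be declared with i32 elements/fields.

  ;; Before: a fixed-size, nonescaping array read only at a constant index.
  (func $get-second (result i32)
   (array.get $A
    (array.new_fixed $A 2
     (i32.const 10)
     (i32.const 20))
    (i32.const 1)))

  ;; After Array2Struct (conceptually): a two-field struct, with the
  ;; array.get at index 1 turned into a struct.get of field 1.
  (func $get-second (result i32)
   (struct.get $S 1
    (struct.new $S
     (i32.const 10)
     (i32.const 20))))

Struct2Local then lowers the struct.new and struct.get into plain locals, so later optimizations reduce the body to (i32.const 20). A constant index that is out of bounds would instead be replaced by drops of the operands plus an unreachable, preserving the trap of the original array access.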

// Core Heap2Local optimization that operates on a function: Builds up the data
// structures we need (LocalGraph, etc.) that we will use across multiple
// analyses of allocations, and then runs those analyses and optimizes where
@@ -800,44 +1000,121 @@ struct Heap2Local {
// repeated work, see above.
std::unordered_set<Expression*> seen;

// All the allocations in the function.
// TODO: Arrays (of constant size) as well, if all element accesses use
// constant indexes. One option might be to first convert such
// nonescaping arrays into structs.
FindAll<StructNew> allocations(func->body);
// Find all the relevant allocations in the function: StructNew, ArrayNew,
// ArrayNewFixed.
struct AllocationFinder : public PostWalker<AllocationFinder> {
std::vector<StructNew*> structNews;
std::vector<Expression*> arrayNews;

void visitStructNew(StructNew* curr) {
// Ignore unreachable allocations that DCE will remove anyhow.
if (curr->type != Type::unreachable) {
structNews.push_back(curr);
}
}
void visitArrayNew(ArrayNew* curr) {
// Only new arrays of fixed size are relevant for us.
if (curr->type != Type::unreachable && isValidSize(curr->size)) {
arrayNews.push_back(curr);
}
}
void visitArrayNewFixed(ArrayNewFixed* curr) {
if (curr->type != Type::unreachable &&
isValidSize(curr->values.size())) {
arrayNews.push_back(curr);
}
}

bool isValidSize(Expression* size) {
// The size of an array is valid if it is constant, and its value is
// valid.
if (auto* c = size->dynCast<Const>()) {
return isValidSize(c->value.getUnsigned());
}
return false;
}

for (auto* allocation : allocations.list) {
bool isValidSize(Index size) {
// Set a reasonable limit on the size here, as valid wasm can contain
// things like (array.new (i32.const -1)) which will likely fail at
// runtime on a VM limitation on array size. We also are converting a
// heap allocation to a stack allocation, which can be noticeable in
// some cases, so to be careful here use a fairly small limit.
return size < 20;
}
} finder;
finder.walk(func->body);

// First, lower non-escaping arrays into structs. That allows us to handle
// arrays in a single place, and let all the rest of this pass assume we are
// working on structs. We are in fact only optimizing struct-like arrays
// here, that is, arrays of a fixed size and whose items are accessed using
// constant indexes, so they are effectively structs, and turning them into
// such allows uniform handling later.
for (auto* allocation : finder.arrayNews) {
// The point of this optimization is to replace heap allocations with
// locals, so we must be able to place the data in locals.
if (!canHandleAsLocals(allocation->type)) {
continue;
}

EscapeAnalyzer analyzer(
seen, localGraph, parents, branchTargets, passOptions, wasm);
if (!analyzer.escapes(allocation)) {
// Convert the allocation and all its uses into a struct. Then convert
// the struct into locals.
auto* structNew =
Array2Struct(allocation, analyzer, func, wasm).structNew;
Struct2Local(structNew, analyzer, func, wasm);
}
}

// Next, process all structNews.
for (auto* allocation : finder.structNews) {
// As above, we must be able to use locals for this data.
if (!canHandleAsLocals(allocation->type)) {
continue;
}

// Check for escaping, noting relevant information as we go. If this does
// not escape, optimize it.
// not escape, optimize it into locals.
EscapeAnalyzer analyzer(
seen, localGraph, parents, branchTargets, passOptions, wasm);
if (!analyzer.escapes(allocation)) {
Struct2Local optimizer(allocation, analyzer, func, wasm);
Struct2Local(allocation, analyzer, func, wasm);
}
}
}

bool canHandleAsLocal(const Field& field) {
if (!TypeUpdating::canHandleAsLocal(field.type)) {
return false;
}
if (field.isPacked()) {
// TODO: support packed fields by adding coercions/truncations.
return false;
}
return true;
}

bool canHandleAsLocals(Type type) {
if (type == Type::unreachable) {
return false;
}
auto& fields = type.getHeapType().getStruct().fields;
for (auto field : fields) {
if (!TypeUpdating::canHandleAsLocal(field.type)) {
return false;
}
if (field.isPacked()) {
// TODO: support packed fields by adding coercions/truncations.
return false;

auto heapType = type.getHeapType();
if (heapType.isStruct()) {
auto& fields = heapType.getStruct().fields;
for (auto field : fields) {
if (!canHandleAsLocal(field)) {
return false;
}
}
return true;
}
return true;

assert(heapType.isArray());
return canHandleAsLocal(heapType.getArray().element);
}
};
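
For contrast, a brief sketch (again in WebAssembly text format, with illustrative names) of allocations and accesses that the code above deliberately skips, based on the checks in AllocationFinder and the escape analysis:

  ;; Skipped: the size is not a compile-time constant, so there is no fixed
  ;; field count for a struct.
  (array.new $A (i32.const 0) (local.get $n))

  ;; Skipped: the size is constant but at or above the small limit in
  ;; isValidSize.
  (array.new $A (i32.const 0) (i32.const 10000))

  ;; Not optimizable: a nonconstant index keeps the escape analysis from
  ;; reasoning about the allocation stored in $arr.
  (array.get $A (local.get $arr) (local.get $i))

  ;; Skipped: the allocation escapes by being passed to a call.
  (call $use (array.new_fixed $A 1 (i32.const 42)))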
