From b9ca8853ea87fde8d455980bcd3926732cac5c11 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Apr 2024 15:04:17 -0700 Subject: [PATCH 01/53] work --- src/passes/Heap2Local.cpp | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index fcf0d88863b..2eb5b5802a7 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -203,7 +203,12 @@ struct Heap2LocalOptimizer { continue; } - convertToLocals(allocation); + // Check for escaping, noting relevant information as we go. If this does + // not escape, optimize it. + Rewriter rewriter(allocation, func, module); + if (!escapes(allocation, rewriter)) { + rewriter.applyOptimization(); + } } } @@ -518,11 +523,10 @@ struct Heap2LocalOptimizer { Mixes, }; - // Analyze an allocation to see if we can convert it from a heap allocation to - // locals. - void convertToLocals(StructNew* allocation) { - Rewriter rewriter(allocation, func, module); - + // Analyze an allocation to see if it escapes or not. We receive a Rewriter + // instance on which we store important information as we go, which will be + // necessary if we optimize later. + bool escapes(StructNew* allocation, Rewriter& rewriter) { // A queue of flows from children to parents. When something is in the queue // here then it assumed that it is ok for the allocation to be at the child // (that is, we have already checked the child before placing it in the @@ -546,7 +550,7 @@ struct Heap2LocalOptimizer { interaction == ParentChildInteraction::Mixes) { // If the parent may let us escape, or the parent mixes other values // up with us, give up. - return; + return true; } // The parent either fully consumes us, or flows us onwards; either way, @@ -575,7 +579,7 @@ struct Heap2LocalOptimizer { // added the parent to |seen| for both children, the reference would get // blocked from being optimized. 
if (!seen.emplace(parent).second) { - return; + return true; } // We can proceed, as the parent interacts with us properly, and we are @@ -617,11 +621,11 @@ struct Heap2LocalOptimizer { // We finished the loop over the flows. Do the final checks. if (!getsAreExclusiveToSets(rewriter.sets)) { - return; + return true; } - // We can do it, hurray! - rewriter.applyOptimization(); + // Nothing escapes, hurray! + return false; } ParentChildInteraction getParentChildInteraction(StructNew* allocation, From b06f76e826c58240d8182e75837c3f559faa498f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Apr 2024 17:01:05 -0700 Subject: [PATCH 02/53] work --- src/passes/Heap2Local.cpp | 711 ++++++++++++++++++++------------------ 1 file changed, 370 insertions(+), 341 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 2eb5b5802a7..2d97b1ef0f6 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -166,342 +166,34 @@ namespace wasm { namespace { -struct Heap2LocalOptimizer { - Function* func; - Module* module; - const PassOptions& passOptions; +// A particular escape analysis is templated over an optimizer. We inform the +// optimizer of relevant things as we work, to avoid repeated work later: the +// optimizer can use the information we provided it to optimize efficiently. +// +// The optimizer provides the following methods: +// +// * noteLocalSet(LocalSet*) that notes a local.set that the allocation is +// written to. +// +template +struct EscapeAnalyzer { + Optimizer& optimizer; - // To find allocations that do not escape, we must track locals so that we - // can see how allocations flow from sets to gets and so forth. - // TODO: only scan reference types - LocalGraph localGraph; + // All the expressions that have already been seen by the optimizer. TODO refer to commentt + std::unordered_set& seen; // To find what escapes, we need to follow where values flow, both up to - // parents, and via branches. 
- Parents parents; - BranchUtils::BranchTargets branchTargets; - - Heap2LocalOptimizer(Function* func, - Module* module, - const PassOptions& passOptions) - : func(func), module(module), passOptions(passOptions), - localGraph(func, module), parents(func->body), branchTargets(func->body) { - // We need to track what each set influences, to see where its value can - // flow to. - localGraph.computeSetInfluences(); - - // All the allocations in the function. - // TODO: Arrays (of constant size) as well, if all element accesses use - // constant indexes. One option might be to first convert such - // nonescaping arrays into structs. - FindAll allocations(func->body); - - for (auto* allocation : allocations.list) { - // The point of this optimization is to replace heap allocations with - // locals, so we must be able to place the data in locals. - if (!canHandleAsLocals(allocation->type)) { - continue; - } - - // Check for escaping, noting relevant information as we go. If this does - // not escape, optimize it. - Rewriter rewriter(allocation, func, module); - if (!escapes(allocation, rewriter)) { - rewriter.applyOptimization(); - } - } - } - - bool canHandleAsLocals(Type type) { - if (type == Type::unreachable) { - return false; - } - auto& fields = type.getHeapType().getStruct().fields; - for (auto field : fields) { - if (!TypeUpdating::canHandleAsLocal(field.type)) { - return false; - } - if (field.isPacked()) { - // TODO: support packed fields by adding coercions/truncations. - return false; - } - } - return true; - } - - // Handles the rewriting that we do to perform the optimization. We store the - // data that rewriting will need here, while we analyze, and then if we can do - // the optimization, we tell it to run. - // - // TODO: Doing a single rewrite walk at the end would be more efficient, but - // it would need to be more complex. 
- struct Rewriter : PostWalker { - StructNew* allocation; - Function* func; - Module* module; - Builder builder; - const FieldList& fields; - - Rewriter(StructNew* allocation, Function* func, Module* module) - : allocation(allocation), func(func), module(module), builder(*module), - fields(allocation->type.getHeapType().getStruct().fields) {} - - // We must track all the local.sets that write the allocation, to verify - // exclusivity. - std::unordered_set sets; - - // All the expressions we reached during the flow analysis. That is exactly - // all the places where our allocation is used. We track these so that we - // can fix them up at the end, if the optimization ends up possible. - std::unordered_set reached; - - // Maps indexes in the struct to the local index that will replace them. - std::vector localIndexes; - - // In rare cases we may need to refinalize, see below. - bool refinalize = false; - - void applyOptimization() { - // Allocate locals to store the allocation's fields in. - for (auto field : fields) { - localIndexes.push_back(builder.addVar(func, field.type)); - } - - // Replace the things we need to using the visit* methods. - walk(func->body); - - if (refinalize) { - ReFinalize().walkFunctionInModule(func, module); - } - } - - // Rewrite the code in visit* methods. The general approach taken is to - // replace the allocation with a null reference (which may require changing - // types in some places, like making a block return value nullable), and to - // remove all uses of it as much as possible, using the information we have - // (for example, when our allocation reaches a RefAsNonNull we can simply - // remove that operation as we know it would not throw). Some things are - // left to other passes, like getting rid of dropped code without side - // effects. - - // Adjust the type that flows through an expression, updating that type as - // necessary. 
- void adjustTypeFlowingThrough(Expression* curr) { - if (!reached.count(curr)) { - return; - } - - // Our allocation passes through this expr. We must turn its type into a - // nullable one, because we will remove things like RefAsNonNull of it, - // which means we may no longer have a non-nullable value as our input, - // and we could fail to validate. It is safe to make this change in terms - // of our parent, since we know very specifically that only safe things - // will end up using our value, like a StructGet or a Drop, which do not - // care about non-nullability. - assert(curr->type.isRef()); - curr->type = Type(curr->type.getHeapType(), Nullable); - } - - void visitBlock(Block* curr) { adjustTypeFlowingThrough(curr); } - - void visitLoop(Loop* curr) { adjustTypeFlowingThrough(curr); } - - void visitLocalSet(LocalSet* curr) { - if (!reached.count(curr)) { - return; - } - - // We don't need any sets of the reference to any of the locals it - // originally was written to. - if (curr->isTee()) { - replaceCurrent(curr->value); - } else { - replaceCurrent(builder.makeDrop(curr->value)); - } - } - - void visitLocalGet(LocalGet* curr) { - if (!reached.count(curr)) { - return; - } - - // Uses of this get will drop it, so the value does not matter. Replace it - // with something else, which avoids issues with non-nullability (when - // non-nullable locals are enabled), which could happen like this: - // - // (local $x (ref $foo)) - // (local.set $x ..) - // (.. (local.get $x)) - // - // If we remove the set but not the get then the get would appear to read - // the default value of a non-nullable local, which is not allowed. - // - // For simplicity, replace the get with a null. We anyhow have null types - // in the places where our allocation was earlier, see notes on - // visitBlock, and so using a null here adds no extra complexity. 
- replaceCurrent(builder.makeRefNull(curr->type.getHeapType())); - } - - void visitBreak(Break* curr) { - if (!reached.count(curr)) { - return; - } - - // Breaks that our allocation flows through may change type, as we now - // have a nullable type there. - curr->finalize(); - } - - void visitStructNew(StructNew* curr) { - if (curr != allocation) { - return; - } - - // First, assign the initial values to the new locals. - std::vector contents; - - if (!allocation->isWithDefault()) { - // We must assign the initial values to temp indexes, then copy them - // over all at once. If instead we did set them as we go, then we might - // hit a problem like this: - // - // (local.set X (new_X)) - // (local.set Y (block (result ..) - // (.. (local.get X) ..) ;; returns new_X, wrongly - // (new_Y) - // ) - // - // Note how we assign to the local X and use it during the assignment to - // the local Y - but we should still see the old value of X, not new_X. - // Temp locals X', Y' can ensure that: - // - // (local.set X' (new_X)) - // (local.set Y' (block (result ..) - // (.. (local.get X) ..) ;; returns the proper, old X - // (new_Y) - // ) - // .. - // (local.set X (local.get X')) - // (local.set Y (local.get Y')) - std::vector tempIndexes; - - for (auto field : fields) { - tempIndexes.push_back(builder.addVar(func, field.type)); - } - - // Store the initial values into the temp locals. - for (Index i = 0; i < tempIndexes.size(); i++) { - contents.push_back( - builder.makeLocalSet(tempIndexes[i], allocation->operands[i])); - } - - // Copy them to the normal ones. - for (Index i = 0; i < tempIndexes.size(); i++) { - contents.push_back(builder.makeLocalSet( - localIndexes[i], - builder.makeLocalGet(tempIndexes[i], fields[i].type))); - } - - // TODO Check if the nondefault case does not increase code size in some - // cases. A heap allocation that implicitly sets the default values - // is smaller than multiple explicit settings of locals to - // defaults. 
- } else { - // Set the default values. - // Note that we must assign the defaults because we might be in a loop, - // that is, there might be a previous value. - for (Index i = 0; i < localIndexes.size(); i++) { - contents.push_back(builder.makeLocalSet( - localIndexes[i], - builder.makeConstantExpression(Literal::makeZero(fields[i].type)))); - } - } - - // Replace the allocation with a null reference. This changes the type - // from non-nullable to nullable, but as we optimize away the code that - // the allocation reaches, we will handle that. - contents.push_back(builder.makeRefNull(allocation->type.getHeapType())); - replaceCurrent(builder.makeBlock(contents)); - } - - void visitRefAs(RefAs* curr) { - if (!reached.count(curr)) { - return; - } - - // It is safe to optimize out this RefAsNonNull, since we proved it - // contains our allocation, and so cannot trap. - assert(curr->op == RefAsNonNull); - replaceCurrent(curr->value); - } - - void visitRefCast(RefCast* curr) { - if (!reached.count(curr)) { - return; - } - - // It is safe to optimize out this RefCast, since we proved it - // contains our allocation and we have checked that the type of - // the allocation is a subtype of the type of the cast, and so - // cannot trap. - replaceCurrent(curr->ref); - - // We need to refinalize after this, as while we know the cast is not - // logically needed - the value flowing through will not be used - we do - // need validation to succeed even before other optimizations remove the - // code. For example: - // - // (block (result $B) - // (ref.cast $B - // (block (result $A) - // - // Without the cast this does not validate, so we need to refinalize - // (which will fix this, as we replace the unused value with a null, so - // that type will propagate out). 
- refinalize = true; - } - - void visitStructSet(StructSet* curr) { - if (!reached.count(curr)) { - return; - } - - // Drop the ref (leaving it to other opts to remove, when possible), and - // write the data to the local instead of the heap allocation. - replaceCurrent(builder.makeSequence( - builder.makeDrop(curr->ref), - builder.makeLocalSet(localIndexes[curr->index], curr->value))); - } - - void visitStructGet(StructGet* curr) { - if (!reached.count(curr)) { - return; - } - - auto type = fields[curr->index].type; - if (type != curr->type) { - // Normally we are just replacing a struct.get with a local.get of a - // local that was created to have the same type as the struct's field, - // but in some cases we may refine, if the struct.get's reference type - // is less refined than the reference that actually arrives, like here: - // - // (struct.get $parent 0 - // (block (ref $parent) - // (struct.new $child))) - // - // We allocated locals for the field of the child, and are replacing a - // get of the parent field with a local of the same type as the child's, - // which may be more refined. - refinalize = true; - } - replaceCurrent(builder.makeSequence( - builder.makeDrop(curr->ref), - builder.makeLocalGet(localIndexes[curr->index], type))); - } - }; - - // All the expressions we have already looked at. - std::unordered_set seen; + // parents, and via branches, and through locals. + // TODO: only scan reference types in LocalGraph + const LocalGraph& localGraph; + const Parents& parents; + const BranchUtils::BranchTargets& branchTargets; + + EscapeAnalyzer(Optimizer& optimizer, + std::unordered_set& seen, + const LocalGraph& localGraph, + const Parents& parents, + const BranchUtils::BranchTargets& branchTargets) : optimizer(optimizer), seen(seen), localGraph(localGraph), parents(parents), branchTargets(branchTargets) {} enum class ParentChildInteraction { // The parent lets the child escape. E.g. the parent is a call. 
@@ -523,10 +215,11 @@ struct Heap2LocalOptimizer { Mixes, }; - // Analyze an allocation to see if it escapes or not. We receive a Rewriter - // instance on which we store important information as we go, which will be - // necessary if we optimize later. - bool escapes(StructNew* allocation, Rewriter& rewriter) { + // Analyze an allocation to see if it escapes or not. We receive an optimizer + // class which we will inform of things as we go, that will allow the + // optimizer to avoid doing a new pass on the entire function if it decides + // to optimize. + bool escapes(StructNew* allocation) { // A queue of flows from children to parents. When something is in the queue // here then it assumed that it is ok for the allocation to be at the child // (that is, we have already checked the child before placing it in the @@ -596,7 +289,7 @@ struct Heap2LocalOptimizer { // exclusive use of our allocation by all the gets that read the value. // Note the set, and we will check the gets at the end once we know all // of our sets. - rewriter.sets.insert(set); + optimizer.noteLocalSet(set); // We must also look at how the value flows from those gets. if (auto* getsReached = getGetsReached(set)) { @@ -808,7 +501,343 @@ struct Heap2LocalOptimizer { } }; -struct Heap2Local : public WalkerPass> { +// An optimizer that handles the rewriting to turn a struct into locals. +// +// TODO: Doing a single rewrite walk at the end (for all structs) would be more +// efficient, but it would need to be more complex. +struct Struct2Local : PostWalker { + StructNew* allocation; + Function* func; + Module* module; + Builder builder; + const FieldList& fields; + + Struct2Local(StructNew* allocation, Function* func, Module* module) + : allocation(allocation), func(func), module(module), builder(*module), + fields(allocation->type.getHeapType().getStruct().fields) {} + + // We must track all the local.sets that write the allocation, to verify + // exclusivity. 
+ std::unordered_set sets; + + // All the expressions we reached during the flow analysis. That is exactly + // all the places where our allocation is used. We track these so that we + // can fix them up at the end, if the optimization ends up possible. + std::unordered_set reached; + + // Maps indexes in the struct to the local index that will replace them. + std::vector localIndexes; + + // In rare cases we may need to refinalize, see below. + bool refinalize = false; + + void noteLocalSet(LocalSet* set) { + sets.insert(set); + } + + void applyOptimization() { + // Allocate locals to store the allocation's fields in. + for (auto field : fields) { + localIndexes.push_back(builder.addVar(func, field.type)); + } + + // Replace the things we need to using the visit* methods. + walk(func->body); + + if (refinalize) { + ReFinalize().walkFunctionInModule(func, module); + } + } + + // Rewrite the code in visit* methods. The general approach taken is to + // replace the allocation with a null reference (which may require changing + // types in some places, like making a block return value nullable), and to + // remove all uses of it as much as possible, using the information we have + // (for example, when our allocation reaches a RefAsNonNull we can simply + // remove that operation as we know it would not throw). Some things are + // left to other passes, like getting rid of dropped code without side + // effects. + + // Adjust the type that flows through an expression, updating that type as + // necessary. + void adjustTypeFlowingThrough(Expression* curr) { + if (!reached.count(curr)) { + return; + } + + // Our allocation passes through this expr. We must turn its type into a + // nullable one, because we will remove things like RefAsNonNull of it, + // which means we may no longer have a non-nullable value as our input, + // and we could fail to validate. 
It is safe to make this change in terms + // of our parent, since we know very specifically that only safe things + // will end up using our value, like a StructGet or a Drop, which do not + // care about non-nullability. + assert(curr->type.isRef()); + curr->type = Type(curr->type.getHeapType(), Nullable); + } + + void visitBlock(Block* curr) { adjustTypeFlowingThrough(curr); } + + void visitLoop(Loop* curr) { adjustTypeFlowingThrough(curr); } + + void visitLocalSet(LocalSet* curr) { + if (!reached.count(curr)) { + return; + } + + // We don't need any sets of the reference to any of the locals it + // originally was written to. + if (curr->isTee()) { + replaceCurrent(curr->value); + } else { + replaceCurrent(builder.makeDrop(curr->value)); + } + } + + void visitLocalGet(LocalGet* curr) { + if (!reached.count(curr)) { + return; + } + + // Uses of this get will drop it, so the value does not matter. Replace it + // with something else, which avoids issues with non-nullability (when + // non-nullable locals are enabled), which could happen like this: + // + // (local $x (ref $foo)) + // (local.set $x ..) + // (.. (local.get $x)) + // + // If we remove the set but not the get then the get would appear to read + // the default value of a non-nullable local, which is not allowed. + // + // For simplicity, replace the get with a null. We anyhow have null types + // in the places where our allocation was earlier, see notes on + // visitBlock, and so using a null here adds no extra complexity. + replaceCurrent(builder.makeRefNull(curr->type.getHeapType())); + } + + void visitBreak(Break* curr) { + if (!reached.count(curr)) { + return; + } + + // Breaks that our allocation flows through may change type, as we now + // have a nullable type there. + curr->finalize(); + } + + void visitStructNew(StructNew* curr) { + if (curr != allocation) { + return; + } + + // First, assign the initial values to the new locals. 
+ std::vector contents; + + if (!allocation->isWithDefault()) { + // We must assign the initial values to temp indexes, then copy them + // over all at once. If instead we did set them as we go, then we might + // hit a problem like this: + // + // (local.set X (new_X)) + // (local.set Y (block (result ..) + // (.. (local.get X) ..) ;; returns new_X, wrongly + // (new_Y) + // ) + // + // Note how we assign to the local X and use it during the assignment to + // the local Y - but we should still see the old value of X, not new_X. + // Temp locals X', Y' can ensure that: + // + // (local.set X' (new_X)) + // (local.set Y' (block (result ..) + // (.. (local.get X) ..) ;; returns the proper, old X + // (new_Y) + // ) + // .. + // (local.set X (local.get X')) + // (local.set Y (local.get Y')) + std::vector tempIndexes; + + for (auto field : fields) { + tempIndexes.push_back(builder.addVar(func, field.type)); + } + + // Store the initial values into the temp locals. + for (Index i = 0; i < tempIndexes.size(); i++) { + contents.push_back( + builder.makeLocalSet(tempIndexes[i], allocation->operands[i])); + } + + // Copy them to the normal ones. + for (Index i = 0; i < tempIndexes.size(); i++) { + contents.push_back(builder.makeLocalSet( + localIndexes[i], + builder.makeLocalGet(tempIndexes[i], fields[i].type))); + } + + // TODO Check if the nondefault case does not increase code size in some + // cases. A heap allocation that implicitly sets the default values + // is smaller than multiple explicit settings of locals to + // defaults. + } else { + // Set the default values. + // Note that we must assign the defaults because we might be in a loop, + // that is, there might be a previous value. + for (Index i = 0; i < localIndexes.size(); i++) { + contents.push_back(builder.makeLocalSet( + localIndexes[i], + builder.makeConstantExpression(Literal::makeZero(fields[i].type)))); + } + } + + // Replace the allocation with a null reference. 
This changes the type + // from non-nullable to nullable, but as we optimize away the code that + // the allocation reaches, we will handle that. + contents.push_back(builder.makeRefNull(allocation->type.getHeapType())); + replaceCurrent(builder.makeBlock(contents)); + } + + void visitRefAs(RefAs* curr) { + if (!reached.count(curr)) { + return; + } + + // It is safe to optimize out this RefAsNonNull, since we proved it + // contains our allocation, and so cannot trap. + assert(curr->op == RefAsNonNull); + replaceCurrent(curr->value); + } + + void visitRefCast(RefCast* curr) { + if (!reached.count(curr)) { + return; + } + + // It is safe to optimize out this RefCast, since we proved it + // contains our allocation and we have checked that the type of + // the allocation is a subtype of the type of the cast, and so + // cannot trap. + replaceCurrent(curr->ref); + + // We need to refinalize after this, as while we know the cast is not + // logically needed - the value flowing through will not be used - we do + // need validation to succeed even before other optimizations remove the + // code. For example: + // + // (block (result $B) + // (ref.cast $B + // (block (result $A) + // + // Without the cast this does not validate, so we need to refinalize + // (which will fix this, as we replace the unused value with a null, so + // that type will propagate out). + refinalize = true; + } + + void visitStructSet(StructSet* curr) { + if (!reached.count(curr)) { + return; + } + + // Drop the ref (leaving it to other opts to remove, when possible), and + // write the data to the local instead of the heap allocation. 
+ replaceCurrent(builder.makeSequence( + builder.makeDrop(curr->ref), + builder.makeLocalSet(localIndexes[curr->index], curr->value))); + } + + void visitStructGet(StructGet* curr) { + if (!reached.count(curr)) { + return; + } + + auto type = fields[curr->index].type; + if (type != curr->type) { + // Normally we are just replacing a struct.get with a local.get of a + // local that was created to have the same type as the struct's field, + // but in some cases we may refine, if the struct.get's reference type + // is less refined than the reference that actually arrives, like here: + // + // (struct.get $parent 0 + // (block (ref $parent) + // (struct.new $child))) + // + // We allocated locals for the field of the child, and are replacing a + // get of the parent field with a local of the same type as the child's, + // which may be more refined. + refinalize = true; + } + replaceCurrent(builder.makeSequence( + builder.makeDrop(curr->ref), + builder.makeLocalGet(localIndexes[curr->index], type))); + } +}; + +struct Heap2Local { + Function* func; + Module* module; + const PassOptions& passOptions; + + LocalGraph localGraph; + Parents parents; + BranchUtils::BranchTargets branchTargets; + + Heap2Local(Function* func, + Module* module, + const PassOptions& passOptions) + : func(func), module(module), passOptions(passOptions), + localGraph(func, module), parents(func->body), branchTargets(func->body) { + // We need to track what each set influences, to see where its value can + // flow to. + localGraph.computeSetInfluences(); + + // All the expressions we have already looked at. We use this to avoid + // repeated work, see above. + std::unordered_set seen; + + // All the allocations in the function. + // TODO: Arrays (of constant size) as well, if all element accesses use + // constant indexes. One option might be to first convert such + // nonescaping arrays into structs. 
+ FindAll allocations(func->body); + + for (auto* allocation : allocations.list) { + // The point of this optimization is to replace heap allocations with + // locals, so we must be able to place the data in locals. + if (!canHandleAsLocals(allocation->type)) { + continue; + } + + // Check for escaping, noting relevant information as we go. If this does + // not escape, optimize it. + Struct2Local struct2Local(allocation, func, module); + EscapeAnalyzer analyzer(struct2Local, seen, localGraph, parents, branchTargets); + if (!analyzer.escapes(allocation)) { + struct2Local.applyOptimization(); + } + } + } + + bool canHandleAsLocals(Type type) { + if (type == Type::unreachable) { + return false; + } + auto& fields = type.getHeapType().getStruct().fields; + for (auto field : fields) { + if (!TypeUpdating::canHandleAsLocal(field.type)) { + return false; + } + if (field.isPacked()) { + // TODO: support packed fields by adding coercions/truncations. + return false; + } + } + return true; + } +}; + +struct Heap2LocalPass : public WalkerPass> { bool isFunctionParallel() override { return true; } std::unique_ptr create() override { @@ -825,12 +854,12 @@ struct Heap2Local : public WalkerPass> { // vacuum, in particular, to optimize such nested allocations. // TODO Consider running multiple iterations here, and running vacuum in // between them. 
- Heap2LocalOptimizer(func, getModule(), getPassOptions()); + Heap2Local(func, getModule(), getPassOptions()); } }; } // anonymous namespace -Pass* createHeap2LocalPass() { return new Heap2Local(); } +Pass* createHeap2LocalPass() { return new Heap2LocalPass(); } } // namespace wasm From a75e5c10c69579cb69a77f22f4b1a688874a73c4 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 10:01:50 -0700 Subject: [PATCH 03/53] work --- src/ir/branch-utils.h | 8 +++++-- src/ir/parents.h | 8 ++++++- src/passes/Heap2Local.cpp | 47 +++++++++++---------------------------- 3 files changed, 26 insertions(+), 37 deletions(-) diff --git a/src/ir/branch-utils.h b/src/ir/branch-utils.h index d90f3a1947d..e87ddcecc64 100644 --- a/src/ir/branch-utils.h +++ b/src/ir/branch-utils.h @@ -419,10 +419,14 @@ struct BranchTargets { // Gets the expression that defines this branch target, i.e., where we branch // to if we branch to that name. - Expression* getTarget(Name name) { return inner.targets[name]; } + Expression* getTarget(Name name) const { + auto iter = inner.targets.find(name); + assert(iter != inner.targets.end()); + return iter->second; + } // Gets the expressions branching to a target. 
- std::unordered_set getBranches(Name name) { + std::unordered_set getBranches(Name name) const { auto iter = inner.branches.find(name); if (iter != inner.branches.end()) { return iter->second; diff --git a/src/ir/parents.h b/src/ir/parents.h index c5614546cb8..4a2c14d5869 100644 --- a/src/ir/parents.h +++ b/src/ir/parents.h @@ -24,7 +24,13 @@ namespace wasm { struct Parents { Parents(Expression* expr) { inner.walk(expr); } - Expression* getParent(Expression* curr) { return inner.parentMap[curr]; } + Expression* getParent(Expression* curr) const { + auto iter = inner.parentMap.find(curr); + if (iter != inner.parentMap.end()) { + return iter-> second; + } + return nullptr; + } private: struct Inner diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 2d97b1ef0f6..61cfbcfa9ba 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -166,19 +166,7 @@ namespace wasm { namespace { -// A particular escape analysis is templated over an optimizer. We inform the -// optimizer of relevant things as we work, to avoid repeated work later: the -// optimizer can use the information we provided it to optimize efficiently. -// -// The optimizer provides the following methods: -// -// * noteLocalSet(LocalSet*) that notes a local.set that the allocation is -// written to. -// -template struct EscapeAnalyzer { - Optimizer& optimizer; - // All the expressions that have already been seen by the optimizer. 
TODO refer to commentt std::unordered_set& seen; @@ -189,11 +177,14 @@ struct EscapeAnalyzer { const Parents& parents; const BranchUtils::BranchTargets& branchTargets; - EscapeAnalyzer(Optimizer& optimizer, - std::unordered_set& seen, + EscapeAnalyzer(std::unordered_set& seen, const LocalGraph& localGraph, const Parents& parents, - const BranchUtils::BranchTargets& branchTargets) : optimizer(optimizer), seen(seen), localGraph(localGraph), parents(parents), branchTargets(branchTargets) {} + const BranchUtils::BranchTargets& branchTargets) : seen(seen), localGraph(localGraph), parents(parents), branchTargets(branchTargets) {} + + // We must track all the local.sets that write the allocation, to verify + // exclusivity. + std::unordered_set sets; enum class ParentChildInteraction { // The parent lets the child escape. E.g. the parent is a call. @@ -215,10 +206,7 @@ struct EscapeAnalyzer { Mixes, }; - // Analyze an allocation to see if it escapes or not. We receive an optimizer - // class which we will inform of things as we go, that will allow the - // optimizer to avoid doing a new pass on the entire function if it decides - // to optimize. + // Analyze an allocation to see if it escapes or not. bool escapes(StructNew* allocation) { // A queue of flows from children to parents. When something is in the queue // here then it assumed that it is ok for the allocation to be at the child @@ -289,7 +277,7 @@ struct EscapeAnalyzer { // exclusive use of our allocation by all the gets that read the value. // Note the set, and we will check the gets at the end once we know all // of our sets. - optimizer.noteLocalSet(set); + sets.insert(set); // We must also look at how the value flows from those gets. if (auto* getsReached = getGetsReached(set)) { @@ -313,7 +301,7 @@ struct EscapeAnalyzer { } // We finished the loop over the flows. Do the final checks. 
- if (!getsAreExclusiveToSets(rewriter.sets)) { + if (!getsAreExclusiveToSets(sets)) { return true; } @@ -477,7 +465,7 @@ struct EscapeAnalyzer { // else), we need to check whether all the gets that read that value cannot // read anything else (which would be the case if another set writes to that // local, in the right live range). - bool getsAreExclusiveToSets(const std::unordered_set& sets) { + bool getsAreExclusiveToSets(const std::unordered_set& sets) { // TODO remove param // Find all the relevant gets (which may overlap between the sets). std::unordered_set gets; for (auto* set : sets) { @@ -516,10 +504,6 @@ struct Struct2Local : PostWalker { : allocation(allocation), func(func), module(module), builder(*module), fields(allocation->type.getHeapType().getStruct().fields) {} - // We must track all the local.sets that write the allocation, to verify - // exclusivity. - std::unordered_set sets; - // All the expressions we reached during the flow analysis. That is exactly // all the places where our allocation is used. We track these so that we // can fix them up at the end, if the optimization ends up possible. @@ -531,11 +515,7 @@ struct Struct2Local : PostWalker { // In rare cases we may need to refinalize, see below. bool refinalize = false; - void noteLocalSet(LocalSet* set) { - sets.insert(set); - } - - void applyOptimization() { + void applyOptimization(const EscapeAnalyzer& analyzer) { // Allocate locals to store the allocation's fields in. for (auto field : fields) { localIndexes.push_back(builder.addVar(func, field.type)); @@ -811,10 +791,9 @@ struct Heap2Local { // Check for escaping, noting relevant information as we go. If this does // not escape, optimize it. 
- Struct2Local struct2Local(allocation, func, module); - EscapeAnalyzer analyzer(struct2Local, seen, localGraph, parents, branchTargets); + EscapeAnalyzer analyzer(localGraph, parents, branchTargets); if (!analyzer.escapes(allocation)) { - struct2Local.applyOptimization(); + struct2Local.applyOptimization(analyzer); } } } From b86fcf360fa498c876bc22bba573ca27e880feee Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 10:20:13 -0700 Subject: [PATCH 04/53] builds --- src/passes/Heap2Local.cpp | 86 +++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 61cfbcfa9ba..5a2351e5d51 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -176,16 +176,25 @@ struct EscapeAnalyzer { const LocalGraph& localGraph; const Parents& parents; const BranchUtils::BranchTargets& branchTargets; + const PassOptions& passOptions; + Module& wasm; EscapeAnalyzer(std::unordered_set& seen, const LocalGraph& localGraph, const Parents& parents, - const BranchUtils::BranchTargets& branchTargets) : seen(seen), localGraph(localGraph), parents(parents), branchTargets(branchTargets) {} + const BranchUtils::BranchTargets& branchTargets, + const PassOptions& passOptions, + Module& wasm) : seen(seen), localGraph(localGraph), parents(parents), branchTargets(branchTargets), passOptions(passOptions), wasm(wasm) {} // We must track all the local.sets that write the allocation, to verify // exclusivity. std::unordered_set sets; + // All the expressions we reached during the flow analysis. That is exactly + // all the places where our allocation is used. We track these so that we + // can fix them up at the end, if the optimization ends up possible. + std::unordered_set reached; + enum class ParentChildInteraction { // The parent lets the child escape. E.g. the parent is a call. 
Escapes, @@ -296,8 +305,8 @@ struct EscapeAnalyzer { // If we got to here, then we can continue to hope that we can optimize // this allocation. Mark the parent and child as reached by it, and // continue. - rewriter.reached.insert(parent); - rewriter.reached.insert(child); + reached.insert(parent); + reached.insert(child); } // We finished the loop over the flows. Do the final checks. @@ -414,7 +423,7 @@ struct EscapeAnalyzer { // Finally, check for mixing. If the child is the immediate fallthrough // of the parent then no other values can be mixed in. - if (Properties::getImmediateFallthrough(parent, passOptions, *module) == + if (Properties::getImmediateFallthrough(parent, passOptions, wasm) == child) { return ParentChildInteraction::Flows; } @@ -440,7 +449,7 @@ struct EscapeAnalyzer { return ParentChildInteraction::Mixes; } - LocalGraph::SetInfluences* getGetsReached(LocalSet* set) { + const LocalGraph::SetInfluences* getGetsReached(LocalSet* set) { auto iter = localGraph.setInfluences.find(set); if (iter != localGraph.setInfluences.end()) { return &iter->second; @@ -448,7 +457,7 @@ struct EscapeAnalyzer { return nullptr; } - BranchUtils::NameSet branchesSentByParent(Expression* child, + const BranchUtils::NameSet branchesSentByParent(Expression* child, Expression* parent) { BranchUtils::NameSet names; BranchUtils::operateOnScopeNameUsesAndSentValues( @@ -478,7 +487,9 @@ struct EscapeAnalyzer { // Check that the gets can only read from the specific known sets. for (auto* get : gets) { - for (auto* set : localGraph.getSetses[get]) { + auto iter = localGraph.getSetses.find(get); + assert(iter != localGraph.getSetses.end()); + for (auto* set : iter->second) { if (sets.count(set) == 0) { return false; } @@ -495,27 +506,16 @@ struct EscapeAnalyzer { // efficient, but it would need to be more complex. 
struct Struct2Local : PostWalker { StructNew* allocation; + const EscapeAnalyzer& analyzer; Function* func; - Module* module; + Module& wasm; Builder builder; const FieldList& fields; - Struct2Local(StructNew* allocation, Function* func, Module* module) - : allocation(allocation), func(func), module(module), builder(*module), - fields(allocation->type.getHeapType().getStruct().fields) {} + Struct2Local(StructNew* allocation, const EscapeAnalyzer& analyzer, Function* func, Module& wasm) + : allocation(allocation), analyzer(analyzer), func(func), wasm(wasm), builder(wasm), + fields(allocation->type.getHeapType().getStruct().fields) { - // All the expressions we reached during the flow analysis. That is exactly - // all the places where our allocation is used. We track these so that we - // can fix them up at the end, if the optimization ends up possible. - std::unordered_set reached; - - // Maps indexes in the struct to the local index that will replace them. - std::vector localIndexes; - - // In rare cases we may need to refinalize, see below. - bool refinalize = false; - - void applyOptimization(const EscapeAnalyzer& analyzer) { // Allocate locals to store the allocation's fields in. for (auto field : fields) { localIndexes.push_back(builder.addVar(func, field.type)); @@ -525,10 +525,16 @@ struct Struct2Local : PostWalker { walk(func->body); if (refinalize) { - ReFinalize().walkFunctionInModule(func, module); + ReFinalize().walkFunctionInModule(func, &wasm); } } + // Maps indexes in the struct to the local index that will replace them. + std::vector localIndexes; + + // In rare cases we may need to refinalize, see below. + bool refinalize = false; + // Rewrite the code in visit* methods. 
The general approach taken is to // replace the allocation with a null reference (which may require changing // types in some places, like making a block return value nullable), and to @@ -541,7 +547,7 @@ struct Struct2Local : PostWalker { // Adjust the type that flows through an expression, updating that type as // necessary. void adjustTypeFlowingThrough(Expression* curr) { - if (!reached.count(curr)) { + if (!analyzer.reached.count(curr)) { return; } @@ -561,7 +567,7 @@ struct Struct2Local : PostWalker { void visitLoop(Loop* curr) { adjustTypeFlowingThrough(curr); } void visitLocalSet(LocalSet* curr) { - if (!reached.count(curr)) { + if (!analyzer.reached.count(curr)) { return; } @@ -575,7 +581,7 @@ struct Struct2Local : PostWalker { } void visitLocalGet(LocalGet* curr) { - if (!reached.count(curr)) { + if (!analyzer.reached.count(curr)) { return; } @@ -597,7 +603,7 @@ struct Struct2Local : PostWalker { } void visitBreak(Break* curr) { - if (!reached.count(curr)) { + if (!analyzer.reached.count(curr)) { return; } @@ -679,7 +685,7 @@ struct Struct2Local : PostWalker { } void visitRefAs(RefAs* curr) { - if (!reached.count(curr)) { + if (!analyzer.reached.count(curr)) { return; } @@ -690,7 +696,7 @@ struct Struct2Local : PostWalker { } void visitRefCast(RefCast* curr) { - if (!reached.count(curr)) { + if (!analyzer.reached.count(curr)) { return; } @@ -716,7 +722,7 @@ struct Struct2Local : PostWalker { } void visitStructSet(StructSet* curr) { - if (!reached.count(curr)) { + if (!analyzer.reached.count(curr)) { return; } @@ -728,7 +734,7 @@ struct Struct2Local : PostWalker { } void visitStructGet(StructGet* curr) { - if (!reached.count(curr)) { + if (!analyzer.reached.count(curr)) { return; } @@ -756,7 +762,7 @@ struct Struct2Local : PostWalker { struct Heap2Local { Function* func; - Module* module; + Module& wasm; const PassOptions& passOptions; LocalGraph localGraph; @@ -764,10 +770,10 @@ struct Heap2Local { BranchUtils::BranchTargets branchTargets; 
Heap2Local(Function* func, - Module* module, + Module& wasm, const PassOptions& passOptions) - : func(func), module(module), passOptions(passOptions), - localGraph(func, module), parents(func->body), branchTargets(func->body) { + : func(func), wasm(wasm), passOptions(passOptions), + localGraph(func, &wasm), parents(func->body), branchTargets(func->body) { // We need to track what each set influences, to see where its value can // flow to. localGraph.computeSetInfluences(); @@ -791,9 +797,9 @@ struct Heap2Local { // Check for escaping, noting relevant information as we go. If this does // not escape, optimize it. - EscapeAnalyzer analyzer(localGraph, parents, branchTargets); + EscapeAnalyzer analyzer(seen, localGraph, parents, branchTargets, passOptions, wasm); if (!analyzer.escapes(allocation)) { - struct2Local.applyOptimization(analyzer); + Struct2Local optimizer(allocation, analyzer, func, wasm); } } } @@ -820,7 +826,7 @@ struct Heap2LocalPass : public WalkerPass> { bool isFunctionParallel() override { return true; } std::unique_ptr create() override { - return std::make_unique(); + return std::make_unique(); } void doWalkFunction(Function* func) { @@ -833,7 +839,7 @@ struct Heap2LocalPass : public WalkerPass> { // vacuum, in particular, to optimize such nested allocations. // TODO Consider running multiple iterations here, and running vacuum in // between them. - Heap2Local(func, getModule(), getPassOptions()); + Heap2Local(func, *getModule(), getPassOptions()); } }; From 3e728e6b0280dd59064d9bddfddefd6d97fcfd0c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 10:21:45 -0700 Subject: [PATCH 05/53] todo --- src/passes/Heap2Local.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 5a2351e5d51..ef916f8cd92 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -310,7 +310,7 @@ struct EscapeAnalyzer { } // We finished the loop over the flows. 
Do the final checks. - if (!getsAreExclusiveToSets(sets)) { + if (!getsAreExclusiveToSets()) { return true; } @@ -474,7 +474,7 @@ struct EscapeAnalyzer { // else), we need to check whether all the gets that read that value cannot // read anything else (which would be the case if another set writes to that // local, in the right live range). - bool getsAreExclusiveToSets(const std::unordered_set& sets) { // TODO remove param + bool getsAreExclusiveToSets() { // Find all the relevant gets (which may overlap between the sets). std::unordered_set gets; for (auto* set : sets) { From 5dfda8d351a43fa0cc6e520ab3a50294259333b5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 10:22:00 -0700 Subject: [PATCH 06/53] formt --- src/ir/parents.h | 2 +- src/passes/Heap2Local.cpp | 29 +++++++++++++++++------------ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/ir/parents.h b/src/ir/parents.h index 4a2c14d5869..5ab0bc2828c 100644 --- a/src/ir/parents.h +++ b/src/ir/parents.h @@ -27,7 +27,7 @@ struct Parents { Expression* getParent(Expression* curr) const { auto iter = inner.parentMap.find(curr); if (iter != inner.parentMap.end()) { - return iter-> second; + return iter->second; } return nullptr; } diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index ef916f8cd92..4bda8b8157e 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -167,7 +167,8 @@ namespace wasm { namespace { struct EscapeAnalyzer { - // All the expressions that have already been seen by the optimizer. TODO refer to commentt + // All the expressions that have already been seen by the optimizer. 
TODO + // refer to commentt std::unordered_set& seen; // To find what escapes, we need to follow where values flow, both up to @@ -184,7 +185,9 @@ struct EscapeAnalyzer { const Parents& parents, const BranchUtils::BranchTargets& branchTargets, const PassOptions& passOptions, - Module& wasm) : seen(seen), localGraph(localGraph), parents(parents), branchTargets(branchTargets), passOptions(passOptions), wasm(wasm) {} + Module& wasm) + : seen(seen), localGraph(localGraph), parents(parents), + branchTargets(branchTargets), passOptions(passOptions), wasm(wasm) {} // We must track all the local.sets that write the allocation, to verify // exclusivity. @@ -458,7 +461,7 @@ struct EscapeAnalyzer { } const BranchUtils::NameSet branchesSentByParent(Expression* child, - Expression* parent) { + Expression* parent) { BranchUtils::NameSet names; BranchUtils::operateOnScopeNameUsesAndSentValues( parent, [&](Name name, Expression* value) { @@ -512,9 +515,12 @@ struct Struct2Local : PostWalker { Builder builder; const FieldList& fields; - Struct2Local(StructNew* allocation, const EscapeAnalyzer& analyzer, Function* func, Module& wasm) - : allocation(allocation), analyzer(analyzer), func(func), wasm(wasm), builder(wasm), - fields(allocation->type.getHeapType().getStruct().fields) { + Struct2Local(StructNew* allocation, + const EscapeAnalyzer& analyzer, + Function* func, + Module& wasm) + : allocation(allocation), analyzer(analyzer), func(func), wasm(wasm), + builder(wasm), fields(allocation->type.getHeapType().getStruct().fields) { // Allocate locals to store the allocation's fields in. 
for (auto field : fields) { @@ -769,11 +775,9 @@ struct Heap2Local { Parents parents; BranchUtils::BranchTargets branchTargets; - Heap2Local(Function* func, - Module& wasm, - const PassOptions& passOptions) - : func(func), wasm(wasm), passOptions(passOptions), - localGraph(func, &wasm), parents(func->body), branchTargets(func->body) { + Heap2Local(Function* func, Module& wasm, const PassOptions& passOptions) + : func(func), wasm(wasm), passOptions(passOptions), localGraph(func, &wasm), + parents(func->body), branchTargets(func->body) { // We need to track what each set influences, to see where its value can // flow to. localGraph.computeSetInfluences(); @@ -797,7 +801,8 @@ struct Heap2Local { // Check for escaping, noting relevant information as we go. If this does // not escape, optimize it. - EscapeAnalyzer analyzer(seen, localGraph, parents, branchTargets, passOptions, wasm); + EscapeAnalyzer analyzer( + seen, localGraph, parents, branchTargets, passOptions, wasm); if (!analyzer.escapes(allocation)) { Struct2Local optimizer(allocation, analyzer, func, wasm); } From 336dfcbbdcb2a87a13368c580273c71068751a48 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 10:29:00 -0700 Subject: [PATCH 07/53] work --- src/passes/Heap2Local.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 4bda8b8157e..9310810769c 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -166,17 +166,26 @@ namespace wasm { namespace { +// Core analysis that provides an escapes() method to check if an allocation +// escapes in a way that prevents optimizing it away as described above. It also +// stashes information about the relevant expressions as it goes, which helps +// optimization later (|seen| and |reached|). struct EscapeAnalyzer { - // All the expressions that have already been seen by the optimizer. 
TODO - // refer to commentt + // All the expressions that have already been seen by the optimizer, see the + // comment above on exclusivity: once we have seen something whene analyzing + // one allocation, if we reach it again then we can exit early since seeing it + // a second time proves we lost exclusivity. We must track this across + // multiple instances of EscapeAnalyzer as each handles a particular + // allocation. std::unordered_set& seen; // To find what escapes, we need to follow where values flow, both up to // parents, and via branches, and through locals. - // TODO: only scan reference types in LocalGraph + // TODO: for efficiency, only scan reference types in LocalGraph const LocalGraph& localGraph; const Parents& parents; const BranchUtils::BranchTargets& branchTargets; + const PassOptions& passOptions; Module& wasm; @@ -503,7 +512,8 @@ struct EscapeAnalyzer { } }; -// An optimizer that handles the rewriting to turn a struct into locals. +// An optimizer that handles the rewriting to turn a struct allocation into +// locals. We run this after proving that allocation does not escape. // // TODO: Doing a single rewrite walk at the end (for all structs) would be more // efficient, but it would need to be more complex. @@ -766,6 +776,10 @@ struct Struct2Local : PostWalker { } }; +// Core Heap2Local optimization that operates on a function: Builds up the data +// structures we need (LocalGraph, etc.) that we will use across multiple +// analyses of allocations, and then runs those analyses and optimizes where +// possible. 
struct Heap2Local { Function* func; Module& wasm; From fba8353ac730183fcf0da56908f6d65fc978ad21 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 10:32:09 -0700 Subject: [PATCH 08/53] fix.comment --- test/lit/passes/heap2local.wast | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index b693c49ee95..be6538a948f 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2045,8 +2045,8 @@ ;; CHECK-NEXT: ) (func $cast-failure (result anyref) (struct.get $B 0 - ;; The allocated $A arrives here, but the cast will fail, - ;; so we do not optimize. + ;; The allocated $A arrives here, but the cast will fail, so we do not + ;; optimize. (ref.cast (ref $B) (struct.new $A (struct.new $A From dfe819d98f2e8d2d25a25ffb9e72449f47b788b5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 10:39:37 -0700 Subject: [PATCH 09/53] typo --- src/passes/Heap2Local.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 9310810769c..534ff2f3d7d 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -172,7 +172,7 @@ namespace { // optimization later (|seen| and |reached|). struct EscapeAnalyzer { // All the expressions that have already been seen by the optimizer, see the - // comment above on exclusivity: once we have seen something whene analyzing + // comment above on exclusivity: once we have seen something when analyzing // one allocation, if we reach it again then we can exit early since seeing it // a second time proves we lost exclusivity. 
We must track this across // multiple instances of EscapeAnalyzer as each handles a particular From 517d02d7f455460e09c2a38cb87cb0efa15e2f7c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 10:44:54 -0700 Subject: [PATCH 10/53] fix compiler warnings --- src/ir/branch-utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ir/branch-utils.h b/src/ir/branch-utils.h index e87ddcecc64..be3f7f7a811 100644 --- a/src/ir/branch-utils.h +++ b/src/ir/branch-utils.h @@ -109,11 +109,11 @@ void operateOnScopeNameUsesAndSentValues(Expression* expr, T func) { func(name, sw->value); } else if (auto* br = expr->dynCast()) { func(name, br->ref); - } else if (auto* tt = expr->dynCast()) { + } else if (expr->is()) { // The values are supplied by throwing instructions, so we are unable to // know what they will be here. func(name, nullptr); - } else if (auto* res = expr->dynCast()) { + } else if (expr->is()) { // The values are supplied by suspend instructions executed while running // the continuation, so we are unable to know what they will be here. func(name, nullptr); From 4608b295c4e383946ffa439890fd668f31b0af03 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 11:20:38 -0700 Subject: [PATCH 11/53] exciting --- src/passes/Heap2Local.cpp | 374 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 358 insertions(+), 16 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 534ff2f3d7d..f202930e957 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -776,6 +776,296 @@ struct Struct2Local : PostWalker { } }; +// An optimizer that handles the rewriting to turn a nonescaping array +// allocation into a struct allocation. Struct2Local can then be run on that +// allocation. +// TODO: Doing a single rewrite walk at the end (for all structs) would be more +// efficient, but it would need to be more complex. 
+struct Array2Struct : PostWalker { + Expression* allocation; + // TODO which of these do we need? + const EscapeAnalyzer& analyzer; + Function* func; + Module& wasm; + Builder builder; + const FieldList& fields; + + Struct2Local(Expression* allocation, + const EscapeAnalyzer& analyzer, + Function* func, + Module& wasm) + : allocation(allocation), analyzer(analyzer), func(func), wasm(wasm), + builder(wasm), fields(allocation->type.getHeapType().getStruct().fields) { + + // Build a proper struct type: as many fields as the size of the array, all + // of the same type as the array's element. + auto element = allocation->type.getHeapType().getArray().element; + Index numFields; + if (auto* arrayNew = allocation->dynCast()) { + numFields = arrayNew->size->cast()->value.getUnsigned(); + } else if (auto* arrayNewFixed = allocation->dynCast()) { + numFields = arrayNewFixed->values.size(); + } else { + WASM_UNREACHABLE("bad allocation"); + } + FieldList fields; + for (Index i = 0; i < numFields; i++) { + fields.push_back(element); + } + HeapType structType = Struct(fields); + + // Generate a proper StructNew. + if (auto* arrayNew = allocation->dynCast()) { + if (arrayNew->isWithDefault) { + structNew = builder.makeStructNew(structType, {}); + } else { + // The ArrayNew splatting a single value onto each slot of the array. To + // do the same for the struct, we store that value in an local and + // generate multiple local.gets of it. + abort(); // FIXME we need to replace the arrayNew with a block etc. + } + } else if (auto* arrayNewFixed = allocation->dynCast()) { + // Simply use the same values as the array. + structNew = builder.makeStructNew(structType, arrayNewFixed->values); + } else { + WASM_UNREACHABLE("bad allocation"); + } + + // Replace the things we need to using the visit* methods. + walk(func->body); + } + + StructNew* structNew; + + // Maps indexes in the struct to the local index that will replace them. 
+ std::vector localIndexes; + + // Rewrite the code in visit* methods. The general approach taken is to + // replace the allocation with a null reference (which may require changing + // types in some places, like making a block return value nullable), and to + // remove all uses of it as much as possible, using the information we have + // (for example, when our allocation reaches a RefAsNonNull we can simply + // remove that operation as we know it would not throw). Some things are + // left to other passes, like getting rid of dropped code without side + // effects. + + // Adjust the type that flows through an expression, updating that type as + // necessary. + void adjustTypeFlowingThrough(Expression* curr) { + if (!analyzer.reached.count(curr)) { + return; + } + + // Our allocation passes through this expr. We must turn its type into a + // nullable one, because we will remove things like RefAsNonNull of it, + // which means we may no longer have a non-nullable value as our input, + // and we could fail to validate. It is safe to make this change in terms + // of our parent, since we know very specifically that only safe things + // will end up using our value, like a StructGet or a Drop, which do not + // care about non-nullability. + assert(curr->type.isRef()); + curr->type = Type(curr->type.getHeapType(), Nullable); + } + + void visitBlock(Block* curr) { adjustTypeFlowingThrough(curr); } + + void visitLoop(Loop* curr) { adjustTypeFlowingThrough(curr); } + + void visitLocalSet(LocalSet* curr) { + if (!analyzer.reached.count(curr)) { + return; + } + + // We don't need any sets of the reference to any of the locals it + // originally was written to. + if (curr->isTee()) { + replaceCurrent(curr->value); + } else { + replaceCurrent(builder.makeDrop(curr->value)); + } + } + + void visitLocalGet(LocalGet* curr) { + if (!analyzer.reached.count(curr)) { + return; + } + + // Uses of this get will drop it, so the value does not matter. 
Replace it + // with something else, which avoids issues with non-nullability (when + // non-nullable locals are enabled), which could happen like this: + // + // (local $x (ref $foo)) + // (local.set $x ..) + // (.. (local.get $x)) + // + // If we remove the set but not the get then the get would appear to read + // the default value of a non-nullable local, which is not allowed. + // + // For simplicity, replace the get with a null. We anyhow have null types + // in the places where our allocation was earlier, see notes on + // visitBlock, and so using a null here adds no extra complexity. + replaceCurrent(builder.makeRefNull(curr->type.getHeapType())); + } + + void visitBreak(Break* curr) { + if (!analyzer.reached.count(curr)) { + return; + } + + // Breaks that our allocation flows through may change type, as we now + // have a nullable type there. + curr->finalize(); + } + + void visitStructNew(StructNew* curr) { + if (curr != allocation) { + return; + } + + // First, assign the initial values to the new locals. + std::vector contents; + + if (!allocation->isWithDefault()) { + // We must assign the initial values to temp indexes, then copy them + // over all at once. If instead we did set them as we go, then we might + // hit a problem like this: + // + // (local.set X (new_X)) + // (local.set Y (block (result ..) + // (.. (local.get X) ..) ;; returns new_X, wrongly + // (new_Y) + // ) + // + // Note how we assign to the local X and use it during the assignment to + // the local Y - but we should still see the old value of X, not new_X. + // Temp locals X', Y' can ensure that: + // + // (local.set X' (new_X)) + // (local.set Y' (block (result ..) + // (.. (local.get X) ..) ;; returns the proper, old X + // (new_Y) + // ) + // .. 
+ // (local.set X (local.get X')) + // (local.set Y (local.get Y')) + std::vector tempIndexes; + + for (auto field : fields) { + tempIndexes.push_back(builder.addVar(func, field.type)); + } + + // Store the initial values into the temp locals. + for (Index i = 0; i < tempIndexes.size(); i++) { + contents.push_back( + builder.makeLocalSet(tempIndexes[i], allocation->operands[i])); + } + + // Copy them to the normal ones. + for (Index i = 0; i < tempIndexes.size(); i++) { + contents.push_back(builder.makeLocalSet( + localIndexes[i], + builder.makeLocalGet(tempIndexes[i], fields[i].type))); + } + + // TODO Check if the nondefault case does not increase code size in some + // cases. A heap allocation that implicitly sets the default values + // is smaller than multiple explicit settings of locals to + // defaults. + } else { + // Set the default values. + // Note that we must assign the defaults because we might be in a loop, + // that is, there might be a previous value. + for (Index i = 0; i < localIndexes.size(); i++) { + contents.push_back(builder.makeLocalSet( + localIndexes[i], + builder.makeConstantExpression(Literal::makeZero(fields[i].type)))); + } + } + + // Replace the allocation with a null reference. This changes the type + // from non-nullable to nullable, but as we optimize away the code that + // the allocation reaches, we will handle that. + contents.push_back(builder.makeRefNull(allocation->type.getHeapType())); + replaceCurrent(builder.makeBlock(contents)); + } + + void visitRefAs(RefAs* curr) { + if (!analyzer.reached.count(curr)) { + return; + } + + // It is safe to optimize out this RefAsNonNull, since we proved it + // contains our allocation, and so cannot trap. 
+ assert(curr->op == RefAsNonNull); + replaceCurrent(curr->value); + } + + void visitRefCast(RefCast* curr) { + if (!analyzer.reached.count(curr)) { + return; + } + + // It is safe to optimize out this RefCast, since we proved it + // contains our allocation and we have checked that the type of + // the allocation is a subtype of the type of the cast, and so + // cannot trap. + replaceCurrent(curr->ref); + + // We need to refinalize after this, as while we know the cast is not + // logically needed - the value flowing through will not be used - we do + // need validation to succeed even before other optimizations remove the + // code. For example: + // + // (block (result $B) + // (ref.cast $B + // (block (result $A) + // + // Without the cast this does not validate, so we need to refinalize + // (which will fix this, as we replace the unused value with a null, so + // that type will propagate out). + refinalize = true; + } + + void visitStructSet(StructSet* curr) { + if (!analyzer.reached.count(curr)) { + return; + } + + // Drop the ref (leaving it to other opts to remove, when possible), and + // write the data to the local instead of the heap allocation. 
+ replaceCurrent(builder.makeSequence( + builder.makeDrop(curr->ref), + builder.makeLocalSet(localIndexes[curr->index], curr->value))); + } + + void visitStructGet(StructGet* curr) { + if (!analyzer.reached.count(curr)) { + return; + } + + auto type = fields[curr->index].type; + if (type != curr->type) { + // Normally we are just replacing a struct.get with a local.get of a + // local that was created to have the same type as the struct's field, + // but in some cases we may refine, if the struct.get's reference type + // is less refined than the reference that actually arrives, like here: + // + // (struct.get $parent 0 + // (block (ref $parent) + // (struct.new $child))) + // + // We allocated locals for the field of the child, and are replacing a + // get of the parent field with a local of the same type as the child's, + // which may be more refined. + refinalize = true; + } + replaceCurrent(builder.makeSequence( + builder.makeDrop(curr->ref), + builder.makeLocalGet(localIndexes[curr->index], type))); + } +}; +//waka + // Core Heap2Local optimization that operates on a function: Builds up the data // structures we need (LocalGraph, etc.) that we will use across multiple // analyses of allocations, and then runs those analyses and optimizes where @@ -800,44 +1090,96 @@ struct Heap2Local { // repeated work, see above. std::unordered_set seen; - // All the allocations in the function. - // TODO: Arrays (of constant size) as well, if all element accesses use - // constant indexes. One option might be to first convert such - // nonescaping arrays into structs. - FindAll allocations(func->body); + // Find all the relevant allocations in the function: StructNew, ArrayNew, + // ArrayNewFixed. + struct AllocationFinder : public { + std::vector structNews; + std::vector arrayNews; - for (auto* allocation : allocations.list) { + void visitStructNew(StructNew* curr) { + // Ignore unreachable allocations that DCE will remove anyhow. 
+ if (curr->type != Type::unreachable) { + structNews.push_back(curr); + } + } + void visitArrayNew(ArrayNew* curr) { + // Only new arrays of fixed size are relevant for us. + if (curr->type != Type::unreachable && curr->size->is()) { + // TODO: Some reasonable limit on size? Also above and below. + arrayNews.push_back(curr); + } + } + void visitArrayNewFixed(ArrayNewFixed* curr) { + if (curr->type != Type::unreachable) { + arrayNews.push_back(curr); + } + } + } finder; + finder.walk(func->body); + + // First, lower non-escaping arrays into structs. That allows us to handle + // arrays in a single place, and let all the rest of this pass assume we are + // working on structs. We are in fact only optimizing struct-like arrays + // here, that is, arrays of a fixed size and whose items are accessed using + // constant indexes, so they are effectively structs, and turning them into + // such allows uniform handling later. + for (auto* allocation : finder.arrayNews) { // The point of this optimization is to replace heap allocations with // locals, so we must be able to place the data in locals. + auto element = allocation->type.getHeapType().getArray().element; + if (!canHandleAsLocals(element)) { + continue; + } + + EscapeAnalyzer analyzer( + seen, localGraph, parents, branchTargets, passOptions, wasm); + if (!analyzer.escapes(allocation)) { + // Convert the allocation and all its uses into a struct. Then convert + // the struct into locals. + allocation = Array2Struct(allocation, analyzer, func, wasm).structNew; + Struct2Local(allocation, analyzer, func, wasm); + } + } + + // Next, process all structNews, both original ones and ones that used to be + // arrays. + for (auto* allocation : finder.structNews) { + // As above, we must be able to use locals for this data. if (!canHandleAsLocals(allocation->type)) { continue; } // Check for escaping, noting relevant information as we go. If this does - // not escape, optimize it. + // not escape, optimize it into locals. 
EscapeAnalyzer analyzer( seen, localGraph, parents, branchTargets, passOptions, wasm); if (!analyzer.escapes(allocation)) { - Struct2Local optimizer(allocation, analyzer, func, wasm); + Struct2Local(allocation, analyzer, func, wasm); } } } - bool canHandleAsLocals(Type type) { - if (type == Type::unreachable) { + bool canHandleAsLocal(const Field& field) { + if (!TypeUpdating::canHandleAsLocal(field.type)) { + return false; + } + if (field.isPacked()) { + // TODO: support packed fields by adding coercions/truncations. return false; } + return true; + } + + bool canHandleAsLocals(Type type) { + // We filtered out unreachables already. + assert(type != Type::unreachable); + auto& fields = type.getHeapType().getStruct().fields; for (auto field : fields) { - if (!TypeUpdating::canHandleAsLocal(field.type)) { - return false; - } - if (field.isPacked()) { - // TODO: support packed fields by adding coercions/truncations. + if (!canHandleAsLocal(field)) { return false; } } - return true; } }; From 347ebe01eb6fc7374aa341727bfbcd5d0784b0c3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 11:27:23 -0700 Subject: [PATCH 12/53] yolo --- src/passes/Heap2Local.cpp | 227 +++----------------------------------- 1 file changed, 17 insertions(+), 210 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index f202930e957..85d14ca2759 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -831,211 +831,36 @@ struct Array2Struct : PostWalker { WASM_UNREACHABLE("bad allocation"); } - // Replace the things we need to using the visit* methods. + // Replace the things we need to using the visit* methods. Note that we do + // not bother to update types: all the places with the array type should + // technically have the new struct type, but we are going to optimize the + // struct into locals anyhow. walk(func->body); } + // The StructNew that replaces the ArrayNew*. 
The user of this class can then + // optimize that StructNew using Struct2Local. StructNew* structNew; - // Maps indexes in the struct to the local index that will replace them. - std::vector localIndexes; - - // Rewrite the code in visit* methods. The general approach taken is to - // replace the allocation with a null reference (which may require changing - // types in some places, like making a block return value nullable), and to - // remove all uses of it as much as possible, using the information we have - // (for example, when our allocation reaches a RefAsNonNull we can simply - // remove that operation as we know it would not throw). Some things are - // left to other passes, like getting rid of dropped code without side - // effects. - - // Adjust the type that flows through an expression, updating that type as - // necessary. - void adjustTypeFlowingThrough(Expression* curr) { - if (!analyzer.reached.count(curr)) { - return; + void visitArrayNew(ArrayNew* curr) { + if (curr == allocation) { + replaceCurrent(structNew); } - - // Our allocation passes through this expr. We must turn its type into a - // nullable one, because we will remove things like RefAsNonNull of it, - // which means we may no longer have a non-nullable value as our input, - // and we could fail to validate. It is safe to make this change in terms - // of our parent, since we know very specifically that only safe things - // will end up using our value, like a StructGet or a Drop, which do not - // care about non-nullability. - assert(curr->type.isRef()); - curr->type = Type(curr->type.getHeapType(), Nullable); } - void visitBlock(Block* curr) { adjustTypeFlowingThrough(curr); } - - void visitLoop(Loop* curr) { adjustTypeFlowingThrough(curr); } - - void visitLocalSet(LocalSet* curr) { - if (!analyzer.reached.count(curr)) { - return; - } - - // We don't need any sets of the reference to any of the locals it - // originally was written to. 
- if (curr->isTee()) { - replaceCurrent(curr->value); - } else { - replaceCurrent(builder.makeDrop(curr->value)); + void visitArrayFixed(ArrayNewFixed* curr) { + if (curr == allocation) { + replaceCurrent(structNew); } } - void visitLocalGet(LocalGet* curr) { + void visitArraySet(ArraySet* curr) { if (!analyzer.reached.count(curr)) { return; } - // Uses of this get will drop it, so the value does not matter. Replace it - // with something else, which avoids issues with non-nullability (when - // non-nullable locals are enabled), which could happen like this: - // - // (local $x (ref $foo)) - // (local.set $x ..) - // (.. (local.get $x)) - // - // If we remove the set but not the get then the get would appear to read - // the default value of a non-nullable local, which is not allowed. - // - // For simplicity, replace the get with a null. We anyhow have null types - // in the places where our allocation was earlier, see notes on - // visitBlock, and so using a null here adds no extra complexity. - replaceCurrent(builder.makeRefNull(curr->type.getHeapType())); - } - - void visitBreak(Break* curr) { - if (!analyzer.reached.count(curr)) { - return; - } - - // Breaks that our allocation flows through may change type, as we now - // have a nullable type there. - curr->finalize(); - } - - void visitStructNew(StructNew* curr) { - if (curr != allocation) { - return; - } - - // First, assign the initial values to the new locals. - std::vector contents; - - if (!allocation->isWithDefault()) { - // We must assign the initial values to temp indexes, then copy them - // over all at once. If instead we did set them as we go, then we might - // hit a problem like this: - // - // (local.set X (new_X)) - // (local.set Y (block (result ..) - // (.. (local.get X) ..) ;; returns new_X, wrongly - // (new_Y) - // ) - // - // Note how we assign to the local X and use it during the assignment to - // the local Y - but we should still see the old value of X, not new_X. 
- // Temp locals X', Y' can ensure that: - // - // (local.set X' (new_X)) - // (local.set Y' (block (result ..) - // (.. (local.get X) ..) ;; returns the proper, old X - // (new_Y) - // ) - // .. - // (local.set X (local.get X')) - // (local.set Y (local.get Y')) - std::vector tempIndexes; - - for (auto field : fields) { - tempIndexes.push_back(builder.addVar(func, field.type)); - } - - // Store the initial values into the temp locals. - for (Index i = 0; i < tempIndexes.size(); i++) { - contents.push_back( - builder.makeLocalSet(tempIndexes[i], allocation->operands[i])); - } - - // Copy them to the normal ones. - for (Index i = 0; i < tempIndexes.size(); i++) { - contents.push_back(builder.makeLocalSet( - localIndexes[i], - builder.makeLocalGet(tempIndexes[i], fields[i].type))); - } - - // TODO Check if the nondefault case does not increase code size in some - // cases. A heap allocation that implicitly sets the default values - // is smaller than multiple explicit settings of locals to - // defaults. - } else { - // Set the default values. - // Note that we must assign the defaults because we might be in a loop, - // that is, there might be a previous value. - for (Index i = 0; i < localIndexes.size(); i++) { - contents.push_back(builder.makeLocalSet( - localIndexes[i], - builder.makeConstantExpression(Literal::makeZero(fields[i].type)))); - } - } - - // Replace the allocation with a null reference. This changes the type - // from non-nullable to nullable, but as we optimize away the code that - // the allocation reaches, we will handle that. - contents.push_back(builder.makeRefNull(allocation->type.getHeapType())); - replaceCurrent(builder.makeBlock(contents)); - } - - void visitRefAs(RefAs* curr) { - if (!analyzer.reached.count(curr)) { - return; - } - - // It is safe to optimize out this RefAsNonNull, since we proved it - // contains our allocation, and so cannot trap. 
- assert(curr->op == RefAsNonNull); - replaceCurrent(curr->value); - } - - void visitRefCast(RefCast* curr) { - if (!analyzer.reached.count(curr)) { - return; - } - - // It is safe to optimize out this RefCast, since we proved it - // contains our allocation and we have checked that the type of - // the allocation is a subtype of the type of the cast, and so - // cannot trap. - replaceCurrent(curr->ref); - - // We need to refinalize after this, as while we know the cast is not - // logically needed - the value flowing through will not be used - we do - // need validation to succeed even before other optimizations remove the - // code. For example: - // - // (block (result $B) - // (ref.cast $B - // (block (result $A) - // - // Without the cast this does not validate, so we need to refinalize - // (which will fix this, as we replace the unused value with a null, so - // that type will propagate out). - refinalize = true; - } - - void visitStructSet(StructSet* curr) { - if (!analyzer.reached.count(curr)) { - return; - } - - // Drop the ref (leaving it to other opts to remove, when possible), and - // write the data to the local instead of the heap allocation. - replaceCurrent(builder.makeSequence( - builder.makeDrop(curr->ref), - builder.makeLocalSet(localIndexes[curr->index], curr->value))); + // Convert the ArraySet into a StructSet. 
+ replaceCurrent(builder.makeStructGet(getIndex(curr->index), curr->ref, curr->value); } void visitStructGet(StructGet* curr) { @@ -1043,28 +868,10 @@ struct Array2Struct : PostWalker { return; } - auto type = fields[curr->index].type; - if (type != curr->type) { - // Normally we are just replacing a struct.get with a local.get of a - // local that was created to have the same type as the struct's field, - // but in some cases we may refine, if the struct.get's reference type - // is less refined than the reference that actually arrives, like here: - // - // (struct.get $parent 0 - // (block (ref $parent) - // (struct.new $child))) - // - // We allocated locals for the field of the child, and are replacing a - // get of the parent field with a local of the same type as the child's, - // which may be more refined. - refinalize = true; - } - replaceCurrent(builder.makeSequence( - builder.makeDrop(curr->ref), - builder.makeLocalGet(localIndexes[curr->index], type))); + // Convert the ArrayGet into a StructGet. + replaceCurrent(builder.makeStructGet(getIndex(curr->index), curr->ref, curr->type, curr->signed_); } }; -//waka // Core Heap2Local optimization that operates on a function: Builds up the data // structures we need (LocalGraph, etc.) that we will use across multiple From 90cabdeb5ab6f70b577eb42de58ccc61293681b0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 11:38:07 -0700 Subject: [PATCH 13/53] yolo --- src/passes/Heap2Local.cpp | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 85d14ca2759..156074c6264 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -228,7 +228,7 @@ struct EscapeAnalyzer { }; // Analyze an allocation to see if it escapes or not. - bool escapes(StructNew* allocation) { + bool escapes(Expression* allocation) { // A queue of flows from children to parents. 
When something is in the queue // here then it assumed that it is ok for the allocation to be at the child // (that is, we have already checked the child before placing it in the @@ -330,7 +330,7 @@ struct EscapeAnalyzer { return false; } - ParentChildInteraction getParentChildInteraction(StructNew* allocation, + ParentChildInteraction getParentChildInteraction(Expression* allocation, Expression* parent, Expression* child) { // If there is no parent then we are the body of the function, and that @@ -340,7 +340,7 @@ struct EscapeAnalyzer { } struct Checker : public Visitor { - StructNew* allocation; + Expression* allocation; Expression* child; // Assume escaping (or some other problem we cannot analyze) unless we are @@ -790,7 +790,7 @@ struct Array2Struct : PostWalker { Builder builder; const FieldList& fields; - Struct2Local(Expression* allocation, + Array2Struct(Expression* allocation, const EscapeAnalyzer& analyzer, Function* func, Module& wasm) @@ -802,7 +802,7 @@ struct Array2Struct : PostWalker { auto element = allocation->type.getHeapType().getArray().element; Index numFields; if (auto* arrayNew = allocation->dynCast()) { - numFields = arrayNew->size->cast()->value.getUnsigned(); + numFields = getIndex(arrayNew->size); } else if (auto* arrayNewFixed = allocation->dynCast()) { numFields = arrayNewFixed->values.size(); } else { @@ -816,7 +816,7 @@ struct Array2Struct : PostWalker { // Generate a proper StructNew. if (auto* arrayNew = allocation->dynCast()) { - if (arrayNew->isWithDefault) { + if (arrayNew->isWithDefault()) { structNew = builder.makeStructNew(structType, {}); } else { // The ArrayNew splatting a single value onto each slot of the array. To @@ -860,16 +860,22 @@ struct Array2Struct : PostWalker { } // Convert the ArraySet into a StructSet. - replaceCurrent(builder.makeStructGet(getIndex(curr->index), curr->ref, curr->value); + // TODO: the actual chak that the index is constant, after escape anlysi and before opt! 
+ replaceCurrent(builder.makeStructSet(getIndex(curr->index), curr->ref, curr->value)); } - void visitStructGet(StructGet* curr) { + void visitArrayGet(ArrayGet* curr) { if (!analyzer.reached.count(curr)) { return; } // Convert the ArrayGet into a StructGet. - replaceCurrent(builder.makeStructGet(getIndex(curr->index), curr->ref, curr->type, curr->signed_); + replaceCurrent(builder.makeStructGet(getIndex(curr->index), curr->ref, curr->type, curr->signed_)); + } + + // Get the value in an expression we know must contain a constant index. + Index getIndex(Expression* curr) { + return curr->cast()->value.getUnsigned(); } }; @@ -899,7 +905,7 @@ struct Heap2Local { // Find all the relevant allocations in the function: StructNew, ArrayNew, // ArrayNewFixed. - struct AllocationFinder : public { + struct AllocationFinder : public PostWalker { std::vector structNews; std::vector arrayNews; @@ -934,7 +940,7 @@ struct Heap2Local { // The point of this optimization is to replace heap allocations with // locals, so we must be able to place the data in locals. auto element = allocation->type.getHeapType().getArray().element; - if (!canHandleAsLocals(element)) { + if (!canHandleAsLocal(element)) { continue; } @@ -943,8 +949,8 @@ struct Heap2Local { if (!analyzer.escapes(allocation)) { // Convert the allocation and all its uses into a struct. Then convert // the struct into locals. 
- allocation = Array2Struct(allocation, analyzer, func, wasm).structNew; - Struct2Local(allocation, analyzer, func, wasm); + auto* structNew = Array2Struct(allocation, analyzer, func, wasm).structNew; + Struct2Local(structNew, analyzer, func, wasm); } } @@ -987,6 +993,7 @@ struct Heap2Local { return false; } } + return true; } }; From eef7df5514317c7ce63d3e6bfa057deac0a975c9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 11:46:24 -0700 Subject: [PATCH 14/53] prep --- test/lit/passes/heap2local.wast | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index be6538a948f..946e749998f 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2216,3 +2216,48 @@ (local.get $temp2) ) ) + +;; Arrays. +(module + ;; CHECK: (type $array (array (mut i32))) + (type $array (array (mut i32))) + + ;; CHECK: (func $array.new (type $1) + ;; CHECK-NEXT: (local $temp (ref $array)) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (array.new_default $array + ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (array.set $array + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.get $array + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $array.new + (local $temp (ref $array)) + (local.set $temp + (array.new_default $array + (i32.const 5) + ) + ) + (array.set $array + (local.get $temp) + (i32.const 2) + (i32.const 42) + ) + (drop + (array.get $array + (local.get $temp) + (i32.const 2) + ) + ) + ) +) From 86c98ad4dcd60cc8a77c54e546a6f603e50bbadd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 13:08:30 -0700 Subject: [PATCH 15/53] work --- src/passes/Heap2Local.cpp | 27 +++++++++++++++++++++++---- 1 file 
changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp
index 156074c6264..fb7ee6c3bfc 100644
--- a/src/passes/Heap2Local.cpp
+++ b/src/passes/Heap2Local.cpp
@@ -415,8 +415,19 @@ struct EscapeAnalyzer {
        escapes = false;
        fullyConsumes = true;
      }
-
-      // TODO Array and I31 operations
+      void visitArraySet(ArraySet* curr) {
+        // As StructSet.
+        if (curr->ref == child) {
+          escapes = false;
+          fullyConsumes = true;
+        }
+      }
+      void visitArrayGet(ArrayGet* curr) {
+        // As StructGet.
+        escapes = false;
+        fullyConsumes = true;
+      }
+      // TODO other GC operations
     } checker;
 
     checker.allocation = allocation;
@@ -788,14 +799,13 @@ struct Array2Struct : PostWalker {
   Function* func;
   Module& wasm;
   Builder builder;
-  const FieldList& fields;
 
   Array2Struct(Expression* allocation,
                const EscapeAnalyzer& analyzer,
                Function* func,
                Module& wasm)
     : allocation(allocation), analyzer(analyzer), func(func), wasm(wasm),
-      builder(wasm), fields(allocation->type.getHeapType().getStruct().fields) {
+      builder(wasm) {
 
     // Build a proper struct type: as many fields as the size of the array, all
     // of the same type as the array's element.
@@ -869,6 +879,15 @@ struct Array2Struct : PostWalker {
       return;
     }
 
+    // As mentioned above we do not fix up types along the way from array to
+    // struct, but an exception we must make here is to fix the type of the
+    // |ref| child, which is used in |builder.makeStructGet| (if it is
+    // reachable).
+    if (curr->ref->type != Type::unreachable &&
+        curr->ref->type.getHeapType().isArray()) {
+      curr->ref->type = structNew->type;
+    }
+
     // Convert the ArrayGet into a StructGet. 
replaceCurrent(builder.makeStructGet(getIndex(curr->index), curr->ref, curr->type, curr->signed_)); } From a51a7cb65ad92621495c1352693843fb8c1e2f9c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 13:08:42 -0700 Subject: [PATCH 16/53] test --- test/lit/passes/heap2local.wast | 48 +++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 946e749998f..56bc80543e4 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2222,22 +2222,48 @@ ;; CHECK: (type $array (array (mut i32))) (type $array (array (mut i32))) - ;; CHECK: (func $array.new (type $1) + ;; CHECK: (func $array.new (type $0) ;; CHECK-NEXT: (local $temp (ref $array)) - ;; CHECK-NEXT: (local.set $temp - ;; CHECK-NEXT: (array.new_default $array - ;; CHECK-NEXT: (i32.const 5) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local $4 i32) + ;; CHECK-NEXT: (local $5 i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $4 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $5 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (array.set $array - ;; CHECK-NEXT: (local.get $temp) - ;; CHECK-NEXT: (i32.const 2) - ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (block ;; (replaces unreachable StructSet we can't emit) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) 
;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (array.get $array - ;; CHECK-NEXT: (local.get $temp) - ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: (block ;; (replaces unreachable StructGet we can't emit) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) From 3b44e01022fb3bd021d3d7235e2d6d2007f50d16 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 13:22:10 -0700 Subject: [PATCH 17/53] twork --- src/passes/Heap2Local.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index fb7ee6c3bfc..067fcb67f3a 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -795,13 +795,13 @@ struct Struct2Local : PostWalker { struct Array2Struct : PostWalker { Expression* allocation; // TODO which of these do we need? - const EscapeAnalyzer& analyzer; + EscapeAnalyzer& analyzer; Function* func; Module& wasm; Builder builder; Array2Struct(Expression* allocation, - const EscapeAnalyzer& analyzer, + EscapeAnalyzer& analyzer, Function* func, Module& wasm) : allocation(allocation), analyzer(analyzer), func(func), wasm(wasm), @@ -855,12 +855,14 @@ struct Array2Struct : PostWalker { void visitArrayNew(ArrayNew* curr) { if (curr == allocation) { replaceCurrent(structNew); + noteCurrentIsReached(); } } void visitArrayFixed(ArrayNewFixed* curr) { if (curr == allocation) { replaceCurrent(structNew); + noteCurrentIsReached(); } } @@ -872,6 +874,7 @@ struct Array2Struct : PostWalker { // Convert the ArraySet into a StructSet. // TODO: the actual chak that the index is constant, after escape anlysi and before opt! replaceCurrent(builder.makeStructSet(getIndex(curr->index), curr->ref, curr->value)); + noteCurrentIsReached(); } void visitArrayGet(ArrayGet* curr) { @@ -890,12 +893,20 @@ struct Array2Struct : PostWalker { // Convert the ArrayGet into a StructGet. 
replaceCurrent(builder.makeStructGet(getIndex(curr->index), curr->ref, curr->type, curr->signed_)); + noteCurrentIsReached(); } // Get the value in an expression we know must contain a constant index. Index getIndex(Expression* curr) { return curr->cast()->value.getUnsigned(); } + + void noteCurrentIsReached() { + // Inform the analyzer that the current expression (which we just replaced) + // has been reached in its analysis. We are replacing something it reached, + // and want it to consider it as its equivalent. + analyzer.reached.insert(getCurrent()); + } }; // Core Heap2Local optimization that operates on a function: Builds up the data @@ -968,8 +979,11 @@ struct Heap2Local { if (!analyzer.escapes(allocation)) { // Convert the allocation and all its uses into a struct. Then convert // the struct into locals. +std::cout << "pre: " << *func << '\n'; auto* structNew = Array2Struct(allocation, analyzer, func, wasm).structNew; +std::cout << "mid: " << *func << '\n'; Struct2Local(structNew, analyzer, func, wasm); +std::cout << "post: " << *func << '\n'; } } From e21ce347b79c0d626ef4790ed46830167431343c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 13:22:29 -0700 Subject: [PATCH 18/53] fix --- test/lit/passes/heap2local.wast | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 56bc80543e4..53528ae92fd 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2249,21 +2249,20 @@ ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (block ;; (replaces unreachable StructSet we can't emit) + ;; CHECK-NEXT: (block ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.set $3 ;; CHECK-NEXT: (i32.const 42) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block ;; (replaces unreachable 
StructGet we can't emit)
+  ;; CHECK-NEXT:  (block (result i32)
   ;; CHECK-NEXT:   (drop
   ;; CHECK-NEXT:    (ref.null none)
   ;; CHECK-NEXT:   )
-  ;; CHECK-NEXT:   (unreachable)
+  ;; CHECK-NEXT:   (local.get $3)
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT: )
  ;; CHECK-NEXT: )

From 29625792307870dc2a2569882f69e256b219b5c8 Mon Sep 17 00:00:00 2001
From: Alon Zakai
Date: Fri, 5 Apr 2024 13:27:35 -0700
Subject: [PATCH 19/53] work

---
 src/passes/Heap2Local.cpp       |  1 +
 test/lit/passes/heap2local.wast | 69 +++++++++++++++++++++++++++------
 2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp
index 067fcb67f3a..3fdc1edfe8f 100644
--- a/src/passes/Heap2Local.cpp
+++ b/src/passes/Heap2Local.cpp
@@ -873,6 +873,7 @@ struct Array2Struct : PostWalker {
     // Convert the ArraySet into a StructSet.
     // TODO: the actual chak that the index is constant, after escape anlysi and before opt!
+    // TODO: checks that indexes are in bounds!
     replaceCurrent(builder.makeStructSet(getIndex(curr->index), curr->ref, curr->value));
     noteCurrentIsReached();
   }
diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast
index 53528ae92fd..2ebee2b2f32 100644
--- a/test/lit/passes/heap2local.wast
+++ b/test/lit/passes/heap2local.wast
@@ -2227,8 +2227,6 @@
   ;; CHECK-NEXT: (local $1 i32)
   ;; CHECK-NEXT: (local $2 i32)
   ;; CHECK-NEXT: (local $3 i32)
-  ;; CHECK-NEXT: (local $4 i32)
-  ;; CHECK-NEXT: (local $5 i32)
   ;; CHECK-NEXT: (drop
   ;; CHECK-NEXT:  (block (result nullref)
@@ -2240,12 +2238,6 @@
   ;; CHECK-NEXT:  (local.set $3
   ;; CHECK-NEXT:   (i32.const 0)
   ;; CHECK-NEXT:  )
-  ;; CHECK-NEXT:  (local.set $4
-  ;; CHECK-NEXT:   (i32.const 0)
-  ;; CHECK-NEXT:  )
-  ;; CHECK-NEXT:  (local.set $5
-  ;; CHECK-NEXT:   (i32.const 0)
-  ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (ref.null none)
   ;; CHECK-NEXT: )
   ;; CHECK-NEXT: )
@@ -2253,8 +2245,40 @@
   ;; CHECK-NEXT: (drop
   ;; CHECK-NEXT:  (ref.null none)
   ;; CHECK-NEXT: )
+  ;; CHECK-NEXT: (local.set $1
+  ;; CHECK-NEXT: 
(i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.set $3 - ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -2268,15 +2292,38 @@ ;; CHECK-NEXT: ) (func $array.new (local $temp (ref $array)) + ;; This fixed-size array can be replaced with locals. 
(local.set $temp (array.new_default $array - (i32.const 5) + (i32.const 3) ) ) + (array.set $array + (local.get $temp) + (i32.const 0) + (i32.const 10) + ) + (array.set $array + (local.get $temp) + (i32.const 1) + (i32.const 20) + ) (array.set $array (local.get $temp) (i32.const 2) - (i32.const 42) + (i32.const 30) + ) + (drop + (array.get $array + (local.get $temp) + (i32.const 0) + ) + ) + (drop + (array.get $array + (local.get $temp) + (i32.const 1) + ) ) (drop (array.get $array From 1e0120104162101a31ab0cfc5bf51126dc887108 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 13:28:00 -0700 Subject: [PATCH 20/53] work --- test/lit/passes/heap2local.wast | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 2ebee2b2f32..412a6570371 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2222,7 +2222,7 @@ ;; CHECK: (type $array (array (mut i32))) (type $array (array (mut i32))) - ;; CHECK: (func $array.new (type $0) + ;; CHECK: (func $array.new_default (type $0) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2290,7 +2290,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $array.new + (func $array.new_default (local $temp (ref $array)) ;; This fixed-size array can be replaced with locals. 
(local.set $temp From 6a39f6a9ae4a19efd8818ba25d5a25728641860e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 13:31:43 -0700 Subject: [PATCH 21/53] work --- src/passes/Heap2Local.cpp | 2 +- test/lit/passes/heap2local.wast | 62 +++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 3fdc1edfe8f..92d758f15a1 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -859,7 +859,7 @@ struct Array2Struct : PostWalker { } } - void visitArrayFixed(ArrayNewFixed* curr) { + void visitArrayNewFixed(ArrayNewFixed* curr) { if (curr == allocation) { replaceCurrent(structNew); noteCurrentIsReached(); diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 412a6570371..9f8445baa46 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2332,4 +2332,66 @@ ) ) ) + + ;; CHECK: (func $array.new_fixed (type $0) + ;; CHECK-NEXT: (local $temp (ref $array)) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local $4 i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $4 + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 11) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) 
+ ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $array.new_fixed + (local $temp (ref $array)) + ;; This is also optimizable. + (local.set $temp + (array.new_fixed $array 2 + (i32.const 42) + (i32.const 1337) + ) + ) + (array.set $array + (local.get $temp) + (i32.const 1) + (i32.const 11) + ) + (drop + (array.get $array + (local.get $temp) + (i32.const 0) + ) + ) + ) ) From faad72d41718e40dbdab5d4f0692e0d78de38244 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 13:39:21 -0700 Subject: [PATCH 22/53] work --- src/passes/Heap2Local.cpp | 36 ++++++++++++++++++++++++++------- test/lit/passes/heap2local.wast | 22 ++++++++++++++++++++ 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 92d758f15a1..e1f6cc26ee2 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -828,15 +828,29 @@ struct Array2Struct : PostWalker { if (auto* arrayNew = allocation->dynCast()) { if (arrayNew->isWithDefault()) { structNew = builder.makeStructNew(structType, {}); + arrayNewReplacement = structNew; } else { // The ArrayNew splatting a single value onto each slot of the array. To // do the same for the struct, we store that value in an local and // generate multiple local.gets of it. - abort(); // FIXME we need to replace the arrayNew with a block etc. + auto local = builder.addVar(func, element.type); + auto* set = builder.makeLocalSet(local, arrayNew->init); + std::vector gets; + for (Index i = 0; i < numFields; i++) { + gets.push_back(builder.makeLocalGet(local, element.type)); + } + structNew = builder.makeStructNew(structType, gets); + // The ArrayNew* will be replaced with a block containing the local.set + // and the structNew. + arrayNewReplacement = builder.makeSequence(set, structNew); + // The data flows through the new block we just added: inform the + // analysis of that. 
+ noteIsReached(arrayNewReplacement); } } else if (auto* arrayNewFixed = allocation->dynCast()) { // Simply use the same values as the array. structNew = builder.makeStructNew(structType, arrayNewFixed->values); + arrayNewReplacement = structNew; } else { WASM_UNREACHABLE("bad allocation"); } @@ -852,16 +866,20 @@ struct Array2Struct : PostWalker { // optimize that StructNew using Struct2Local. StructNew* structNew; + // The replacement for the original ArrayNew*. Typically this is |structNew|, + // unless we have additional code we need alongside it. + Expression* arrayNewReplacement; + void visitArrayNew(ArrayNew* curr) { if (curr == allocation) { - replaceCurrent(structNew); + replaceCurrent(arrayNewReplacement); noteCurrentIsReached(); } } void visitArrayNewFixed(ArrayNewFixed* curr) { if (curr == allocation) { - replaceCurrent(structNew); + replaceCurrent(arrayNewReplacement); noteCurrentIsReached(); } } @@ -902,11 +920,15 @@ struct Array2Struct : PostWalker { return curr->cast()->value.getUnsigned(); } + // Inform the analyzer that the current expression (which we just replaced) + // has been reached in its analysis. We are replacing something it reached, + // and want it to consider it as its equivalent. void noteCurrentIsReached() { - // Inform the analyzer that the current expression (which we just replaced) - // has been reached in its analysis. We are replacing something it reached, - // and want it to consider it as its equivalent. - analyzer.reached.insert(getCurrent()); + noteIsReached(getCurrent()); + } + + void noteIsReached(Expression* curr) { + analyzer.reached.insert(curr); } }; diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 9f8445baa46..72e9646eb41 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2333,6 +2333,28 @@ ) ) + (func $array.new + (local $temp (ref $array)) + ;; This is also optimizable. 
+ (local.set $temp + (array.new $array + (i32.const 1337) + (i32.const 2) + ) + ) + (array.set $array + (local.get $temp) + (i32.const 0) + (i32.const -1) + ) + (drop + (array.get $array + (local.get $temp) + (i32.const 1) + ) + ) + ) + ;; CHECK: (func $array.new_fixed (type $0) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) From d5383ada92a3bb9542c958e0a128e70464e1244a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 13:40:33 -0700 Subject: [PATCH 23/53] work --- test/lit/passes/heap2local.wast | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 72e9646eb41..c69873d18b0 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2333,6 +2333,52 @@ ) ) + ;; CHECK: (func $array.new (type $0) + ;; CHECK-NEXT: (local $temp (ref $array)) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local $4 i32) + ;; CHECK-NEXT: (local $5 i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $2)) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $4 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $5 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (local.get $5) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const -1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null 
none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $array.new (local $temp (ref $array)) ;; This is also optimizable. From bfab49547b28e94339cb3399e94174413f2d22ee Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 13:41:19 -0700 Subject: [PATCH 24/53] work --- test/lit/passes/heap2local.wast | 62 ++++++++------------------------- 1 file changed, 14 insertions(+), 48 deletions(-) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index c69873d18b0..66cc89bc48a 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2222,7 +2222,7 @@ ;; CHECK: (type $array (array (mut i32))) (type $array (array (mut i32))) - ;; CHECK: (func $array.new_default (type $0) + ;; CHECK: (func $array.new_default (type $2) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2333,7 +2333,7 @@ ) ) - ;; CHECK: (func $array.new (type $0) + ;; CHECK: (func $array.new (type $1) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2341,7 +2341,7 @@ ;; CHECK-NEXT: (local $4 i32) ;; CHECK-NEXT: (local $5 i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result (ref null $2)) + ;; CHECK-NEXT: (block (result (ref null $3)) ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) @@ -2362,24 +2362,14 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (block (result i32) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.set $2 - ;; CHECK-NEXT: (i32.const -1) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result i32) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.null none) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $3) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $3) ;; CHECK-NEXT: ) ;; CHECK-NEXT: 
) - (func $array.new + (func $array.new (result i32) (local $temp (ref $array)) ;; This is also optimizable. (local.set $temp @@ -2388,20 +2378,13 @@ (i32.const 2) ) ) - (array.set $array + (array.get $array (local.get $temp) - (i32.const 0) - (i32.const -1) - ) - (drop - (array.get $array - (local.get $temp) - (i32.const 1) - ) + (i32.const 1) ) ) - ;; CHECK: (func $array.new_fixed (type $0) + ;; CHECK: (func $array.new_fixed (type $1) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2424,24 +2407,14 @@ ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (block (result i32) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.set $2 - ;; CHECK-NEXT: (i32.const 11) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result i32) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.null none) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $1) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $array.new_fixed + (func $array.new_fixed (result i32) (local $temp (ref $array)) ;; This is also optimizable. 
(local.set $temp @@ -2450,16 +2423,9 @@ (i32.const 1337) ) ) - (array.set $array + (array.get $array (local.get $temp) - (i32.const 1) - (i32.const 11) - ) - (drop - (array.get $array - (local.get $temp) - (i32.const 0) - ) + (i32.const 0) ) ) ) From fd102dd0be334a0d398d760bde031309519c9326 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 14:46:27 -0700 Subject: [PATCH 25/53] fix --- src/passes/Heap2Local.cpp | 34 ++++++++++++++++++++++++++------- test/lit/passes/heap2local.wast | 29 ++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index e1f6cc26ee2..b622716d02e 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -810,7 +810,6 @@ struct Array2Struct : PostWalker { // Build a proper struct type: as many fields as the size of the array, all // of the same type as the array's element. auto element = allocation->type.getHeapType().getArray().element; - Index numFields; if (auto* arrayNew = allocation->dynCast()) { numFields = getIndex(arrayNew->size); } else if (auto* arrayNewFixed = allocation->dynCast()) { @@ -862,6 +861,10 @@ struct Array2Struct : PostWalker { walk(func->body); } + // The number of slots in the array (which will become the number of fields in + // the struct). + Index numFields; + // The StructNew that replaces the ArrayNew*. The user of this class can then // optimize that StructNew using Struct2Local. StructNew* structNew; @@ -891,8 +894,16 @@ struct Array2Struct : PostWalker { // Convert the ArraySet into a StructSet. // TODO: the actual chak that the index is constant, after escape anlysi and before opt! - // TODO: chaks that indexes are in bounds! - replaceCurrent(builder.makeStructSet(getIndex(curr->index), curr->ref, curr->value)); + auto index = getIndex(curr->index); + if (index >= numFields) { + // This is an OOB array.set, which means we trap. 
+ replaceCurrent(builder.makeBlock( + { builder.makeDrop(curr->ref), builder.makeDrop(curr->value), + builder.makeUnreachable() } + )); + return; + } + replaceCurrent(builder.makeStructSet(index, curr->ref, curr->value)); noteCurrentIsReached(); } @@ -911,7 +922,16 @@ struct Array2Struct : PostWalker { } // Convert the ArrayGet into a StructGet. - replaceCurrent(builder.makeStructGet(getIndex(curr->index), curr->ref, curr->type, curr->signed_)); + auto index = getIndex(curr->index); + if (index >= numFields) { + // This is an OOB array.set, which means we trap. + replaceCurrent(builder.makeSequence( + builder.makeDrop(curr->ref), + builder.makeUnreachable() + )); + return; + } + replaceCurrent(builder.makeStructGet(index, curr->ref, curr->type, curr->signed_)); noteCurrentIsReached(); } @@ -1002,11 +1022,11 @@ struct Heap2Local { if (!analyzer.escapes(allocation)) { // Convert the allocation and all its uses into a struct. Then convert // the struct into locals. -std::cout << "pre: " << *func << '\n'; +std::cerr << "pre: " << *func << '\n'; auto* structNew = Array2Struct(allocation, analyzer, func, wasm).structNew; -std::cout << "mid: " << *func << '\n'; +std::cerr << "mid: " << *func << '\n'; Struct2Local(structNew, analyzer, func, wasm); -std::cout << "post: " << *func << '\n'; +std::cerr << "post: " << *func << '\n'; } } diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 66cc89bc48a..cabec3a9807 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2289,6 +2289,23 @@ ;; CHECK-NEXT: (local.get $3) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 40) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: 
(unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $array.new_default (local $temp (ref $array)) @@ -2331,6 +2348,18 @@ (i32.const 2) ) ) + ;; OOB operations trap at runtime. + (array.set $array + (local.get $temp) + (i32.const 3) + (i32.const 40) + ) + (drop + (array.get $array + (local.get $temp) + (i32.const 3) + ) + ) ) ;; CHECK: (func $array.new (type $1) (result i32) From 0dd4970428a0855eac39033c70895a7a500cf761 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 14:47:46 -0700 Subject: [PATCH 26/53] work --- src/passes/Heap2Local.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index b622716d02e..cc4b48b45ec 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -892,17 +892,17 @@ struct Array2Struct : PostWalker { return; } - // Convert the ArraySet into a StructSet. - // TODO: the actual chak that the index is constant, after escape anlysi and before opt! + // If this is an OOB array.set then we trap. auto index = getIndex(curr->index); if (index >= numFields) { - // This is an OOB array.set, which means we trap. replaceCurrent(builder.makeBlock( { builder.makeDrop(curr->ref), builder.makeDrop(curr->value), builder.makeUnreachable() } )); return; } + + // Convert the ArraySet into a StructSet. replaceCurrent(builder.makeStructSet(index, curr->ref, curr->value)); noteCurrentIsReached(); } @@ -921,16 +921,17 @@ struct Array2Struct : PostWalker { curr->ref->type = structNew->type; } - // Convert the ArrayGet into a StructGet. + // If this is an OOB array.get then we trap. auto index = getIndex(curr->index); if (index >= numFields) { - // This is an OOB array.set, which means we trap. replaceCurrent(builder.makeSequence( builder.makeDrop(curr->ref), builder.makeUnreachable() )); return; } + + // Convert the ArrayGet into a StructGet. 
replaceCurrent(builder.makeStructGet(index, curr->ref, curr->type, curr->signed_)); noteCurrentIsReached(); } From 4bd7a379027638c849065c71037cc9add081de18 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 14:53:43 -0700 Subject: [PATCH 27/53] work --- src/passes/Heap2Local.cpp | 11 +++++- test/lit/passes/heap2local.wast | 62 ++++++++++++++++++++++++++------- 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index cc4b48b45ec..6a7b5dab012 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -416,6 +416,13 @@ struct EscapeAnalyzer { fullyConsumes = true; } void visitArraySet(ArraySet* curr) { + if (!curr->index->is()) { + // Array operations on nonconstant indexes do not escape in the normal + // sense, but they do escape from our being able to analyze them, so + // stop as soon as we see one. + return; + } + // As StructGet. if (curr->ref == child) { escapes = false; @@ -423,7 +430,9 @@ struct EscapeAnalyzer { } } void visitArrayGet(ArrayGet* curr) { - // As StructSet. 
+ if (!curr->index->is()) { + return; + } escapes = false; fullyConsumes = true; } diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index cabec3a9807..7528b90e7ad 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2222,7 +2222,7 @@ ;; CHECK: (type $array (array (mut i32))) (type $array (array (mut i32))) - ;; CHECK: (func $array.new_default (type $2) + ;; CHECK: (func $array.new_default (type $3) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2370,7 +2370,7 @@ ;; CHECK-NEXT: (local $4 i32) ;; CHECK-NEXT: (local $5 i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result (ref null $3)) + ;; CHECK-NEXT: (block (result (ref null $4)) ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) @@ -2413,26 +2413,26 @@ ) ) - ;; CHECK: (func $array.new_fixed (type $1) (result i32) + ;; CHECK: (func $array.new_fixed (type $2) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) - ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) ;; CHECK-NEXT: (local $3 i32) ;; CHECK-NEXT: (local $4 i32) + ;; CHECK-NEXT: (local $5 i32) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result nullref) - ;; CHECK-NEXT: (local.set $3 - ;; CHECK-NEXT: (i32.const 42) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.set $4 - ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: (call $get-i32) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.set $1 - ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: (local.set $5 + ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.set $2 ;; CHECK-NEXT: (local.get $4) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (local.get $5) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -2440,15 +2440,15 @@ ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: ) ;; 
CHECK-NEXT: ) - (func $array.new_fixed (result i32) + (func $array.new_fixed (param $x i32) (result i32) (local $temp (ref $array)) ;; This is also optimizable. (local.set $temp (array.new_fixed $array 2 - (i32.const 42) + (call $get-i32) ;; test side effects in a value (i32.const 1337) ) ) @@ -2457,4 +2457,40 @@ (i32.const 0) ) ) + + ;; CHECK: (func $get-i32 (type $1) (result i32) + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + (func $get-i32 (result i32) + ;; Helper for the above. + (i32.const 1337) + ) + + ;; CHECK: (func $array.nonconstant_size (type $2) (param $x i32) (result i32) + ;; CHECK-NEXT: (local $temp (ref $array)) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (array.new $array + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (array.get $array + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $array.nonconstant_size (param $x i32) (result i32) + (local $temp (ref $array)) + (local.set $temp + (array.new $array + (i32.const 42) + ;; We cannot optimize a nonconstant size. 
+ (local.get $x) + ) + ) + (array.get $array + (local.get $temp) + (i32.const 0) + ) + ) ) From bdb88d6268195d61cddcad9111c0b5e1d09010df Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 14:54:24 -0700 Subject: [PATCH 28/53] work --- test/lit/passes/heap2local.wast | 36 +++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 7528b90e7ad..d1c46a7d867 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2362,7 +2362,7 @@ ) ) - ;; CHECK: (func $array.new (type $1) (result i32) + ;; CHECK: (func $array.new (type $2) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2413,7 +2413,7 @@ ) ) - ;; CHECK: (func $array.new_fixed (type $2) (param $x i32) (result i32) + ;; CHECK: (func $array.new_fixed (type $1) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $2 i32) ;; CHECK-NEXT: (local $3 i32) @@ -2458,7 +2458,7 @@ ) ) - ;; CHECK: (func $get-i32 (type $1) (result i32) + ;; CHECK: (func $get-i32 (type $2) (result i32) ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) (func $get-i32 (result i32) @@ -2466,7 +2466,7 @@ (i32.const 1337) ) - ;; CHECK: (func $array.nonconstant_size (type $2) (param $x i32) (result i32) + ;; CHECK: (func $array.nonconstant_size (type $1) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local.set $temp ;; CHECK-NEXT: (array.new $array @@ -2493,4 +2493,32 @@ (i32.const 0) ) ) + + ;; CHECK: (func $array.nonconstant_get (type $1) (param $x i32) (result i32) + ;; CHECK-NEXT: (local $temp (ref $array)) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (array.new $array + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (array.get $array + ;; CHECK-NEXT: (local.get $temp) + ;; 
CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $array.nonconstant_get (param $x i32) (result i32) + (local $temp (ref $array)) + (local.set $temp + (array.new $array + (i32.const 42) + (i32.const 3) + ) + ) + (array.get $array + (local.get $temp) + ;; We cannot optimize a nonconstant get. + (local.get $x) + ) + ) ) From 3a195cf75019eb6683b3b769762495180bc70ee5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 14:55:12 -0700 Subject: [PATCH 29/53] work --- test/lit/passes/heap2local.wast | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index d1c46a7d867..28d42464da5 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2521,4 +2521,34 @@ (local.get $x) ) ) + + ;; CHECK: (func $array.nonconstant_set (type $5) (param $x i32) + ;; CHECK-NEXT: (local $temp (ref $array)) + ;; CHECK-NEXT: (local.set $temp + ;; CHECK-NEXT: (array.new $array + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (array.set $array + ;; CHECK-NEXT: (local.get $temp) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 100) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $array.nonconstant_set (param $x i32) + (local $temp (ref $array)) + (local.set $temp + (array.new $array + (i32.const 42) + (i32.const 3) + ) + ) + (array.set $array + (local.get $temp) + ;; We cannot optimize a nonconstant set. 
+ (local.get $x) + (i32.const 100) + ) + ) ) From 1b1cc3b273f0182d61d96f895b0ec9f870a4d5ef Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 14:57:12 -0700 Subject: [PATCH 30/53] work --- test/lit/passes/heap2local.wast | 105 ++++++++++++++++++++++++++++---- 1 file changed, 93 insertions(+), 12 deletions(-) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 28d42464da5..94ae8c0ae4b 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2362,7 +2362,7 @@ ) ) - ;; CHECK: (func $array.new (type $2) (result i32) + ;; CHECK: (func $array.new (type $1) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2413,7 +2413,7 @@ ) ) - ;; CHECK: (func $array.new_fixed (type $1) (param $x i32) (result i32) + ;; CHECK: (func $array.new_fixed (type $2) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $2 i32) ;; CHECK-NEXT: (local $3 i32) @@ -2458,15 +2458,7 @@ ) ) - ;; CHECK: (func $get-i32 (type $2) (result i32) - ;; CHECK-NEXT: (i32.const 1337) - ;; CHECK-NEXT: ) - (func $get-i32 (result i32) - ;; Helper for the above. 
- (i32.const 1337) - ) - - ;; CHECK: (func $array.nonconstant_size (type $1) (param $x i32) (result i32) + ;; CHECK: (func $array.nonconstant_size (type $2) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local.set $temp ;; CHECK-NEXT: (array.new $array @@ -2494,7 +2486,7 @@ ) ) - ;; CHECK: (func $array.nonconstant_get (type $1) (param $x i32) (result i32) + ;; CHECK: (func $array.nonconstant_get (type $2) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local.set $temp ;; CHECK-NEXT: (array.new $array @@ -2551,4 +2543,93 @@ (i32.const 100) ) ) + + ;; CHECK: (func $array.folded (type $1) (result i32) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local $4 i32) + ;; CHECK-NEXT: (local $5 i32) + ;; CHECK-NEXT: (local $6 i32) + ;; CHECK-NEXT: (local $7 i32) + ;; CHECK-NEXT: (local $8 i32) + ;; CHECK-NEXT: (local $9 i32) + ;; CHECK-NEXT: (local $10 i32) + ;; CHECK-NEXT: (local $11 i32) + ;; CHECK-NEXT: (local $12 i32) + ;; CHECK-NEXT: (local $13 i32) + ;; CHECK-NEXT: (local $14 i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $6)) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (call $get-i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $8 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $9 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $10 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $11 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $12 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $13 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $14 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get 
$8) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (local.get $9) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (local.get $10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $4 + ;; CHECK-NEXT: (local.get $11) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $5 + ;; CHECK-NEXT: (local.get $12) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $6 + ;; CHECK-NEXT: (local.get $13) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $7 + ;; CHECK-NEXT: (local.get $14) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $7) + ;; CHECK-NEXT: ) + (func $array.folded (result i32) + ;; Test a form without local.get/set operations. + (array.get $array + (array.new $array + (call $get-i32) + (i32.const 7) + ) + (i32.const 6) + ) + ) + + ;; CHECK: (func $get-i32 (type $1) (result i32) + ;; CHECK-NEXT: (i32.const 1337) + ;; CHECK-NEXT: ) + (func $get-i32 (result i32) + ;; Helper for the above. + (i32.const 1337) + ) ) From 996ab0557edd589101681ecbbb643400b99b7515 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 14:58:58 -0700 Subject: [PATCH 31/53] work --- src/passes/Heap2Local.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 6a7b5dab012..f7a5bdc2920 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -803,17 +803,15 @@ struct Struct2Local : PostWalker { // efficient, but it would need to be more complex. struct Array2Struct : PostWalker { Expression* allocation; - // TODO which of these do we need? 
EscapeAnalyzer& analyzer; Function* func; - Module& wasm; Builder builder; Array2Struct(Expression* allocation, EscapeAnalyzer& analyzer, Function* func, Module& wasm) - : allocation(allocation), analyzer(analyzer), func(func), wasm(wasm), + : allocation(allocation), analyzer(analyzer), func(func), builder(wasm) { // Build a proper struct type: as many fields as the size of the array, all @@ -1032,16 +1030,12 @@ struct Heap2Local { if (!analyzer.escapes(allocation)) { // Convert the allocation and all its uses into a struct. Then convert // the struct into locals. -std::cerr << "pre: " << *func << '\n'; auto* structNew = Array2Struct(allocation, analyzer, func, wasm).structNew; -std::cerr << "mid: " << *func << '\n'; Struct2Local(structNew, analyzer, func, wasm); -std::cerr << "post: " << *func << '\n'; } } - // Next, process all structNews, both original ones and ones that used to be - // arrays. + // Next, process all structNews. for (auto* allocation : finder.structNews) { // As above, we must be able to use locals for this data. 
if (!canHandleAsLocals(allocation->type)) { From 7ebb09401acf1f6eea702ec8cd5f8af4ce5606cd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 14:59:55 -0700 Subject: [PATCH 32/53] test --- scripts/fuzz_opt.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 3c5508fa2bb..0ee24081bc9 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1528,6 +1528,25 @@ def write_commands(commands, filename): ("--gufa-optimizing",), ("--local-cse",), ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), + ("--heap2local",), ("--remove-unused-names", "--heap2local",), ("--generate-stack-ir",), ("--licm",), From 00616b80169f6066b798ef1f11f59592a5efcb40 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 15:32:13 -0700 Subject: [PATCH 33/53] fix --- src/passes/Heap2Local.cpp | 24 ++- test/lit/passes/heap2local.wast | 253 ++++++++++++++++++++++++++++++++ 2 files changed, 274 insertions(+), 3 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index f7a5bdc2920..09d57b7a8ab 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -998,16 +998,34 @@ struct Heap2Local { } void visitArrayNew(ArrayNew* curr) { // Only new arrays of fixed size are relevant for us. - if (curr->type != Type::unreachable && curr->size->is()) { - // TODO: Some reasonable limit on size? Also above and below. 
+ if (curr->type != Type::unreachable && isValidSize(curr->size)) { arrayNews.push_back(curr); } } void visitArrayNewFixed(ArrayNewFixed* curr) { - if (curr->type != Type::unreachable) { + if (curr->type != Type::unreachable && + isValidSize(curr->values.size())) { arrayNews.push_back(curr); } } + + bool isValidSize(Expression* size) { + // The size of an array is valid if it is constant, and its value is + // valid. + if (auto* c = size->dynCast()) { + return isValidSize(c->value.getUnsigned()); + } + return false; + } + + bool isValidSize(Index size) { + // Set a reasonable limit on the size here, as valid wasm can contain + // things like (array.new (i32.const -1)) which will likely fail at + // runtime on a VM limitation on array size. We also are converting a + // heap allocation to a stack allocation, which can be noticeable in + // some cases, so to be careful here use a fairly small limit. + return size < 20; + } } finder; finder.walk(func->body); diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 94ae8c0ae4b..178d75264b0 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2625,6 +2625,259 @@ ) ) + ;; CHECK: (func $array.folded.multiple (type $3) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (local $4 i32) + ;; CHECK-NEXT: (local $5 i32) + ;; CHECK-NEXT: (local $6 i32) + ;; CHECK-NEXT: (local $7 i32) + ;; CHECK-NEXT: (local $8 i32) + ;; CHECK-NEXT: (local $9 i32) + ;; CHECK-NEXT: (local $10 i32) + ;; CHECK-NEXT: (local $11 i32) + ;; CHECK-NEXT: (local $12 i32) + ;; CHECK-NEXT: (local $13 i32) + ;; CHECK-NEXT: (local $14 i32) + ;; CHECK-NEXT: (local $15 i32) + ;; CHECK-NEXT: (local $16 i32) + ;; CHECK-NEXT: (local $17 i32) + ;; CHECK-NEXT: (local $18 i32) + ;; CHECK-NEXT: (local $19 i32) + ;; CHECK-NEXT: (local $20 i32) + ;; CHECK-NEXT: (local $21 i32) + ;; CHECK-NEXT: (local $22 i32) + ;; 
CHECK-NEXT: (local $23 i32) + ;; CHECK-NEXT: (local $24 i32) + ;; CHECK-NEXT: (local $25 i32) + ;; CHECK-NEXT: (local $26 i32) + ;; CHECK-NEXT: (local $27 i32) + ;; CHECK-NEXT: (local $28 i32) + ;; CHECK-NEXT: (local $29 i32) + ;; CHECK-NEXT: (local $30 i32) + ;; CHECK-NEXT: (local $31 i32) + ;; CHECK-NEXT: (local $32 i32) + ;; CHECK-NEXT: (local $33 i32) + ;; CHECK-NEXT: (local $34 i32) + ;; CHECK-NEXT: (local $35 i32) + ;; CHECK-NEXT: (local $36 i32) + ;; CHECK-NEXT: (local $37 i32) + ;; CHECK-NEXT: (local $38 i32) + ;; CHECK-NEXT: (local $39 i32) + ;; CHECK-NEXT: (local $40 i32) + ;; CHECK-NEXT: (local $41 i32) + ;; CHECK-NEXT: (local $42 i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $7)) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (call $get-i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $8)) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (call $get-i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $9)) + ;; CHECK-NEXT: (local.set $4 + ;; CHECK-NEXT: (call $get-i32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $24 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $25 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $26 + ;; CHECK-NEXT: (local.get 
$4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $27 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $28 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $29 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $30 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $31 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $32 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $33 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $34 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $35 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $36 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $37 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $38 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $39 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $40 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $41 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $42 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $5 + ;; CHECK-NEXT: (local.get $24) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $6 + ;; CHECK-NEXT: (local.get $25) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $7 + ;; CHECK-NEXT: (local.get $26) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $8 + ;; CHECK-NEXT: (local.get $27) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $9 + ;; CHECK-NEXT: (local.get $28) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $10 + ;; CHECK-NEXT: (local.get $29) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $11 + ;; CHECK-NEXT: (local.get $30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $12 + ;; CHECK-NEXT: (local.get $31) 
+ ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $13 + ;; CHECK-NEXT: (local.get $32) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $14 + ;; CHECK-NEXT: (local.get $33) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $15 + ;; CHECK-NEXT: (local.get $34) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $16 + ;; CHECK-NEXT: (local.get $35) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $17 + ;; CHECK-NEXT: (local.get $36) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $18 + ;; CHECK-NEXT: (local.get $37) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $19 + ;; CHECK-NEXT: (local.get $38) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $20 + ;; CHECK-NEXT: (local.get $39) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $21 + ;; CHECK-NEXT: (local.get $40) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $22 + ;; CHECK-NEXT: (local.get $41) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $23 + ;; CHECK-NEXT: (local.get $42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $11) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.get $array + ;; CHECK-NEXT: (array.new $array + ;; CHECK-NEXT: (call $get-i32) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $array.folded.multiple + ;; We allow sizes 0-19, so these will be optimized. + (drop + (array.new $array + (call $get-i32) + (i32.const 0) + ) + ) + (drop + (array.get $array + (array.new $array + (call $get-i32) + (i32.const 1) + ) + (i32.const 0) + ) + ) + (drop + (array.get $array + (array.new $array + (call $get-i32) + (i32.const 19) + ) + (i32.const 6) + ) + ) + + ;; 20 is too much, however. 
+ (drop + (array.get $array + (array.new $array + (call $get-i32) + (i32.const 20) + ) + (i32.const 6) + ) + ) + ) + ;; CHECK: (func $get-i32 (type $1) (result i32) ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) From e6480d64526fb46d2a90129a775b50685107f17c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 15:32:32 -0700 Subject: [PATCH 34/53] format --- src/passes/Heap2Local.cpp | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 09d57b7a8ab..4a96c4c087f 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -811,8 +811,7 @@ struct Array2Struct : PostWalker { EscapeAnalyzer& analyzer, Function* func, Module& wasm) - : allocation(allocation), analyzer(analyzer), func(func), - builder(wasm) { + : allocation(allocation), analyzer(analyzer), func(func), builder(wasm) { // Build a proper struct type: as many fields as the size of the array, all // of the same type as the array's element. @@ -902,10 +901,9 @@ struct Array2Struct : PostWalker { // If this is an OOB array.set then we trap. auto index = getIndex(curr->index); if (index >= numFields) { - replaceCurrent(builder.makeBlock( - { builder.makeDrop(curr->ref), builder.makeDrop(curr->value), - builder.makeUnreachable() } - )); + replaceCurrent(builder.makeBlock({builder.makeDrop(curr->ref), + builder.makeDrop(curr->value), + builder.makeUnreachable()})); return; } @@ -931,15 +929,14 @@ struct Array2Struct : PostWalker { // If this is an OOB array.get then we trap. auto index = getIndex(curr->index); if (index >= numFields) { - replaceCurrent(builder.makeSequence( - builder.makeDrop(curr->ref), - builder.makeUnreachable() - )); + replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref), + builder.makeUnreachable())); return; } // Convert the ArrayGet into a StructGet. 
- replaceCurrent(builder.makeStructGet(index, curr->ref, curr->type, curr->signed_)); + replaceCurrent( + builder.makeStructGet(index, curr->ref, curr->type, curr->signed_)); noteCurrentIsReached(); } @@ -951,13 +948,9 @@ struct Array2Struct : PostWalker { // Inform the analyzer that the current expression (which we just replaced) // has been reached in its analysis. We are replacing something it reached, // and want it to consider it as its equivalent. - void noteCurrentIsReached() { - noteIsReached(getCurrent()); - } + void noteCurrentIsReached() { noteIsReached(getCurrent()); } - void noteIsReached(Expression* curr) { - analyzer.reached.insert(curr); - } + void noteIsReached(Expression* curr) { analyzer.reached.insert(curr); } }; // Core Heap2Local optimization that operates on a function: Builds up the data @@ -1048,7 +1041,8 @@ struct Heap2Local { if (!analyzer.escapes(allocation)) { // Convert the allocation and all its uses into a struct. Then convert // the struct into locals. - auto* structNew = Array2Struct(allocation, analyzer, func, wasm).structNew; + auto* structNew = + Array2Struct(allocation, analyzer, func, wasm).structNew; Struct2Local(structNew, analyzer, func, wasm); } } From bffbdd8df0395698e9edd00f865810f78c97fca5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 16:42:10 -0700 Subject: [PATCH 35/53] fix --- src/passes/Heap2Local.cpp | 11 +++++++ test/lit/passes/heap2local.wast | 52 +++++++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 4a96c4c087f..a9a17b9eff8 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -865,8 +865,15 @@ struct Array2Struct : PostWalker { // technically have the new struct type, but we are going to optimize the // struct into locals anyhow. 
walk(func->body); + + if (refinalize) { + ReFinalize().walkFunctionInModule(func, &wasm); + } } + // In rare cases we may need to refinalize, as with Struct2Local. + bool refinalize = false; + // The number of slots in the array (which will become the number of fields in // the struct). Index numFields; @@ -904,6 +911,8 @@ struct Array2Struct : PostWalker { replaceCurrent(builder.makeBlock({builder.makeDrop(curr->ref), builder.makeDrop(curr->value), builder.makeUnreachable()})); + // We added an unreachable, and must propagate that type. + refinalize = true; return; } @@ -931,6 +940,8 @@ struct Array2Struct : PostWalker { if (index >= numFields) { replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref), builder.makeUnreachable())); + // We added an unreachable, and must propagate that type. + refinalize = true; return; } diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 178d75264b0..285de2b2402 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2222,7 +2222,7 @@ ;; CHECK: (type $array (array (mut i32))) (type $array (array (mut i32))) - ;; CHECK: (func $array.new_default (type $3) + ;; CHECK: (func $array.new_default (type $2) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2413,7 +2413,7 @@ ) ) - ;; CHECK: (func $array.new_fixed (type $2) (param $x i32) (result i32) + ;; CHECK: (func $array.new_fixed (type $3) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $2 i32) ;; CHECK-NEXT: (local $3 i32) @@ -2458,7 +2458,7 @@ ) ) - ;; CHECK: (func $array.nonconstant_size (type $2) (param $x i32) (result i32) + ;; CHECK: (func $array.nonconstant_size (type $3) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local.set $temp ;; CHECK-NEXT: (array.new $array @@ -2486,7 +2486,7 @@ ) ) - ;; CHECK: (func $array.nonconstant_get (type $2) (param $x i32) (result i32) + 
;; CHECK: (func $array.nonconstant_get (type $3) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local.set $temp ;; CHECK-NEXT: (array.new $array @@ -2625,7 +2625,7 @@ ) ) - ;; CHECK: (func $array.folded.multiple (type $3) + ;; CHECK: (func $array.folded.multiple (type $2) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2878,6 +2878,48 @@ ) ) + ;; CHECK: (func $array.nested.refinalize.get (type $1) (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $array.nested.refinalize.get (result i32) + ;; The get here is of an index that is out of bounds, and will trap. We + ;; should refinalize so the unreachability is propagated and we do not + ;; error on validation. + (array.get $array + (array.new_default $array + (i32.const 0) + ) + (i32.const 0) + ) + ) + + ;; CHECK: (func $array.nested.refinalize.set (type $2) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $array.nested.refinalize.set + ;; As above, but with a set. 
+ (array.set $array + (array.new_default $array + (i32.const 0) + ) + (i32.const 0) + (i32.const 42) + ) + ) + ;; CHECK: (func $get-i32 (type $1) (result i32) ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) From 1c9e1c51ef66b0b7cf389732f6e94c6248fc29e5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 16:58:38 -0700 Subject: [PATCH 36/53] commento --- src/passes/Heap2Local.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index a9a17b9eff8..b45ea3b96c9 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -799,8 +799,8 @@ struct Struct2Local : PostWalker { // An optimizer that handles the rewriting to turn a nonescaping array // allocation into a struct allocation. Struct2Local can then be run on that // allocation. -// TODO: Doing a single rewrite walk at the end (for all structs) would be more -// efficient, but it would need to be more complex. +// TODO: As with Struct2Local doing a single rewrite walk at the end (for all +// structs) would be more efficient, but more complex. struct Array2Struct : PostWalker { Expression* allocation; EscapeAnalyzer& analyzer; @@ -813,8 +813,8 @@ struct Array2Struct : PostWalker { Module& wasm) : allocation(allocation), analyzer(analyzer), func(func), builder(wasm) { - // Build a proper struct type: as many fields as the size of the array, all - // of the same type as the array's element. + // Build the struct type we need: as many fields as the size of the array, + // all of the same type as the array's element. auto element = allocation->type.getHeapType().getArray().element; if (auto* arrayNew = allocation->dynCast()) { numFields = getIndex(arrayNew->size); @@ -829,13 +829,13 @@ struct Array2Struct : PostWalker { } HeapType structType = Struct(fields); - // Generate a proper StructNew. + // Generate a StructNew to replace the ArrayNew*. 
if (auto* arrayNew = allocation->dynCast()) { if (arrayNew->isWithDefault()) { structNew = builder.makeStructNew(structType, {}); arrayNewReplacement = structNew; } else { - // The ArrayNew splatting a single value onto each slot of the array. To + // The ArrayNew is writing the same value to each slot of the array. To // do the same for the struct, we store that value in an local and // generate multiple local.gets of it. auto local = builder.addVar(func, element.type); @@ -849,7 +849,9 @@ struct Array2Struct : PostWalker { // and the structNew. arrayNewReplacement = builder.makeSequence(set, structNew); // The data flows through the new block we just added: inform the - // analysis of that. + // analysis of that by telling it to treat it as code that it reached + // (only code we reached during the tracing of the allocation through + // the function will be optimized in Struct2Local). noteIsReached(arrayNewReplacement); } } else if (auto* arrayNewFixed = allocation->dynCast()) { @@ -863,7 +865,8 @@ struct Array2Struct : PostWalker { // Replace the things we need to using the visit* methods. Note that we do // not bother to update types: all the places with the array type should // technically have the new struct type, but we are going to optimize the - // struct into locals anyhow. + // struct into locals anyhow so those types are vanishing. However, during + // internal debugging of this pass you may see stale types. 
walk(func->body); if (refinalize) { From db6780ffa306fa8eeaad5a4d3868616c1999a3fb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 17:29:32 -0700 Subject: [PATCH 37/53] test --- test/lit/passes/heap2local.wast | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 285de2b2402..5ef1e271ca7 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2920,6 +2920,21 @@ ) ) + (func $array.flowing.type + ;; The array reference here flows through some places that need to be + ;; updated when we optimize. In particular the block's type will change. + (drop + (block $label (result (ref $array)) + (br $label + (array.new $array + (i32.const 1) + (i32.const 1) + ) + ) + ) + ) + ) + ;; CHECK: (func $get-i32 (type $1) (result i32) ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) From e83312274f7e20ce53a9baa9d2b55f8a6332bd46 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Apr 2024 17:29:53 -0700 Subject: [PATCH 38/53] test --- src/passes/Heap2Local.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index b45ea3b96c9..69523717fbb 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -867,6 +867,7 @@ struct Array2Struct : PostWalker { // technically have the new struct type, but we are going to optimize the // struct into locals anyhow so those types are vanishing. However, during // internal debugging of this pass you may see stale types. 
+ // XXX likely we need ot walk |reached| or such walk(func->body); if (refinalize) { From 8a0431abd5b9a2f238d2189cb42425695cdaffca Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 8 Apr 2024 09:22:30 -0700 Subject: [PATCH 39/53] work --- src/passes/Heap2Local.cpp | 24 ++++++--- test/lit/passes/heap2local.wast | 91 +++++++++++++++++++++++++-------- 2 files changed, 87 insertions(+), 28 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 69523717fbb..b4b95b9550f 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -815,7 +815,8 @@ struct Array2Struct : PostWalker { // Build the struct type we need: as many fields as the size of the array, // all of the same type as the array's element. - auto element = allocation->type.getHeapType().getArray().element; + auto arrayType = allocation->type.getHeapType(); + auto element = arrayType.getArray().element; if (auto* arrayNew = allocation->dynCast()) { numFields = getIndex(arrayNew->size); } else if (auto* arrayNewFixed = allocation->dynCast()) { @@ -862,14 +863,23 @@ struct Array2Struct : PostWalker { WASM_UNREACHABLE("bad allocation"); } - // Replace the things we need to using the visit* methods. Note that we do - // not bother to update types: all the places with the array type should - // technically have the new struct type, but we are going to optimize the - // struct into locals anyhow so those types are vanishing. However, during - // internal debugging of this pass you may see stale types. - // XXX likely we need ot walk |reached| or such + // Replace the things we need to using the visit* methods. walk(func->body); + // Update types along the path reached by the allocation: whenever we see + // the array type, it should be the struct type. 
+ auto nullArray = Type(arrayType, Nullable); + auto nonNullArray = Type(arrayType, NonNullable); + auto nullStruct = Type(structType, Nullable); + auto nonNullStruct = Type(structType, NonNullable); + for (auto* reached : analyzer.reached) { + if (reached->type == nullArray) { + reached->type = nullStruct; + } else if (reached->type == nonNullArray) { + reached->type = nonNullStruct; + } + } + if (refinalize) { ReFinalize().walkFunctionInModule(func, &wasm); } diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 5ef1e271ca7..4c3e3bb47ca 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -1,4 +1,4 @@ -;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. ;; (remove-unused-names allows the pass to see that blocks flow values) ;; RUN: foreach %s %t wasm-opt -all --remove-unused-names --heap2local -S -o - | filecheck %s @@ -2222,7 +2222,7 @@ ;; CHECK: (type $array (array (mut i32))) (type $array (array (mut i32))) - ;; CHECK: (func $array.new_default (type $2) + ;; CHECK: (func $array.new_default (type $3) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2370,7 +2370,7 @@ ;; CHECK-NEXT: (local $4 i32) ;; CHECK-NEXT: (local $5 i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result (ref null $4)) + ;; CHECK-NEXT: (block (result (ref null $5)) ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) @@ -2413,7 +2413,7 @@ ) ) - ;; CHECK: (func $array.new_fixed (type $3) (param $x i32) (result i32) + ;; CHECK: (func $array.new_fixed (type $4) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $2 i32) ;; CHECK-NEXT: (local $3 i32) @@ -2458,7 +2458,7 @@ ) ) - ;; CHECK: (func $array.nonconstant_size (type $3) (param $x i32) (result i32) + ;; CHECK: (func 
$array.nonconstant_size (type $4) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local.set $temp ;; CHECK-NEXT: (array.new $array @@ -2486,7 +2486,7 @@ ) ) - ;; CHECK: (func $array.nonconstant_get (type $3) (param $x i32) (result i32) + ;; CHECK: (func $array.nonconstant_get (type $4) (param $x i32) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local.set $temp ;; CHECK-NEXT: (array.new $array @@ -2514,7 +2514,7 @@ ) ) - ;; CHECK: (func $array.nonconstant_set (type $5) (param $x i32) + ;; CHECK: (func $array.nonconstant_set (type $6) (param $x i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local.set $temp ;; CHECK-NEXT: (array.new $array @@ -2561,7 +2561,7 @@ ;; CHECK-NEXT: (local $13 i32) ;; CHECK-NEXT: (local $14 i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result (ref null $6)) + ;; CHECK-NEXT: (block (result (ref null $7)) ;; CHECK-NEXT: (local.set $0 ;; CHECK-NEXT: (call $get-i32) ;; CHECK-NEXT: ) @@ -2625,7 +2625,7 @@ ) ) - ;; CHECK: (func $array.folded.multiple (type $2) + ;; CHECK: (func $array.folded.multiple (type $3) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2670,7 +2670,7 @@ ;; CHECK-NEXT: (local $41 i32) ;; CHECK-NEXT: (local $42 i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result (ref null $7)) + ;; CHECK-NEXT: (block (result (ref null $8)) ;; CHECK-NEXT: (local.set $0 ;; CHECK-NEXT: (call $get-i32) ;; CHECK-NEXT: ) @@ -2682,7 +2682,7 @@ ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result (ref null $8)) + ;; CHECK-NEXT: (block (result (ref null $2)) ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (call $get-i32) ;; CHECK-NEXT: ) @@ -2898,7 +2898,7 @@ ) ) - ;; CHECK: (func $array.nested.refinalize.set (type $2) + ;; CHECK: (func $array.nested.refinalize.set (type $3) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result nullref) ;; CHECK-NEXT: (ref.null none) @@ 
-2920,19 +2920,68 @@ ) ) - (func $array.flowing.type + ;; CHECK: (func $array.flowing.type (type $1) (result i32) + ;; CHECK-NEXT: (local $temp (ref $array)) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $2)) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $2)) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $2)) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $array.flowing.type (result i32) + (local $temp (ref $array)) ;; The array reference here flows through some places that need to be - ;; updated when we optimize. In particular the block's type will change. + ;; updated when we optimize. In particular the blocks' types will change. 
+ (local.set $temp + (array.new $array + (i32.const 1) + (i32.const 1) + ) + ) (drop - (block $label (result (ref $array)) - (br $label - (array.new $array - (i32.const 1) - (i32.const 1) - ) - ) + (block $nullable (result (ref null $array)) + (local.get $temp) ) ) + (drop + (block $non-nullable (result (ref $array)) + (local.get $temp) + ) + ) + (array.get $array + (local.get $temp) + (i32.const 0) + ) ) ;; CHECK: (func $get-i32 (type $1) (result i32) From c793aa8bfe51f307a19891447df17750faf3df08 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 8 Apr 2024 09:23:05 -0700 Subject: [PATCH 40/53] test --- test/lit/passes/heap2local.wast | 44 +++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 4c3e3bb47ca..29be7a0d79f 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -7,8 +7,18 @@ ;; CHECK: (type $struct.A (struct (field (mut i32)) (field (mut f64)))) (type $struct.A (struct (field (mut i32)) (field (mut f64)))) + ;; CHECK: (type $1 (func)) + + ;; CHECK: (type $2 (func (result f64))) + ;; CHECK: (type $struct.recursive (struct (field (mut (ref null $struct.recursive))))) + ;; CHECK: (type $4 (func (param (ref null $struct.A)))) + + ;; CHECK: (type $5 (func (result i32))) + + ;; CHECK: (type $6 (func (result anyref))) + ;; CHECK: (type $struct.packed (struct (field (mut i8)))) (type $struct.packed (struct (field (mut i8)))) @@ -18,6 +28,14 @@ (type $struct.nonnullable (struct (field (ref $struct.A)))) + ;; CHECK: (type $8 (func (param i32) (result f64))) + + ;; CHECK: (type $9 (func (param (ref null $struct.recursive)))) + + ;; CHECK: (type $10 (func (param (ref $struct.A)))) + + ;; CHECK: (type $11 (func (param i32))) + ;; CHECK: (func $simple (type $1) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 f64) @@ -1957,6 +1975,8 @@ (module ;; CHECK: (type $A (sub (struct (field (ref null $A))))) (type $A (sub (struct (field (ref 
null $A))))) + ;; CHECK: (type $1 (func (result anyref))) + ;; CHECK: (type $B (sub $A (struct (field (ref $A))))) (type $B (sub $A (struct (field (ref $A))))) @@ -2062,6 +2082,8 @@ (type $A (sub (struct (field (mut i32))))) (type $B (sub $A (struct (field (mut i32))))) + ;; CHECK: (type $0 (func)) + ;; CHECK: (func $func (type $0) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 i32) @@ -2105,6 +2127,10 @@ ;; CHECK: (type $struct (struct (field (mut anyref)))) (type $struct (struct (field (mut anyref)))) + ;; CHECK: (type $1 (func)) + + ;; CHECK: (type $2 (func (result anyref))) + ;; CHECK: (func $multiple-interactions (type $1) ;; CHECK-NEXT: (local $temp (ref $struct)) ;; CHECK-NEXT: (local $1 anyref) @@ -2222,6 +2248,24 @@ ;; CHECK: (type $array (array (mut i32))) (type $array (array (mut i32))) + ;; CHECK: (type $1 (func (result i32))) + + ;; CHECK: (type $2 (struct (field (mut i32)))) + + ;; CHECK: (type $3 (func)) + + ;; CHECK: (type $4 (func (param i32) (result i32))) + + ;; CHECK: (type $5 (struct (field (mut i32)) (field (mut i32)))) + + ;; CHECK: (type $6 (func (param i32))) + + ;; CHECK: (type $7 (struct (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)))) + + ;; CHECK: (type $8 (struct )) + + ;; CHECK: (type $9 (struct (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)) (field (mut i32)))) + ;; CHECK: (func $array.new_default (type $3) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) From bfceeb8806c41169787b161de7b83a8e2f6f19d9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 8 Apr 2024 12:24:29 -0700 Subject: [PATCH 41/53] work --- src/passes/Heap2Local.cpp | 24 
+++++++++++------- test/lit/passes/heap2local.wast | 43 +++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 9 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index b4b95b9550f..d7070a18571 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -1056,8 +1056,7 @@ struct Heap2Local { for (auto* allocation : finder.arrayNews) { // The point of this optimization is to replace heap allocations with // locals, so we must be able to place the data in locals. - auto element = allocation->type.getHeapType().getArray().element; - if (!canHandleAsLocal(element)) { + if (!canHandleAsLocals(allocation->type)) { continue; } @@ -1101,16 +1100,23 @@ struct Heap2Local { } bool canHandleAsLocals(Type type) { - // We filtered out unreachables already. - assert(type != Type::unreachable); + if (type == Type::unreachable) { + return false; + } - auto& fields = type.getHeapType().getStruct().fields; - for (auto field : fields) { - if (!canHandleAsLocal(field)) { - return false; + auto heapType = type.getHeapType(); + if (heapType.isStruct()) { + auto& fields = heapType.getStruct().fields; + for (auto field : fields) { + if (!canHandleAsLocal(field)) { + return false; + } } + return true; } - return true; + + assert(heapType.isArray()); + return canHandleAsLocal(heapType.getArray().element); } }; diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 29be7a0d79f..195ada53aca 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -3036,3 +3036,46 @@ (i32.const 1337) ) ) + +;; Arrays with reference values. 
+(module + (type $array (sub (array (ref null $array)))) + + ;; CHECK: (type $0 (func)) + + ;; CHECK: (func $nested-unreachable (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block ;; (replaces unreachable ArrayNew we can't emit) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $nested-unreachable + ;; The array.get in the middle is out of bounds, and will cause the outer + ;; array.new to become unreachable. + (drop + (array.new $array + (array.get $array + (array.new_default $array + (i32.const 0) + ) + (i32.const 0) + ) + (i32.const 0) + ) + ) + ) +) From 9b2be2220f3d3fcfdd836dab07f23a468f6cb1a9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 8 Apr 2024 12:26:58 -0700 Subject: [PATCH 42/53] test? --- test/lit/passes/heap2local.wast | 65 ++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 195ada53aca..8fa85781d48 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -3039,11 +3039,72 @@ ;; Arrays with reference values. 
(module + ;; CHECK: (type $array (sub (array (ref null $array)))) (type $array (sub (array (ref null $array)))) - ;; CHECK: (type $0 (func)) - ;; CHECK: (func $nested-unreachable (type $0) + ;; CHECK: (type $1 (func)) + + ;; CHECK: (type $2 (struct (field (ref null $array)) (field (ref null $array)) (field (ref null $array)))) + + ;; CHECK: (func $nested (type $1) + ;; CHECK-NEXT: (local $0 (ref null $array)) + ;; CHECK-NEXT: (local $1 (ref null $array)) + ;; CHECK-NEXT: (local $2 (ref null $array)) + ;; CHECK-NEXT: (local $3 (ref null $array)) + ;; CHECK-NEXT: (local $4 (ref null $array)) + ;; CHECK-NEXT: (local $5 (ref null $array)) + ;; CHECK-NEXT: (local $6 (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $2)) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (array.new $array + ;; CHECK-NEXT: (array.new_default $array + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $4 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $5 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $6 + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (local.get $5) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (local.get $6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $nested + ;; Nested array.new operations, all of whom can be optimized away. 
+ (drop + (array.new $array + (array.new $array + (array.new_default $array + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3) + ) + ) + ) + + ;; CHECK: (func $nested-unreachable (type $1) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block ;; (replaces unreachable ArrayNew we can't emit) ;; CHECK-NEXT: (drop From 58253c77294b4c0b434ec49a2826562fe942eb01 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 8 Apr 2024 12:30:49 -0700 Subject: [PATCH 43/53] fix --- test/lit/passes/heap2local.wast | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 8fa85781d48..a72914de873 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -3090,7 +3090,9 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $nested - ;; Nested array.new operations, all of whom can be optimized away. + ;; Nested array.new operations, all of whom can be optimized away in + ;; principle. We do only a single iteration here for now, which optimizes + ;; away the outermost array.new (see TODO in the pass about iterations). 
(drop (array.new $array (array.new $array From 08fe484b79f4a63ff8909fdf844020fc5cf23cb4 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 8 Apr 2024 13:12:23 -0700 Subject: [PATCH 44/53] undo --- scripts/fuzz_opt.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 0ee24081bc9..3c5508fa2bb 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -1528,25 +1528,6 @@ def write_commands(commands, filename): ("--gufa-optimizing",), ("--local-cse",), ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), - ("--heap2local",), ("--remove-unused-names", "--heap2local",), ("--generate-stack-ir",), ("--licm",), From 41c0765f8e9e9cb74e943d91cac7e19f950bbfbe Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 8 Apr 2024 13:17:14 -0700 Subject: [PATCH 45/53] nicer --- src/passes/Heap2Local.cpp | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index d7070a18571..fc38de194a1 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -863,11 +863,10 @@ struct Array2Struct : PostWalker { WASM_UNREACHABLE("bad allocation"); } - // Replace the things we need to using the visit* methods. - walk(func->body); - // Update types along the path reached by the allocation: whenever we see - // the array type, it should be the struct type. + // the array type, it should be the struct type. Note that we do this before + // the walk that is after us, because the walk may read these types and + // depend on them to be valid. 
auto nullArray = Type(arrayType, Nullable); auto nonNullArray = Type(arrayType, NonNullable); auto nullStruct = Type(structType, Nullable); @@ -880,6 +879,14 @@ struct Array2Struct : PostWalker { } } + // Technically we should also fix up the types of locals as well, but after + // Struct2Local those locals will no longer be used anyhow, so avoid that + // work (though it makes the IR temporarily invalid in between Array2Struct + // and Struct2Local). + + // Replace the things we need to using the visit* methods. + walk(func->body); + if (refinalize) { ReFinalize().walkFunctionInModule(func, &wasm); } @@ -940,15 +947,6 @@ struct Array2Struct : PostWalker { return; } - // As mentioned above we do not fix up types along the way from array to - // struct, but an exception we must make here is to fix the type of the - // |ref| child, which is used in |builder.makeStructGet| (if it is - // reachable). - if (curr->ref->type != Type::unreachable && - curr->ref->type.getHeapType().isArray()) { - curr->ref->type = structNew->type; - } - // If this is an OOB array.get then we trap. auto index = getIndex(curr->index); if (index >= numFields) { From 37de9a5fd491ddbc72ea71a2f83259ea46fd3022 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 8 Apr 2024 13:17:57 -0700 Subject: [PATCH 46/53] comment --- src/passes/Heap2Local.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index fc38de194a1..28161bbf898 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -880,9 +880,9 @@ struct Array2Struct : PostWalker { } // Technically we should also fix up the types of locals as well, but after - // Struct2Local those locals will no longer be used anyhow, so avoid that - // work (though it makes the IR temporarily invalid in between Array2Struct - // and Struct2Local). 
+ // Struct2Local those locals will no longer be used anyhow (the locals hold + // allocations that are removed), so avoid that work (though it makes the + // IR temporarily invalid in between Array2Struct and Struct2Local). // Replace the things we need to using the visit* methods. walk(func->body); From f1e9013fd35af6fdff4846fedd71ea241edccd48 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 9 Apr 2024 08:34:57 -0700 Subject: [PATCH 47/53] fix --- src/passes/Heap2Local.cpp | 13 ++- test/lit/passes/heap2local.wast | 149 ++++++++++++++++++++++++++++---- 2 files changed, 144 insertions(+), 18 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 28161bbf898..fe2870a78c2 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -872,9 +872,18 @@ struct Array2Struct : PostWalker { auto nullStruct = Type(structType, Nullable); auto nonNullStruct = Type(structType, NonNullable); for (auto* reached : analyzer.reached) { - if (reached->type == nullArray) { + // We must check subtyping here because the allocation may be upcast as it + // flows around. If we do see such upcasting then we are refining here and + // must refinalize. 
+ if (Type::isSubType(nullArray, reached->type)) { + if (nullArray != reached->type) { + refinalize = true; + } reached->type = nullStruct; - } else if (reached->type == nonNullArray) { + } else if (Type::isSubType(nonNullArray, reached->type)) { + if (nonNullArray != reached->type) { + refinalize = true; + } reached->type = nonNullStruct; } } diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index a72914de873..7ff5ad8d092 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2248,9 +2248,9 @@ ;; CHECK: (type $array (array (mut i32))) (type $array (array (mut i32))) - ;; CHECK: (type $1 (func (result i32))) + ;; CHECK: (type $1 (struct (field (mut i32)))) - ;; CHECK: (type $2 (struct (field (mut i32)))) + ;; CHECK: (type $2 (func (result i32))) ;; CHECK: (type $3 (func)) @@ -2406,7 +2406,7 @@ ) ) - ;; CHECK: (func $array.new (type $1) (result i32) + ;; CHECK: (func $array.new (type $2) (result i32) ;; CHECK-NEXT: (local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2588,7 +2588,7 @@ ) ) - ;; CHECK: (func $array.folded (type $1) (result i32) + ;; CHECK: (func $array.folded (type $2) (result i32) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) @@ -2726,7 +2726,7 @@ ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result (ref null $2)) + ;; CHECK-NEXT: (block (result (ref null $1)) ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (call $get-i32) ;; CHECK-NEXT: ) @@ -2922,7 +2922,7 @@ ) ) - ;; CHECK: (func $array.nested.refinalize.get (type $1) (result i32) + ;; CHECK: (func $array.nested.refinalize.get (type $2) (result i32) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result nullref) ;; CHECK-NEXT: (ref.null none) @@ -2964,13 +2964,13 @@ ) ) - ;; CHECK: (func $array.flowing.type (type $1) (result i32) + ;; CHECK: (func $array.flowing.type (type $2) (result i32) ;; CHECK-NEXT: 
(local $temp (ref $array)) ;; CHECK-NEXT: (local $1 i32) ;; CHECK-NEXT: (local $2 i32) ;; CHECK-NEXT: (local $3 i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result (ref null $2)) + ;; CHECK-NEXT: (block (result (ref null $1)) ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) @@ -2986,12 +2986,22 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result (ref null $2)) + ;; CHECK-NEXT: (block (result (ref null $1)) ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result (ref null $2)) + ;; CHECK-NEXT: (block (result (ref null $1)) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $1)) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $1)) ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -3022,13 +3032,25 @@ (local.get $temp) ) ) + ;; Test supertypes too. + (drop + (block $non-nullable (result (ref array)) + (local.get $temp) + ) + ) + (drop + (block $non-nullable (result (ref null array)) + (local.get $temp) + ) + ) + ;; Read from the array as well. 
(array.get $array (local.get $temp) (i32.const 0) ) ) - ;; CHECK: (func $get-i32 (type $1) (result i32) + ;; CHECK: (func $get-i32 (type $2) (result i32) ;; CHECK-NEXT: (i32.const 1337) ;; CHECK-NEXT: ) (func $get-i32 (result i32) @@ -3043,11 +3065,15 @@ (type $array (sub (array (ref null $array)))) - ;; CHECK: (type $1 (func)) + ;; CHECK: (type $1 (struct (field (ref null $array)))) + + ;; CHECK: (type $2 (func)) - ;; CHECK: (type $2 (struct (field (ref null $array)) (field (ref null $array)) (field (ref null $array)))) + ;; CHECK: (type $3 (struct (field (ref null $array)) (field (ref null $array)) (field (ref null $array)))) - ;; CHECK: (func $nested (type $1) + ;; CHECK: (type $4 (func (result anyref))) + + ;; CHECK: (func $nested (type $2) ;; CHECK-NEXT: (local $0 (ref null $array)) ;; CHECK-NEXT: (local $1 (ref null $array)) ;; CHECK-NEXT: (local $2 (ref null $array)) @@ -3056,7 +3082,7 @@ ;; CHECK-NEXT: (local $5 (ref null $array)) ;; CHECK-NEXT: (local $6 (ref null $array)) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result (ref null $2)) + ;; CHECK-NEXT: (block (result (ref null $3)) ;; CHECK-NEXT: (local.set $0 ;; CHECK-NEXT: (array.new $array ;; CHECK-NEXT: (array.new_default $array @@ -3106,7 +3132,7 @@ ) ) - ;; CHECK: (func $nested-unreachable (type $1) + ;; CHECK: (func $nested-unreachable (type $2) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block ;; (replaces unreachable ArrayNew we can't emit) ;; CHECK-NEXT: (drop @@ -3141,4 +3167,95 @@ ) ) ) + + ;; CHECK: (func $array.flowing.type (type $4) (result anyref) + ;; CHECK-NEXT: (local $temp (ref $array)) + ;; CHECK-NEXT: (local $1 (ref null $array)) + ;; CHECK-NEXT: (local $2 (ref null $array)) + ;; CHECK-NEXT: (local $3 (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $1)) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (local.get $1) + ;; 
CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $1)) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $1)) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $1)) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $1)) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result (ref null $array)) + ;; CHECK-NEXT: (block (result (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $array.flowing.type (result anyref) + (local $temp (ref $array)) + ;; This is similar to $array.flowing.type from above, but now the array's + ;; values are references. + (local.set $temp + (array.new $array + (ref.null $array) + (i32.const 1) + ) + ) + (drop + (block $nullable (result (ref null $array)) + (local.get $temp) + ) + ) + (drop + (block $non-nullable (result (ref $array)) + (local.get $temp) + ) + ) + ;; Test supertypes too. + (drop + (block $non-nullable (result (ref array)) + (local.get $temp) + ) + ) + (drop + (block $non-nullable (result (ref null array)) + (local.get $temp) + ) + ) + ;; This block should *not* change type: it is affected by the value in the + ;; array, not the reference to the array itself. 
+ (block (result anyref) + (array.get $array + (local.get $temp) + (i32.const 0) + ) + ) + ) ) From 5c171888a1a942fe7f3ced9aabc455c76f2ee31a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 9 Apr 2024 09:06:15 -0700 Subject: [PATCH 48/53] clarify --- src/passes/Heap2Local.cpp | 6 ++++++ test/lit/passes/heap2local.wast | 7 +++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index fe2870a78c2..9940589696d 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -867,6 +867,12 @@ struct Array2Struct : PostWalker { // the array type, it should be the struct type. Note that we do this before // the walk that is after us, because the walk may read these types and // depend on them to be valid. + // + // Note that |reached| contains array.get operations, which are reached in + // the analysis, and so we will update their types if they happen to have + // the array type (which can be the case of an array of arrays). But that is + // fine to do as the array.get is rewritten to a struct.get which is then + // lowered away to locals anyhow. auto nullArray = Type(arrayType, Nullable); auto nonNullArray = Type(arrayType, NonNullable); auto nullStruct = Type(structType, Nullable); diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 7ff5ad8d092..37c78a675d5 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -3249,8 +3249,11 @@ (local.get $temp) ) ) - ;; This block should *not* change type: it is affected by the value in the - ;; array, not the reference to the array itself. + ;; This block's type should end up valid. In particular this test checks + ;; that we do not get confused by the array's type, which we rewrite to the + ;; struct type in Array2Struct - this array.get's type is the array type, + ;; but only because the value we read is the array type, and not because the + ;; allocation reaches here. 
(block (result anyref) (array.get $array (local.get $temp) From becbf6118b4714bf33739e1dd6cf8b8ead1e5499 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 9 Apr 2024 09:19:21 -0700 Subject: [PATCH 49/53] feedback --- src/passes/Heap2Local.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 9940589696d..c50a05a7f02 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -815,15 +815,9 @@ struct Array2Struct : PostWalker { // Build the struct type we need: as many fields as the size of the array, // all of the same type as the array's element. + numFields = getArrayNewSize(allocation); auto arrayType = allocation->type.getHeapType(); auto element = arrayType.getArray().element; - if (auto* arrayNew = allocation->dynCast()) { - numFields = getIndex(arrayNew->size); - } else if (auto* arrayNewFixed = allocation->dynCast()) { - numFields = arrayNewFixed->values.size(); - } else { - WASM_UNREACHABLE("bad allocation"); - } FieldList fields; for (Index i = 0; i < numFields; i++) { fields.push_back(element); @@ -989,6 +983,18 @@ struct Array2Struct : PostWalker { void noteCurrentIsReached() { noteIsReached(getCurrent()); } void noteIsReached(Expression* curr) { analyzer.reached.insert(curr); } + + // Given an ArrayNew or ArrayNewFixed, return the size of the array that is + // being allocated. 
+ Index getArrayNewSize(Expression* allocation) { + if (auto* arrayNew = allocation->dynCast()) { + return getIndex(arrayNew->size); + } else if (auto* arrayNewFixed = allocation->dynCast()) { + return arrayNewFixed->values.size(); + } else { + WASM_UNREACHABLE("bad allocation"); + } + } }; // Core Heap2Local optimization that operates on a function: Builds up the data From 4e279b0e2bfa5dab102382db1b3c2fbcb7df8c61 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 9 Apr 2024 09:22:48 -0700 Subject: [PATCH 50/53] feedback: remove unused code --- src/passes/Heap2Local.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index c50a05a7f02..e43eb9f517a 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -843,11 +843,6 @@ struct Array2Struct : PostWalker { // The ArrayNew* will be replaced with a block containing the local.set // and the structNew. arrayNewReplacement = builder.makeSequence(set, structNew); - // The data flows through the new block we just added: inform the - // analysis of that by telling it to treat it as code that it reached - // (only code we reached during the tracing of the allocation through - // the function will be optimized in Struct2Local). - noteIsReached(arrayNewReplacement); } } else if (auto* arrayNewFixed = allocation->dynCast()) { // Simply use the same values as the array. From 568856c6241ad133d9a2384e8e1e35f7c7ebdcdd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 9 Apr 2024 09:26:03 -0700 Subject: [PATCH 51/53] feedback: fix --- src/passes/Heap2Local.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index e43eb9f517a..a9465b1d7f2 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -852,6 +852,14 @@ struct Array2Struct : PostWalker { WASM_UNREACHABLE("bad allocation"); } + // Mark the new expressions we created as reached by the analysis. 
We need + // to inform the analysis of this because Struct2Local will only process + // such code (it depends on the analysis to tell it what the allocation is + // and where it flowed). Note that the two values here may be identical but + // there is no harm to calling it twice in that case. + noteIsReached(structNew); + noteIsReached(arrayNewReplacement); + // Update types along the path reached by the allocation: whenever we see // the array type, it should be the struct type. Note that we do this before // the walk that is after us, because the walk may read these types and From 83b3b0bb375916df0f2cc8bf8f3940f16d980c4d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 9 Apr 2024 09:33:02 -0700 Subject: [PATCH 52/53] feedback: test --- test/lit/passes/heap2local.wast | 49 +++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 37c78a675d5..45b0c244287 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -2588,6 +2588,55 @@ ) ) + ;; CHECK: (func $array.local.super (type $3) + ;; CHECK-NEXT: (local $temp anyref) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 i32) + ;; CHECK-NEXT: (local $3 i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (i32.const 100) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $array.local.super + ;; This local is a supertype, and the allocation 
flows through a cast, all + ;; of which we handle. + (local $temp (ref null any)) + (local.set $temp + (array.new $array + (i32.const 42) + (i32.const 1) + ) + ) + (array.set $array + (ref.cast (ref $array) + (local.get $temp) + ) + (i32.const 0) + (i32.const 100) + ) + ) + ;; CHECK: (func $array.folded (type $2) (result i32) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 i32) From 1e02354e40bdb1877d76bdcf12fe8e2f7ef4dd5f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 9 Apr 2024 09:33:55 -0700 Subject: [PATCH 53/53] feedback: test --- test/lit/passes/heap2local.wast | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast index 45b0c244287..6bdfa0be85f 100644 --- a/test/lit/passes/heap2local.wast +++ b/test/lit/passes/heap2local.wast @@ -3110,8 +3110,8 @@ ;; Arrays with reference values. (module - ;; CHECK: (type $array (sub (array (ref null $array)))) - (type $array (sub (array (ref null $array)))) + ;; CHECK: (type $array (array (ref null $array))) + (type $array (array (ref null $array))) ;; CHECK: (type $1 (struct (field (ref null $array))))