diff --git a/src/ir/branch-utils.h b/src/ir/branch-utils.h
index f7b024e78fe..be3f7f7a811 100644
--- a/src/ir/branch-utils.h
+++ b/src/ir/branch-utils.h
@@ -419,10 +419,14 @@ struct BranchTargets {
   // Gets the expression that defines this branch target, i.e., where we branch
   // to if we branch to that name.
-  Expression* getTarget(Name name) { return inner.targets[name]; }
+  Expression* getTarget(Name name) const {
+    auto iter = inner.targets.find(name);
+    assert(iter != inner.targets.end());
+    return iter->second;
+  }
 
   // Gets the expressions branching to a target.
-  std::unordered_set<Expression*> getBranches(Name name) {
+  std::unordered_set<Expression*> getBranches(Name name) const {
     auto iter = inner.branches.find(name);
     if (iter != inner.branches.end()) {
       return iter->second;
diff --git a/src/ir/parents.h b/src/ir/parents.h
index c5614546cb8..5ab0bc2828c 100644
--- a/src/ir/parents.h
+++ b/src/ir/parents.h
@@ -24,7 +24,13 @@ namespace wasm {
 struct Parents {
   Parents(Expression* expr) { inner.walk(expr); }
 
-  Expression* getParent(Expression* curr) { return inner.parentMap[curr]; }
+  Expression* getParent(Expression* curr) const {
+    auto iter = inner.parentMap.find(curr);
+    if (iter != inner.parentMap.end()) {
+      return iter->second;
+    }
+    return nullptr;
+  }
 
 private:
   struct Inner
diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp
index fcf0d88863b..534ff2f3d7d 100644
--- a/src/passes/Heap2Local.cpp
+++ b/src/passes/Heap2Local.cpp
@@ -166,337 +166,46 @@ namespace wasm {
 
 namespace {
 
-struct Heap2LocalOptimizer {
-  Function* func;
-  Module* module;
-  const PassOptions& passOptions;
-
-  // To find allocations that do not escape, we must track locals so that we
-  // can see how allocations flow from sets to gets and so forth.
-  // TODO: only scan reference types
-  LocalGraph localGraph;
+// Core analysis that provides an escapes() method to check if an allocation
+// escapes in a way that prevents optimizing it away as described above. It also
+// stashes information about the relevant expressions as it goes, which helps
+// optimization later (|seen| and |reached|).
+struct EscapeAnalyzer {
+  // All the expressions that have already been seen by the optimizer, see the
+  // comment above on exclusivity: once we have seen something when analyzing
+  // one allocation, if we reach it again then we can exit early since seeing it
+  // a second time proves we lost exclusivity. We must track this across
+  // multiple instances of EscapeAnalyzer as each handles a particular
+  // allocation.
+  std::unordered_set<Expression*>& seen;
 
   // To find what escapes, we need to follow where values flow, both up to
-  // parents, and via branches.
-  Parents parents;
-  BranchUtils::BranchTargets branchTargets;
-
-  Heap2LocalOptimizer(Function* func,
-                      Module* module,
-                      const PassOptions& passOptions)
-    : func(func), module(module), passOptions(passOptions),
-      localGraph(func, module), parents(func->body), branchTargets(func->body) {
-    // We need to track what each set influences, to see where its value can
-    // flow to.
-    localGraph.computeSetInfluences();
-
-    // All the allocations in the function.
-    // TODO: Arrays (of constant size) as well, if all element accesses use
-    //       constant indexes. One option might be to first convert such
-    //       nonescaping arrays into structs.
-    FindAll<StructNew> allocations(func->body);
-
-    for (auto* allocation : allocations.list) {
-      // The point of this optimization is to replace heap allocations with
-      // locals, so we must be able to place the data in locals.
-      if (!canHandleAsLocals(allocation->type)) {
-        continue;
-      }
-
-      convertToLocals(allocation);
-    }
-  }
-
-  bool canHandleAsLocals(Type type) {
-    if (type == Type::unreachable) {
-      return false;
-    }
-    auto& fields = type.getHeapType().getStruct().fields;
-    for (auto field : fields) {
-      if (!TypeUpdating::canHandleAsLocal(field.type)) {
-        return false;
-      }
-      if (field.isPacked()) {
-        // TODO: support packed fields by adding coercions/truncations.
-        return false;
-      }
-    }
-    return true;
-  }
-
-  // Handles the rewriting that we do to perform the optimization. We store the
-  // data that rewriting will need here, while we analyze, and then if we can do
-  // the optimization, we tell it to run.
-  //
-  // TODO: Doing a single rewrite walk at the end would be more efficient, but
-  // it would need to be more complex.
-  struct Rewriter : PostWalker<Rewriter> {
-    StructNew* allocation;
-    Function* func;
-    Module* module;
-    Builder builder;
-    const FieldList& fields;
-
-    Rewriter(StructNew* allocation, Function* func, Module* module)
-      : allocation(allocation), func(func), module(module), builder(*module),
-        fields(allocation->type.getHeapType().getStruct().fields) {}
-
-    // We must track all the local.sets that write the allocation, to verify
-    // exclusivity.
-    std::unordered_set<LocalSet*> sets;
-
-    // All the expressions we reached during the flow analysis. That is exactly
-    // all the places where our allocation is used. We track these so that we
-    // can fix them up at the end, if the optimization ends up possible.
-    std::unordered_set<Expression*> reached;
-
-    // Maps indexes in the struct to the local index that will replace them.
-    std::vector<Index> localIndexes;
-
-    // In rare cases we may need to refinalize, see below.
-    bool refinalize = false;
-
-    void applyOptimization() {
-      // Allocate locals to store the allocation's fields in.
-      for (auto field : fields) {
-        localIndexes.push_back(builder.addVar(func, field.type));
-      }
-
-      // Replace the things we need to using the visit* methods.
-      walk(func->body);
-
-      if (refinalize) {
-        ReFinalize().walkFunctionInModule(func, module);
-      }
-    }
-
-    // Rewrite the code in visit* methods. The general approach taken is to
-    // replace the allocation with a null reference (which may require changing
-    // types in some places, like making a block return value nullable), and to
-    // remove all uses of it as much as possible, using the information we have
-    // (for example, when our allocation reaches a RefAsNonNull we can simply
-    // remove that operation as we know it would not throw). Some things are
-    // left to other passes, like getting rid of dropped code without side
-    // effects.
-
-    // Adjust the type that flows through an expression, updating that type as
-    // necessary.
-    void adjustTypeFlowingThrough(Expression* curr) {
-      if (!reached.count(curr)) {
-        return;
-      }
-
-      // Our allocation passes through this expr. We must turn its type into a
-      // nullable one, because we will remove things like RefAsNonNull of it,
-      // which means we may no longer have a non-nullable value as our input,
-      // and we could fail to validate. It is safe to make this change in terms
-      // of our parent, since we know very specifically that only safe things
-      // will end up using our value, like a StructGet or a Drop, which do not
-      // care about non-nullability.
-      assert(curr->type.isRef());
-      curr->type = Type(curr->type.getHeapType(), Nullable);
-    }
-
-    void visitBlock(Block* curr) { adjustTypeFlowingThrough(curr); }
-
-    void visitLoop(Loop* curr) { adjustTypeFlowingThrough(curr); }
-
-    void visitLocalSet(LocalSet* curr) {
-      if (!reached.count(curr)) {
-        return;
-      }
-
-      // We don't need any sets of the reference to any of the locals it
-      // originally was written to.
-      if (curr->isTee()) {
-        replaceCurrent(curr->value);
-      } else {
-        replaceCurrent(builder.makeDrop(curr->value));
-      }
-    }
-
-    void visitLocalGet(LocalGet* curr) {
-      if (!reached.count(curr)) {
-        return;
-      }
-
-      // Uses of this get will drop it, so the value does not matter. Replace it
-      // with something else, which avoids issues with non-nullability (when
-      // non-nullable locals are enabled), which could happen like this:
-      //
-      //  (local $x (ref $foo))
-      //  (local.set $x ..)
-      //  (.. (local.get $x))
-      //
-      // If we remove the set but not the get then the get would appear to read
-      // the default value of a non-nullable local, which is not allowed.
-      //
-      // For simplicity, replace the get with a null. We anyhow have null types
-      // in the places where our allocation was earlier, see notes on
-      // visitBlock, and so using a null here adds no extra complexity.
-      replaceCurrent(builder.makeRefNull(curr->type.getHeapType()));
-    }
-
-    void visitBreak(Break* curr) {
-      if (!reached.count(curr)) {
-        return;
-      }
-
-      // Breaks that our allocation flows through may change type, as we now
-      // have a nullable type there.
-      curr->finalize();
-    }
-
-    void visitStructNew(StructNew* curr) {
-      if (curr != allocation) {
-        return;
-      }
+  // parents, and via branches, and through locals.
+  // TODO: for efficiency, only scan reference types in LocalGraph
+  const LocalGraph& localGraph;
+  const Parents& parents;
+  const BranchUtils::BranchTargets& branchTargets;
 
-      // First, assign the initial values to the new locals.
-      std::vector<Expression*> contents;
-
-      if (!allocation->isWithDefault()) {
-        // We must assign the initial values to temp indexes, then copy them
-        // over all at once. If instead we did set them as we go, then we might
-        // hit a problem like this:
-        //
-        //  (local.set X (new_X))
-        //  (local.set Y (block (result ..)
-        //    (.. (local.get X) ..) ;; returns new_X, wrongly
-        //    (new_Y)
-        //  )
-        //
-        // Note how we assign to the local X and use it during the assignment to
-        // the local Y - but we should still see the old value of X, not new_X.
-        // Temp locals X', Y' can ensure that:
-        //
-        //  (local.set X' (new_X))
-        //  (local.set Y' (block (result ..)
-        //    (.. (local.get X) ..) ;; returns the proper, old X
-        //    (new_Y)
-        //  )
-        //  ..
-        //  (local.set X (local.get X'))
-        //  (local.set Y (local.get Y'))
-        std::vector<Index> tempIndexes;
-
-        for (auto field : fields) {
-          tempIndexes.push_back(builder.addVar(func, field.type));
-        }
-
-        // Store the initial values into the temp locals.
-        for (Index i = 0; i < tempIndexes.size(); i++) {
-          contents.push_back(
-            builder.makeLocalSet(tempIndexes[i], allocation->operands[i]));
-        }
-
-        // Copy them to the normal ones.
-        for (Index i = 0; i < tempIndexes.size(); i++) {
-          contents.push_back(builder.makeLocalSet(
-            localIndexes[i],
-            builder.makeLocalGet(tempIndexes[i], fields[i].type)));
-        }
-
-        // TODO Check if the nondefault case does not increase code size in some
-        //      cases. A heap allocation that implicitly sets the default values
-        //      is smaller than multiple explicit settings of locals to
-        //      defaults.
-      } else {
-        // Set the default values.
-        // Note that we must assign the defaults because we might be in a loop,
-        // that is, there might be a previous value.
-        for (Index i = 0; i < localIndexes.size(); i++) {
-          contents.push_back(builder.makeLocalSet(
-            localIndexes[i],
-            builder.makeConstantExpression(Literal::makeZero(fields[i].type))));
-        }
-      }
-
-      // Replace the allocation with a null reference. This changes the type
-      // from non-nullable to nullable, but as we optimize away the code that
-      // the allocation reaches, we will handle that.
-      contents.push_back(builder.makeRefNull(allocation->type.getHeapType()));
-      replaceCurrent(builder.makeBlock(contents));
-    }
-
-    void visitRefAs(RefAs* curr) {
-      if (!reached.count(curr)) {
-        return;
-      }
-
-      // It is safe to optimize out this RefAsNonNull, since we proved it
-      // contains our allocation, and so cannot trap.
-      assert(curr->op == RefAsNonNull);
-      replaceCurrent(curr->value);
-    }
-
-    void visitRefCast(RefCast* curr) {
-      if (!reached.count(curr)) {
-        return;
-      }
-
-      // It is safe to optimize out this RefCast, since we proved it
-      // contains our allocation and we have checked that the type of
-      // the allocation is a subtype of the type of the cast, and so
-      // cannot trap.
-      replaceCurrent(curr->ref);
-
-      // We need to refinalize after this, as while we know the cast is not
-      // logically needed - the value flowing through will not be used - we do
-      // need validation to succeed even before other optimizations remove the
-      // code. For example:
-      //
-      //  (block (result $B)
-      //   (ref.cast $B
-      //    (block (result $A)
-      //
-      // Without the cast this does not validate, so we need to refinalize
-      // (which will fix this, as we replace the unused value with a null, so
-      // that type will propagate out).
-      refinalize = true;
-    }
-
-    void visitStructSet(StructSet* curr) {
-      if (!reached.count(curr)) {
-        return;
-      }
-
-      // Drop the ref (leaving it to other opts to remove, when possible), and
-      // write the data to the local instead of the heap allocation.
-      replaceCurrent(builder.makeSequence(
-        builder.makeDrop(curr->ref),
-        builder.makeLocalSet(localIndexes[curr->index], curr->value)));
-    }
-
-    void visitStructGet(StructGet* curr) {
-      if (!reached.count(curr)) {
-        return;
-      }
-
-      auto type = fields[curr->index].type;
-      if (type != curr->type) {
-        // Normally we are just replacing a struct.get with a local.get of a
-        // local that was created to have the same type as the struct's field,
-        // but in some cases we may refine, if the struct.get's reference type
-        // is less refined than the reference that actually arrives, like here:
-        //
-        //  (struct.get $parent 0
-        //   (block (ref $parent)
-        //    (struct.new $child)))
-        //
-        // We allocated locals for the field of the child, and are replacing a
-        // get of the parent field with a local of the same type as the child's,
-        // which may be more refined.
-        refinalize = true;
-      }
-      replaceCurrent(builder.makeSequence(
-        builder.makeDrop(curr->ref),
-        builder.makeLocalGet(localIndexes[curr->index], type)));
-    }
-  };
-
-  // All the expressions we have already looked at.
-  std::unordered_set<Expression*> seen;
+  const PassOptions& passOptions;
+  Module& wasm;
+
+  EscapeAnalyzer(std::unordered_set<Expression*>& seen,
+                 const LocalGraph& localGraph,
+                 const Parents& parents,
+                 const BranchUtils::BranchTargets& branchTargets,
+                 const PassOptions& passOptions,
+                 Module& wasm)
+    : seen(seen), localGraph(localGraph), parents(parents),
+      branchTargets(branchTargets), passOptions(passOptions), wasm(wasm) {}
+
+  // We must track all the local.sets that write the allocation, to verify
+  // exclusivity.
+  std::unordered_set<LocalSet*> sets;
+
+  // All the expressions we reached during the flow analysis. That is exactly
+  // all the places where our allocation is used. We track these so that we
+  // can fix them up at the end, if the optimization ends up possible.
+  std::unordered_set<Expression*> reached;
 
   enum class ParentChildInteraction {
     // The parent lets the child escape. E.g. the parent is a call.
@@ -518,11 +227,8 @@ struct Heap2LocalOptimizer {
     Mixes,
   };
 
-  // Analyze an allocation to see if we can convert it from a heap allocation to
-  // locals.
-  void convertToLocals(StructNew* allocation) {
-    Rewriter rewriter(allocation, func, module);
-
+  // Analyze an allocation to see if it escapes or not.
+  bool escapes(StructNew* allocation) {
     // A queue of flows from children to parents. When something is in the queue
     // here then it assumed that it is ok for the allocation to be at the child
     // (that is, we have already checked the child before placing it in the
@@ -546,7 +252,7 @@ struct Heap2LocalOptimizer {
           interaction == ParentChildInteraction::Mixes) {
         // If the parent may let us escape, or the parent mixes other values
         // up with us, give up.
-        return;
+        return true;
       }
 
       // The parent either fully consumes us, or flows us onwards; either way,
@@ -575,7 +281,7 @@ struct Heap2LocalOptimizer {
       // added the parent to |seen| for both children, the reference would get
      // blocked from being optimized.
       if (!seen.emplace(parent).second) {
-        return;
+        return true;
       }
 
       // We can proceed, as the parent interacts with us properly, and we are
@@ -592,7 +298,7 @@ struct Heap2LocalOptimizer {
         // exclusive use of our allocation by all the gets that read the value.
         // Note the set, and we will check the gets at the end once we know all
         // of our sets.
-        rewriter.sets.insert(set);
+        sets.insert(set);
 
         // We must also look at how the value flows from those gets.
         if (auto* getsReached = getGetsReached(set)) {
@@ -611,17 +317,17 @@ struct Heap2LocalOptimizer {
       // If we got to here, then we can continue to hope that we can optimize
       // this allocation. Mark the parent and child as reached by it, and
      // continue.
-      rewriter.reached.insert(parent);
-      rewriter.reached.insert(child);
+      reached.insert(parent);
+      reached.insert(child);
     }
 
     // We finished the loop over the flows. Do the final checks.
-    if (!getsAreExclusiveToSets(rewriter.sets)) {
-      return;
+    if (!getsAreExclusiveToSets()) {
+      return true;
     }
 
-    // We can do it, hurray!
-    rewriter.applyOptimization();
+    // Nothing escapes, hurray!
+    return false;
   }
 
   ParentChildInteraction getParentChildInteraction(StructNew* allocation,
@@ -729,7 +435,7 @@ struct Heap2LocalOptimizer {
 
     // Finally, check for mixing. If the child is the immediate fallthrough
     // of the parent then no other values can be mixed in.
-    if (Properties::getImmediateFallthrough(parent, passOptions, *module) ==
+    if (Properties::getImmediateFallthrough(parent, passOptions, wasm) ==
         child) {
       return ParentChildInteraction::Flows;
     }
@@ -755,7 +461,7 @@ struct Heap2LocalOptimizer {
     return ParentChildInteraction::Mixes;
   }
 
-  LocalGraph::SetInfluences* getGetsReached(LocalSet* set) {
+  const LocalGraph::SetInfluences* getGetsReached(LocalSet* set) {
     auto iter = localGraph.setInfluences.find(set);
     if (iter != localGraph.setInfluences.end()) {
       return &iter->second;
@@ -763,8 +469,8 @@ struct Heap2LocalOptimizer {
     return nullptr;
   }
 
-  BranchUtils::NameSet branchesSentByParent(Expression* child,
-                                            Expression* parent) {
+  const BranchUtils::NameSet branchesSentByParent(Expression* child,
+                                                  Expression* parent) {
     BranchUtils::NameSet names;
     BranchUtils::operateOnScopeNameUsesAndSentValues(
       parent, [&](Name name, Expression* value) {
@@ -780,7 +486,7 @@ struct Heap2LocalOptimizer {
   // else), we need to check whether all the gets that read that value cannot
   // read anything else (which would be the case if another set writes to that
   // local, in the right live range).
-  bool getsAreExclusiveToSets(const std::unordered_set<LocalSet*>& sets) {
+  bool getsAreExclusiveToSets() {
     // Find all the relevant gets (which may overlap between the sets).
     std::unordered_set<LocalGet*> gets;
     for (auto* set : sets) {
@@ -793,7 +499,9 @@ struct Heap2LocalOptimizer {
 
     // Check that the gets can only read from the specific known sets.
     for (auto* get : gets) {
-      for (auto* set : localGraph.getSetses[get]) {
+      auto iter = localGraph.getSetses.find(get);
+      assert(iter != localGraph.getSetses.end());
+      for (auto* set : iter->second) {
         if (sets.count(set) == 0) {
           return false;
         }
@@ -804,11 +512,340 @@ struct Heap2LocalOptimizer {
   }
 };
 
-struct Heap2Local : public WalkerPass<PostWalker<Heap2Local>> {
+// An optimizer that handles the rewriting to turn a struct allocation into
+// locals. We run this after proving that allocation does not escape.
+//
+// TODO: Doing a single rewrite walk at the end (for all structs) would be more
+// efficient, but it would need to be more complex.
+struct Struct2Local : PostWalker<Struct2Local> {
+  StructNew* allocation;
+  const EscapeAnalyzer& analyzer;
+  Function* func;
+  Module& wasm;
+  Builder builder;
+  const FieldList& fields;
+
+  Struct2Local(StructNew* allocation,
+               const EscapeAnalyzer& analyzer,
+               Function* func,
+               Module& wasm)
+    : allocation(allocation), analyzer(analyzer), func(func), wasm(wasm),
+      builder(wasm), fields(allocation->type.getHeapType().getStruct().fields) {
+
+    // Allocate locals to store the allocation's fields in.
+    for (auto field : fields) {
+      localIndexes.push_back(builder.addVar(func, field.type));
+    }
+
+    // Replace the things we need to using the visit* methods.
+    walk(func->body);
+
+    if (refinalize) {
+      ReFinalize().walkFunctionInModule(func, &wasm);
+    }
+  }
+
+  // Maps indexes in the struct to the local index that will replace them.
+  std::vector<Index> localIndexes;
+
+  // In rare cases we may need to refinalize, see below.
+  bool refinalize = false;
+
+  // Rewrite the code in visit* methods. The general approach taken is to
+  // replace the allocation with a null reference (which may require changing
+  // types in some places, like making a block return value nullable), and to
+  // remove all uses of it as much as possible, using the information we have
+  // (for example, when our allocation reaches a RefAsNonNull we can simply
+  // remove that operation as we know it would not throw). Some things are
+  // left to other passes, like getting rid of dropped code without side
+  // effects.
+
+  // Adjust the type that flows through an expression, updating that type as
+  // necessary.
+  void adjustTypeFlowingThrough(Expression* curr) {
+    if (!analyzer.reached.count(curr)) {
+      return;
+    }
+
+    // Our allocation passes through this expr. We must turn its type into a
+    // nullable one, because we will remove things like RefAsNonNull of it,
+    // which means we may no longer have a non-nullable value as our input,
+    // and we could fail to validate. It is safe to make this change in terms
+    // of our parent, since we know very specifically that only safe things
+    // will end up using our value, like a StructGet or a Drop, which do not
+    // care about non-nullability.
+    assert(curr->type.isRef());
+    curr->type = Type(curr->type.getHeapType(), Nullable);
+  }
+
+  void visitBlock(Block* curr) { adjustTypeFlowingThrough(curr); }
+
+  void visitLoop(Loop* curr) { adjustTypeFlowingThrough(curr); }
+
+  void visitLocalSet(LocalSet* curr) {
+    if (!analyzer.reached.count(curr)) {
+      return;
+    }
+
+    // We don't need any sets of the reference to any of the locals it
+    // originally was written to.
+    if (curr->isTee()) {
+      replaceCurrent(curr->value);
+    } else {
+      replaceCurrent(builder.makeDrop(curr->value));
+    }
+  }
+
+  void visitLocalGet(LocalGet* curr) {
+    if (!analyzer.reached.count(curr)) {
+      return;
+    }
+
+    // Uses of this get will drop it, so the value does not matter. Replace it
+    // with something else, which avoids issues with non-nullability (when
+    // non-nullable locals are enabled), which could happen like this:
+    //
+    //  (local $x (ref $foo))
+    //  (local.set $x ..)
+    //  (.. (local.get $x))
+    //
+    // If we remove the set but not the get then the get would appear to read
+    // the default value of a non-nullable local, which is not allowed.
+    //
+    // For simplicity, replace the get with a null. We anyhow have null types
+    // in the places where our allocation was earlier, see notes on
+    // visitBlock, and so using a null here adds no extra complexity.
+    replaceCurrent(builder.makeRefNull(curr->type.getHeapType()));
+  }
+
+  void visitBreak(Break* curr) {
+    if (!analyzer.reached.count(curr)) {
+      return;
+    }
+
+    // Breaks that our allocation flows through may change type, as we now
+    // have a nullable type there.
+    curr->finalize();
+  }
+
+  void visitStructNew(StructNew* curr) {
+    if (curr != allocation) {
+      return;
+    }
+
+    // First, assign the initial values to the new locals.
+    std::vector<Expression*> contents;
+
+    if (!allocation->isWithDefault()) {
+      // We must assign the initial values to temp indexes, then copy them
+      // over all at once. If instead we did set them as we go, then we might
+      // hit a problem like this:
+      //
+      //  (local.set X (new_X))
+      //  (local.set Y (block (result ..)
+      //    (.. (local.get X) ..) ;; returns new_X, wrongly
+      //    (new_Y)
+      //  )
+      //
+      // Note how we assign to the local X and use it during the assignment to
+      // the local Y - but we should still see the old value of X, not new_X.
+      // Temp locals X', Y' can ensure that:
+      //
+      //  (local.set X' (new_X))
+      //  (local.set Y' (block (result ..)
+      //    (.. (local.get X) ..) ;; returns the proper, old X
+      //    (new_Y)
+      //  )
+      //  ..
+      //  (local.set X (local.get X'))
+      //  (local.set Y (local.get Y'))
+      std::vector<Index> tempIndexes;
+
+      for (auto field : fields) {
+        tempIndexes.push_back(builder.addVar(func, field.type));
+      }
+
+      // Store the initial values into the temp locals.
+      for (Index i = 0; i < tempIndexes.size(); i++) {
+        contents.push_back(
+          builder.makeLocalSet(tempIndexes[i], allocation->operands[i]));
+      }
+
+      // Copy them to the normal ones.
+      for (Index i = 0; i < tempIndexes.size(); i++) {
+        contents.push_back(builder.makeLocalSet(
+          localIndexes[i],
+          builder.makeLocalGet(tempIndexes[i], fields[i].type)));
+      }
+
+      // TODO Check if the nondefault case does not increase code size in some
+      //      cases. A heap allocation that implicitly sets the default values
+      //      is smaller than multiple explicit settings of locals to
+      //      defaults.
+    } else {
+      // Set the default values.
+      // Note that we must assign the defaults because we might be in a loop,
+      // that is, there might be a previous value.
+      for (Index i = 0; i < localIndexes.size(); i++) {
+        contents.push_back(builder.makeLocalSet(
+          localIndexes[i],
+          builder.makeConstantExpression(Literal::makeZero(fields[i].type))));
+      }
+    }
+
+    // Replace the allocation with a null reference. This changes the type
+    // from non-nullable to nullable, but as we optimize away the code that
+    // the allocation reaches, we will handle that.
+    contents.push_back(builder.makeRefNull(allocation->type.getHeapType()));
+    replaceCurrent(builder.makeBlock(contents));
+  }
+
+  void visitRefAs(RefAs* curr) {
+    if (!analyzer.reached.count(curr)) {
+      return;
+    }
+
+    // It is safe to optimize out this RefAsNonNull, since we proved it
+    // contains our allocation, and so cannot trap.
+    assert(curr->op == RefAsNonNull);
+    replaceCurrent(curr->value);
+  }
+
+  void visitRefCast(RefCast* curr) {
+    if (!analyzer.reached.count(curr)) {
+      return;
+    }
+
+    // It is safe to optimize out this RefCast, since we proved it
+    // contains our allocation and we have checked that the type of
+    // the allocation is a subtype of the type of the cast, and so
+    // cannot trap.
+    replaceCurrent(curr->ref);
+
+    // We need to refinalize after this, as while we know the cast is not
+    // logically needed - the value flowing through will not be used - we do
+    // need validation to succeed even before other optimizations remove the
+    // code. For example:
+    //
+    //  (block (result $B)
+    //   (ref.cast $B
+    //    (block (result $A)
+    //
+    // Without the cast this does not validate, so we need to refinalize
+    // (which will fix this, as we replace the unused value with a null, so
+    // that type will propagate out).
+    refinalize = true;
+  }
+
+  void visitStructSet(StructSet* curr) {
+    if (!analyzer.reached.count(curr)) {
+      return;
+    }
+
+    // Drop the ref (leaving it to other opts to remove, when possible), and
+    // write the data to the local instead of the heap allocation.
+    replaceCurrent(builder.makeSequence(
+      builder.makeDrop(curr->ref),
+      builder.makeLocalSet(localIndexes[curr->index], curr->value)));
+  }
+
+  void visitStructGet(StructGet* curr) {
+    if (!analyzer.reached.count(curr)) {
+      return;
+    }
+
+    auto type = fields[curr->index].type;
+    if (type != curr->type) {
+      // Normally we are just replacing a struct.get with a local.get of a
+      // local that was created to have the same type as the struct's field,
+      // but in some cases we may refine, if the struct.get's reference type
+      // is less refined than the reference that actually arrives, like here:
+      //
+      //  (struct.get $parent 0
+      //   (block (ref $parent)
+      //    (struct.new $child)))
+      //
+      // We allocated locals for the field of the child, and are replacing a
+      // get of the parent field with a local of the same type as the child's,
+      // which may be more refined.
+      refinalize = true;
+    }
+    replaceCurrent(builder.makeSequence(
+      builder.makeDrop(curr->ref),
+      builder.makeLocalGet(localIndexes[curr->index], type)));
+  }
+};
+
+// Core Heap2Local optimization that operates on a function: Builds up the data
+// structures we need (LocalGraph, etc.) that we will use across multiple
+// analyses of allocations, and then runs those analyses and optimizes where
+// possible.
+struct Heap2Local {
+  Function* func;
+  Module& wasm;
+  const PassOptions& passOptions;
+
+  LocalGraph localGraph;
+  Parents parents;
+  BranchUtils::BranchTargets branchTargets;
+
+  Heap2Local(Function* func, Module& wasm, const PassOptions& passOptions)
+    : func(func), wasm(wasm), passOptions(passOptions), localGraph(func, &wasm),
+      parents(func->body), branchTargets(func->body) {
+    // We need to track what each set influences, to see where its value can
+    // flow to.
+    localGraph.computeSetInfluences();
+
+    // All the expressions we have already looked at. We use this to avoid
+    // repeated work, see above.
+    std::unordered_set<Expression*> seen;
+
+    // All the allocations in the function.
+    // TODO: Arrays (of constant size) as well, if all element accesses use
+    //       constant indexes. One option might be to first convert such
+    //       nonescaping arrays into structs.
+    FindAll<StructNew> allocations(func->body);
+
+    for (auto* allocation : allocations.list) {
+      // The point of this optimization is to replace heap allocations with
+      // locals, so we must be able to place the data in locals.
+      if (!canHandleAsLocals(allocation->type)) {
+        continue;
+      }
+
+      // Check for escaping, noting relevant information as we go. If this does
+      // not escape, optimize it.
+      EscapeAnalyzer analyzer(
+        seen, localGraph, parents, branchTargets, passOptions, wasm);
+      if (!analyzer.escapes(allocation)) {
+        Struct2Local optimizer(allocation, analyzer, func, wasm);
+      }
+    }
+  }
+
+  bool canHandleAsLocals(Type type) {
+    if (type == Type::unreachable) {
+      return false;
+    }
+    auto& fields = type.getHeapType().getStruct().fields;
+    for (auto field : fields) {
+      if (!TypeUpdating::canHandleAsLocal(field.type)) {
+        return false;
+      }
+      if (field.isPacked()) {
+        // TODO: support packed fields by adding coercions/truncations.
+        return false;
+      }
+    }
+    return true;
+  }
+};
+
+struct Heap2LocalPass : public WalkerPass<PostWalker<Heap2LocalPass>> {
   bool isFunctionParallel() override { return true; }
 
   std::unique_ptr<Pass> create() override {
-    return std::make_unique<Heap2Local>();
+    return std::make_unique<Heap2LocalPass>();
   }
 
   void doWalkFunction(Function* func) {
@@ -821,12 +858,12 @@ struct Heap2Local : public WalkerPass<PostWalker<Heap2Local>> {
     // vacuum, in particular, to optimize such nested allocations.
     // TODO Consider running multiple iterations here, and running vacuum in
     //      between them.
-    Heap2LocalOptimizer(func, getModule(), getPassOptions());
+    Heap2Local(func, *getModule(), getPassOptions());
   }
 };
 
 } // anonymous namespace
 
-Pass* createHeap2LocalPass() { return new Heap2Local(); }
+Pass* createHeap2LocalPass() { return new Heap2LocalPass(); }
 
 } // namespace wasm
diff --git a/test/lit/passes/heap2local.wast b/test/lit/passes/heap2local.wast
index b693c49ee95..be6538a948f 100644
--- a/test/lit/passes/heap2local.wast
+++ b/test/lit/passes/heap2local.wast
@@ -2045,8 +2045,8 @@
   ;; CHECK-NEXT: )
   (func $cast-failure (result anyref)
     (struct.get $B 0
-      ;; The allocated $A arrives here, but the cast will fail,
-      ;; so we do not optimize.
+      ;; The allocated $A arrives here, but the cast will fail, so we do not
+      ;; optimize.
      (ref.cast (ref $B)
        (struct.new $A
          (struct.new $A
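
Editor's note on the lookup hardening in the branch-utils.h and parents.h hunks (and the getSetses change in Heap2Local.cpp): std::unordered_map::operator[] default-constructs and inserts a value when the key is missing, so a failed lookup silently grows the map, can hand back a null Expression*, and prevents marking the accessor const, which is why the diff switches to find() plus either an assert or an explicit nullptr return. Below is a minimal standalone C++ sketch of that pattern under stated assumptions; Expression, ParentMap, and getParent here are stand-ins invented for illustration, not Binaryen code.

  #include <cassert>
  #include <unordered_map>

  struct Expression {};
  using ParentMap = std::unordered_map<Expression*, Expression*>;

  // operator[] on a miss would insert {child, nullptr} as a side effect and
  // cannot be called through a const reference; find() leaves the map
  // untouched and lets the caller choose the miss behavior (nullptr here,
  // an assert in getTarget-style accessors where a miss is a logic error).
  Expression* getParent(const ParentMap& map, Expression* child) {
    auto iter = map.find(child);
    if (iter != map.end()) {
      return iter->second;
    }
    return nullptr;
  }

  int main() {
    Expression root, child;
    ParentMap map{{&child, &root}};
    assert(getParent(map, &child) == &root);
    assert(getParent(map, &root) == nullptr); // miss: the map is not modified
  }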