Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 176 additions & 46 deletions src/ir/module-splitting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,6 @@
// 8. Export globals, tags, tables, and memories from the primary module and
// import them in the secondary modules.
//
// 9. Run RemoveUnusedModuleElements pass on the secondary modules in order to
// remove unused imports.
//
// Functions can be used or referenced three ways in a WebAssembly module: they
// can be exported, called, or referenced with ref.func. The above procedure
// introduces a layer of indirection to each of those mechanisms that removes
Expand Down Expand Up @@ -77,10 +74,9 @@
#include "ir/module-splitting.h"
#include "asmjs/shared-constants.h"
#include "ir/export-utils.h"
#include "ir/find_all.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/utils.h"
#include "pass.h"
#include "support/insert_ordered.h"
#include "wasm-builder.h"
#include "wasm.h"
Expand Down Expand Up @@ -963,13 +959,11 @@ void ModuleSplitter::shareImportableItems() {
}
};

for (auto& secondaryPtr : secondaries) {
Module& secondary = *secondaryPtr;

// Collect names used in the secondary module
// Given a module, collect names used in the module
auto getUsedNames = [&](Module& module) {
UsedNames used;
ModuleUtils::ParallelFunctionAnalysis<UsedNames> nameCollector(
secondary, [&](Function* func, UsedNames& used) {
module, [&](Function* func, UsedNames& used) {
if (!func->imported()) {
NameCollector(used).walk(func->body);
}
Expand All @@ -983,65 +977,201 @@ void ModuleSplitter::shareImportableItems() {
}

NameCollector collector(used);
collector.walkModuleCode(&secondary);
for (auto& segment : secondary.dataSegments) {
collector.walkModuleCode(&module);
for (auto& segment : module.dataSegments) {
if (segment->memory.is()) {
used.memories.insert(segment->memory);
}
}
for (auto& segment : secondary.elementSegments) {
for (auto& segment : module.elementSegments) {
if (segment->table.is()) {
used.tables.insert(segment->table);
}
}

// Export module items that are used in the secondary module
for (auto& memory : primary.memories) {
if (!used.memories.count(memory->name)) {
continue;
// If primary module has exports, they are "used" in it. Secondary modules
// don't have exports, so this only applies to the primary module.
for (auto& ex : module.exports) {
switch (ex->kind) {
case ExternalKind::Global:
used.globals.insert(*ex->getInternalName());
break;
case ExternalKind::Memory:
used.memories.insert(*ex->getInternalName());
break;
case ExternalKind::Table:
used.tables.insert(*ex->getInternalName());
break;
case ExternalKind::Tag:
used.tags.insert(*ex->getInternalName());
break;
default:
break;
}
}

return used;
};

UsedNames primaryUsed = getUsedNames(primary);
std::vector<UsedNames> secondaryUsed;
for (auto& secondaryPtr : secondaries) {
secondaryUsed.push_back(getUsedNames(*secondaryPtr));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if using emplace_back would have a noticeable performance difference here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To do something like

   secondaryUsed.emplace_back(*secondaryPtr);

we would need to turn getUsedNames into a UsedNames constructor. I tried that and it didn't improve performance in a meaningful way. (The average runtime over three runs differed by 0.3s, which may be just normal fluctuation.)

}

// Compute globals referenced in other globals' initializers. Since globals
// can reference other globals, we must ensure that if a global is used in a
// module, all its dependencies are also marked as used.
auto computeDependentItems = [&](UsedNames& used) {
std::vector<Name> worklist(used.globals.begin(), used.globals.end());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In principle globals could also appear in segment offsets, element segment elements, and soon table default values. Is there a way we can generalize this to handle arbitrary module-level code?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's something we can adjust when gathering used.globals. Currently we use Module::walkModuleCode in getUsedNames, but in the next PR (#8442) I wanted to avoid walking global initializers, so I added a custom function called walkSegments that only walks data and element segments.

We currently don't move segments and pin all of them in the primary module:

// Find functions that refer to data or element segments. These functions must
// remain in the primary module because segments cannot be exported to be
// accessed from the secondary module.
//
// TODO: Investigate other options, such as moving the segments to the
// secondary module or replacing the segment-using instructions in the
// secondary module with calls to imports.
ModuleUtils::ParallelFunctionAnalysis<std::vector<Name>>
segmentReferrerCollector(
primary, [&](Function* func, std::vector<Name>& segmentReferrers) {
if (func->imported()) {
return;
}
struct SegmentReferrerCollector
: PostWalker<SegmentReferrerCollector,
UnifiedExpressionVisitor<SegmentReferrerCollector>> {
bool hasSegmentReference = false;
void visitExpression(Expression* curr) {
#define DELEGATE_ID curr->_id
#define DELEGATE_START(id) [[maybe_unused]] auto* cast = curr->cast<id>();
#define DELEGATE_GET_FIELD(id, field) cast->field
#define DELEGATE_FIELD_TYPE(id, field)
#define DELEGATE_FIELD_HEAPTYPE(id, field)
#define DELEGATE_FIELD_CHILD(id, field)
#define DELEGATE_FIELD_OPTIONAL_CHILD(id, field)
#define DELEGATE_FIELD_INT(id, field)
#define DELEGATE_FIELD_LITERAL(id, field)
#define DELEGATE_FIELD_NAME(id, field)
#define DELEGATE_FIELD_SCOPE_NAME_DEF(id, field)
#define DELEGATE_FIELD_SCOPE_NAME_USE(id, field)
#define DELEGATE_FIELD_ADDRESS(id, field)
#define DELEGATE_FIELD_NAME_KIND(id, field, kind) \
if (kind == ModuleItemKind::DataSegment || \
kind == ModuleItemKind::ElementSegment) { \
hasSegmentReference = true; \
}
#include "wasm-delegations-fields.def"
}
};
SegmentReferrerCollector collector;
collector.walkFunction(func);
if (collector.hasSegmentReference) {
segmentReferrers.push_back(func->name);
}
});
std::unordered_set<Name> segmentReferrers;
for (auto& [_, referrers] : segmentReferrerCollector.map) {
segmentReferrers.insert(referrers.begin(), referrers.end());
}

So all tables and memories that are referred to by these segments are also in the primary module. I think we can probably relax this restriction and move more segments (and tables, memories, and functions that have segment-referring instructions) to secondary modules. But this will be a follow-up.

and soon table default values

Didn't know that. Then we should add that to walkModuleCode too.

for (auto name : worklist) {
// At this point all globals are still in the primary module, so this
// exists
auto* global = primary.getGlobal(name);
if (!global->imported() && global->init) {
for (auto* get : FindAll<GlobalGet>(global->init).list) {
used.globals.insert(get->name);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we also need to add the newly found used global back to the worklist?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not here, but I did it in #8442. In this PR, we just have the secondary module import every global it needs due to these dependencies, so this is fine. For example, if you look at the transitive-globals.wast test added here, a is moved to a secondary module, and we just import its dependency b, so we don't need to do anything to c. If we moved b too, then we would have to either import or move c, in which case we would need to add c to the worklist. That's done in the next PR (#8442).

}
}
}
};

for (auto& used : secondaryUsed) {
computeDependentItems(used);
}

// Given a name and module item kind, returns the list of secondary modules
// using that name
auto getUsingSecondaries = [&](const Name& name, auto UsedNames::* field) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoah, I've never seen this member pointer syntax before. Neat!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Me neither. Gemini did it 🙃

std::vector<Module*> usingModules;
for (size_t i = 0; i < secondaries.size(); ++i) {
if ((secondaryUsed[i].*field).count(name)) {
usingModules.push_back(secondaries[i].get());
}
auto secondaryMemory = ModuleUtils::copyMemory(memory.get(), secondary);
makeImportExport(
*memory, *secondaryMemory, "memory", ExternalKind::Memory);
}
return usingModules;
};

for (auto& table : primary.tables) {
// 1. In case we copied this table to this secondary module in
// setupTablePatching(), secondary.getTableOrNull(table->name) is not
// null, and we need to export it.
// 2. As in the case with other module elements, if the table is used in
// the secondary module's instructions, we need to export it.
auto secondaryTable = secondary.getTableOrNull(table->name);
if (!secondaryTable && !used.tables.count(table->name)) {
continue;
// Share module items with secondary modules.
// 1. Only share an item with the modules that use it
// 2. If an item is used by only a single secondary module, move the item to
// that secondary module. If an item is used by multiple modules (including
// the primary and secondary modules), export the item from the primary and
// import it from the using secondary modules.

std::vector<Name> memoriesToRemove;
for (auto& memory : primary.memories) {
auto usingSecondaries =
getUsingSecondaries(memory->name, &UsedNames::memories);
bool usedInPrimary = primaryUsed.memories.count(memory->name);

if (!usedInPrimary && usingSecondaries.size() == 1) {
auto* secondary = usingSecondaries[0];
ModuleUtils::copyMemory(memory.get(), *secondary);
memoriesToRemove.push_back(memory->name);
} else {
for (auto* secondary : usingSecondaries) {
auto* secondaryMemory =
ModuleUtils::copyMemory(memory.get(), *secondary);
makeImportExport(
*memory, *secondaryMemory, "memory", ExternalKind::Memory);
}
if (!secondaryTable) {
secondaryTable = ModuleUtils::copyTable(table.get(), secondary);
}
}
for (auto& name : memoriesToRemove) {
primary.removeMemory(name);
}

std::vector<Name> tablesToRemove;
for (auto& table : primary.tables) {
auto usingSecondaries =
getUsingSecondaries(table->name, &UsedNames::tables);
bool usedInPrimary = primaryUsed.tables.count(table->name);

if (!usedInPrimary && usingSecondaries.size() == 1) {
auto* secondary = usingSecondaries[0];
// In case we copied this table to this secondary module in
// setupTablePatching(), !usedInPrimary can't be satisfied, because the
// primary module should have an element segment that refers to this
// table.
assert(!secondary->getTableOrNull(table->name));
ModuleUtils::copyTable(table.get(), *secondary);
tablesToRemove.push_back(table->name);
} else {
for (auto* secondary : usingSecondaries) {
// 1. In case we copied this table to this secondary module in
// setupTablePatching(), secondary.getTableOrNull(table->name) is not
// null, and we need to import it.
// 2. As in the case with other module elements, if the table is used in
// the secondary module's instructions, we need to export it.
auto secondaryTable = secondary->getTableOrNull(table->name);
if (!secondaryTable) {
secondaryTable = ModuleUtils::copyTable(table.get(), *secondary);
}
makeImportExport(*table, *secondaryTable, "table", ExternalKind::Table);
}
makeImportExport(*table, *secondaryTable, "table", ExternalKind::Table);
}
}
for (auto& name : tablesToRemove) {
primary.removeTable(name);
}

for (auto& global : primary.globals) {
if (!used.globals.count(global->name)) {
continue;
std::vector<Name> globalsToRemove;
for (auto& global : primary.globals) {
if (global->mutable_) {
assert(primary.features.hasMutableGlobals() &&
"TODO: add wrapper functions for disallowed mutable globals");
}

auto usingSecondaries =
getUsingSecondaries(global->name, &UsedNames::globals);
bool usedInPrimary = primaryUsed.globals.count(global->name);
if (!usedInPrimary && usingSecondaries.size() == 1) {
auto* secondary = usingSecondaries[0];
ModuleUtils::copyGlobal(global.get(), *secondary);
globalsToRemove.push_back(global->name);
// Import global initializer's ref.func dependences
if (global->init) {
for (auto* ref : FindAll<RefFunc>(global->init).list) {
// Here, ref->func is either a function in the primary module, or a
// trampoline created in indirectReferencesToSecondaryFunctions in
// case the original function is in one of the secondaries.
assert(primary.getFunctionOrNull(ref->func));
exportImportFunction(ref->func, {secondary});
}
}
if (global->mutable_) {
assert(primary.features.hasMutableGlobals() &&
"TODO: add wrapper functions for disallowed mutable globals");
} else {
for (auto* secondary : usingSecondaries) {
auto* secondaryGlobal =
ModuleUtils::copyGlobal(global.get(), *secondary);
makeImportExport(
*global, *secondaryGlobal, "global", ExternalKind::Global);
}
auto* secondaryGlobal = ModuleUtils::copyGlobal(global.get(), secondary);
makeImportExport(
*global, *secondaryGlobal, "global", ExternalKind::Global);
}
}
for (auto& name : globalsToRemove) {
primary.removeGlobal(name);
}

std::vector<Name> tagsToRemove;
for (auto& tag : primary.tags) {
auto usingSecondaries = getUsingSecondaries(tag->name, &UsedNames::tags);
bool usedInPrimary = primaryUsed.tags.count(tag->name);

for (auto& tag : primary.tags) {
if (!used.tags.count(tag->name)) {
continue;
if (!usedInPrimary && usingSecondaries.size() == 1) {
auto* secondary = usingSecondaries[0];
ModuleUtils::copyTag(tag.get(), *secondary);
tagsToRemove.push_back(tag->name);
} else {
for (auto* secondary : usingSecondaries) {
auto* secondaryTag = ModuleUtils::copyTag(tag.get(), *secondary);
makeImportExport(*tag, *secondaryTag, "tag", ExternalKind::Tag);
}
auto* secondaryTag = ModuleUtils::copyTag(tag.get(), secondary);
makeImportExport(*tag, *secondaryTag, "tag", ExternalKind::Tag);
}
}
for (auto& name : tagsToRemove) {
primary.removeTag(name);
}
}

} // anonymous namespace
Expand Down
38 changes: 38 additions & 0 deletions test/lit/wasm-split/global-funcref.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
;; RUN: wasm-split %s -all -g -o1 %t.1.wasm -o2 %t.2.wasm --keep-funcs=keep
;; RUN: wasm-dis %t.1.wasm | filecheck %s --check-prefix PRIMARY
;; RUN: wasm-dis %t.2.wasm | filecheck %s --check-prefix SECONDARY

;; When a split global ($a here)'s initializer contains a ref.func of a split
;; function, currently we create its trampoline in the primary module and export
;; it.
;; TODO Use $split in the secondary module directly in the split global

(module
;; PRIMARY: (export "trampoline_split" (func $trampoline_split))

;; $keep is the only function listed in --keep-funcs, so the primary-module
;; checks below expect it to stay in the primary module.
;; PRIMARY: (func $keep
;; PRIMARY-NEXT: )
(func $keep)

;; Expected trampoline for $split in the primary module: it forwards through
;; a call_indirect rather than calling $split directly.
;; PRIMARY: (func $trampoline_split
;; PRIMARY-NEXT: (call_indirect (type $0)
;; PRIMARY-NEXT: (i32.const 0)
;; PRIMARY-NEXT: )
;; PRIMARY-NEXT: )


;; $a is read only by $split. The secondary-module checks expect $a to be
;; defined there, with its ref.func pointing at the imported trampoline
;; instead of at $split itself.
;; SECONDARY: (import "primary" "trampoline_split" (func $trampoline_split (exact)))
;; SECONDARY: (global $a funcref (ref.func $trampoline_split))
(global $a funcref (ref.func $split))

;; $split is not listed in --keep-funcs; the secondary-module checks expect
;; it, together with its global.get of $a, in the secondary module.
;; SECONDARY: (func $split
;; SECONDARY-NEXT: (drop
;; SECONDARY-NEXT: (global.get $a)
;; SECONDARY-NEXT: )
;; SECONDARY-NEXT: )
(func $split
(drop
(global.get $a)
)
)
)
57 changes: 0 additions & 57 deletions test/lit/wasm-split/selective-exports.wast

This file was deleted.

Loading
Loading