From 5d74a318a5d7aa14b9e0f6ff442cacdd82f08add Mon Sep 17 00:00:00 2001 From: James M Snell Date: Mon, 20 Jan 2025 11:38:14 -0800 Subject: [PATCH] Continue work on new module registry --- .../api/tests/new-module-registry-test.js | 28 +- .../tests/new-module-registry-test.wd-test | 9 + src/workerd/jsg/commonjs.c++ | 248 ++++++++++++------ src/workerd/jsg/commonjs.h | 14 +- src/workerd/jsg/modules-new-test.c++ | 5 + src/workerd/jsg/modules-new.h | 13 +- src/workerd/server/workerd-api.c++ | 38 +-- 7 files changed, 244 insertions(+), 111 deletions(-) diff --git a/src/workerd/api/tests/new-module-registry-test.js b/src/workerd/api/tests/new-module-registry-test.js index 6013b7885fb..5c85d0b1c9f 100644 --- a/src/workerd/api/tests/new-module-registry-test.js +++ b/src/workerd/api/tests/new-module-registry-test.js @@ -46,6 +46,18 @@ strictEqual(Buffer.from(data.default).toString(), 'abcdef'); import * as json from 'json'; deepStrictEqual(json.default, { foo: 1 }); +import * as cjs from 'cjs'; +deepStrictEqual(cjs.default, 1); + +import * as nodejs from 'nodejs'; +deepStrictEqual(nodejs.default, 2); + +import { a as cjsA } from 'cjs2'; +deepStrictEqual(cjsA, 1); + +import * as cjs3 from 'cjs3'; +deepStrictEqual(cjs.default, 1); + await rejects(import('invalid-json'), { message: /Unexpected non-whitespace character after JSON/, }); @@ -141,10 +153,20 @@ export const evalErrorsInEsmTopLevel = { }, }; +export const cjsThrow = { + async test() { + await rejects(import('cjs-throw'), { + message: 'boom', + }); + }, +}; + // TODO(now): Tests // * [ ] Include tests for all known module types // * [x] ESM -// * [ ] CommonJS +// * [x] CommonJS +// * [x] CommonJS with top level errors +// * [ ] CommonJS with disallowed top-level i/o // * [x] Text // * [x] Data // * [x] JSON @@ -164,12 +186,12 @@ export const evalErrorsInEsmTopLevel = { // * [x] URL resolution works correctly // * [x] Invalid URLs are correctly reported as errors // * [x] Import assertions should be rejected -// * [ ] require(...) Works in CommonJs Modules +// * [x] require(...) Works in CommonJs Modules // * [ ] require(...) correctly handles node: modules with/without the node: prefix // * [ ] Circular dependencies are correctly handled // * [ ] Errors during CommonJs evaluation are correctly reported // * [ ] Entry point ESM with no default export is correctly reported as error -// * [ ] CommonJs modules correctly expose named exports +// * [x] CommonJs modules correctly expose named exports // * [ ] require('module').createRequire API works as expected // * [ ] Fallback service works as expected // * [ ] console.log output correctly uses node-internal:inspect for output diff --git a/src/workerd/api/tests/new-module-registry-test.wd-test b/src/workerd/api/tests/new-module-registry-test.wd-test index 2a726a55eef..447e277ed6a 100644 --- a/src/workerd/api/tests/new-module-registry-test.wd-test +++ b/src/workerd/api/tests/new-module-registry-test.wd-test @@ -30,6 +30,15 @@ const unitTests :Workerd.Config = ( (name = "data", data = "abcdef"), (name = "json", json = "{ \"foo\": 1 }"), (name = "invalid-json", json = "1n"), + (name = "cjs", commonJsModule = "module.exports = 1"), + (name = "cjs-throw", commonJsModule = "throw new Error('boom')"), + (name = "nodejs", nodeJsCompatModule = "module.exports = 2"), + (name = "cjs2", + commonJsModule = "module.exports = { a: 1 }", + namedExports = ["a"], + ), + (name = "cjs3", + commonJsModule = "module.exports = require('.././.././cjs')") ], compatibilityDate = "2024-07-01", compatibilityFlags = [ diff --git a/src/workerd/jsg/commonjs.c++ b/src/workerd/jsg/commonjs.c++ index 8fd5dd06c62..802c21202b9 100644 --- a/src/workerd/jsg/commonjs.c++ +++ b/src/workerd/jsg/commonjs.c++ @@ -1,5 +1,6 @@ #include "commonjs.h" +#include "modules-new.h" #include "modules.h" namespace workerd::jsg { @@ -9,55 +10,95 @@ CommonJsModuleContext::CommonJsModuleContext(jsg::Lock& js, kj::Path path) path(kj::mv(path)), exports(js.v8Isolate, module->getExports(js)) {} -v8::Local CommonJsModuleContext::require(jsg::Lock& js, kj::String specifier) { - auto modulesForResolveCallback = getModulesForResolveCallback(js.v8Isolate); - KJ_REQUIRE(modulesForResolveCallback != nullptr, "didn't expect resolveCallback() now"); +CommonJsModuleContext::CommonJsModuleContext(jsg::Lock& js, const jsg::Url& url) + : module(jsg::alloc(js, kj::str(url.getHref()))), + path(url.clone()), + exports(js.v8Isolate, module->getExports(js)) {} - if (isNodeJsCompatEnabled(js)) { - KJ_IF_SOME(nodeSpec, checkNodeSpecifier(specifier)) { - specifier = kj::mv(nodeSpec); +v8::Local CommonJsModuleContext::require(jsg::Lock& js, kj::String specifier) { + KJ_SWITCH_ONEOF(path) { + KJ_CASE_ONEOF(p, kj::Path) { + auto modulesForResolveCallback = getModulesForResolveCallback(js.v8Isolate); + KJ_REQUIRE(modulesForResolveCallback != nullptr, "didn't expect resolveCallback() now"); + + if (isNodeJsCompatEnabled(js)) { + KJ_IF_SOME(nodeSpec, checkNodeSpecifier(specifier)) { + specifier = kj::mv(nodeSpec); + } + } + + kj::Path targetPath = ([&] { + // If the specifier begins with one of our known prefixes, let's not resolve + // it against the referrer. + if (specifier.startsWith("node:") || specifier.startsWith("cloudflare:") || + specifier.startsWith("workerd:")) { + return kj::Path::parse(specifier); + } + return p.parent().eval(specifier); + })(); + + // require() is only exposed to worker bundle modules so the resolve here is only + // permitted to require worker bundle or built-in modules. Internal modules are + // excluded. + auto& info = + JSG_REQUIRE_NONNULL(modulesForResolveCallback->resolve(js, targetPath, p, + ModuleRegistry::ResolveOption::DEFAULT, + ModuleRegistry::ResolveMethod::REQUIRE, specifier.asPtr()), + Error, "No such module \"", targetPath.toString(), "\"."); + // Adding imported from suffix here not necessary like it is for + // resolveCallback, since we have a js stack that will include the parent + // module's name and location of the failed require(). + + ModuleRegistry::RequireImplOptions options = ModuleRegistry::RequireImplOptions::DEFAULT; + if (getCommonJsExportDefault(js.v8Isolate)) { + options = ModuleRegistry::RequireImplOptions::EXPORT_DEFAULT; + } + + return ModuleRegistry::requireImpl(js, info, options); + } + KJ_CASE_ONEOF(u, jsg::Url) { + return modules::ModuleRegistry::resolve(js, specifier, "default"_kj, + modules::ResolveContext::Type::BUNDLE, modules::ResolveContext::Source::REQUIRE, u); } } - - kj::Path targetPath = ([&] { - // If the specifier begins with one of our known prefixes, let's not resolve - // it against the referrer. - if (specifier.startsWith("node:") || specifier.startsWith("cloudflare:") || - specifier.startsWith("workerd:")) { - return kj::Path::parse(specifier); - } - return path.parent().eval(specifier); - })(); - - // require() is only exposed to worker bundle modules so the resolve here is only - // permitted to require worker bundle or built-in modules. Internal modules are - // excluded. - auto& info = JSG_REQUIRE_NONNULL(modulesForResolveCallback->resolve(js, targetPath, path, - ModuleRegistry::ResolveOption::DEFAULT, - ModuleRegistry::ResolveMethod::REQUIRE, specifier.asPtr()), - Error, "No such module \"", targetPath.toString(), "\"."); - // Adding imported from suffix here not necessary like it is for resolveCallback, since we have a - // js stack that will include the parent module's name and location of the failed require(). - - ModuleRegistry::RequireImplOptions options = ModuleRegistry::RequireImplOptions::DEFAULT; - if (getCommonJsExportDefault(js.v8Isolate)) { - options = ModuleRegistry::RequireImplOptions::EXPORT_DEFAULT; - } - - return ModuleRegistry::requireImpl(js, info, options); + KJ_UNREACHABLE; } void CommonJsModuleContext::visitForMemoryInfo(MemoryTracker& tracker) const { tracker.trackField("exports", exports); - tracker.trackFieldWithSize("path", path.size()); + KJ_SWITCH_ONEOF(path) { + KJ_CASE_ONEOF(p, kj::Path) { + tracker.trackFieldWithSize("path", p.size()); + } + KJ_CASE_ONEOF(u, jsg::Url) { + tracker.trackFieldWithSize("path", u.getHref().size()); + } + } } kj::String CommonJsModuleContext::getFilename() const { - return path.toString(true); + KJ_SWITCH_ONEOF(path) { + KJ_CASE_ONEOF(p, kj::Path) { + return p.toString(true); + } + KJ_CASE_ONEOF(u, jsg::Url) { + return kj::str(u.getPathname()); + } + } + KJ_UNREACHABLE; } kj::String CommonJsModuleContext::getDirname() const { - return path.parent().toString(true); + KJ_SWITCH_ONEOF(path) { + KJ_CASE_ONEOF(p, kj::Path) { + return p.parent().toString(true); + } + // TODO(new-module-registry): Appropriately convert file URL into path + KJ_CASE_ONEOF(u, jsg::Url) { + return kj::str(u.getPathname()); + } + } + KJ_UNREACHABLE; } jsg::Ref CommonJsModuleContext::getModule(jsg::Lock& js) { @@ -67,7 +108,7 @@ jsg::Ref CommonJsModuleContext::getModule(jsg::Lock& js) { v8::Local CommonJsModuleContext::getExports(jsg::Lock& js) const { return exports.getHandle(js); } -void CommonJsModuleContext::setExports(jsg::Value value) { +void CommonJsModuleContext::setExports(jsg::Lock& js, jsg::Value value) { exports = kj::mv(value); } @@ -98,50 +139,66 @@ NodeJsModuleContext::NodeJsModuleContext(jsg::Lock& js, kj::Path path) path(kj::mv(path)), exports(js.v8Ref(module->getExports(js))) {} -v8::Local NodeJsModuleContext::require(jsg::Lock& js, kj::String specifier) { - // If it is a bare specifier known to be a Node.js built-in, then prefix the - // specifier with node: - bool isNodeBuiltin = false; - auto resolveOption = jsg::ModuleRegistry::ResolveOption::DEFAULT; - KJ_IF_SOME(spec, checkNodeSpecifier(specifier)) { - specifier = kj::mv(spec); - isNodeBuiltin = true; - resolveOption = jsg::ModuleRegistry::ResolveOption::BUILTIN_ONLY; - } +NodeJsModuleContext::NodeJsModuleContext(jsg::Lock& js, const jsg::Url& url) + : module(jsg::alloc(js, kj::str(url.getHref()))), + path(url.clone()), + exports(js.v8Isolate, module->getExports(js)) {} - // TODO(cleanup): This implementation from here on is identical to the - // CommonJsModuleContext::require. We should consolidate these as the - // next step. - - auto modulesForResolveCallback = jsg::getModulesForResolveCallback(js.v8Isolate); - KJ_REQUIRE(modulesForResolveCallback != nullptr, "didn't expect resolveCallback() now"); - - kj::Path targetPath = ([&] { - // If the specifier begins with one of our known prefixes, let's not resolve - // it against the referrer. - if (specifier.startsWith("node:") || specifier.startsWith("cloudflare:") || - specifier.startsWith("workerd:")) { - return kj::Path::parse(specifier); - } - return path.parent().eval(specifier); - })(); - - // require() is only exposed to worker bundle modules so the resolve here is only - // permitted to require worker bundle or built-in modules. Internal modules are - // excluded. - auto& info = - JSG_REQUIRE_NONNULL(modulesForResolveCallback->resolve(js, targetPath, path, resolveOption, - ModuleRegistry::ResolveMethod::REQUIRE, specifier.asPtr()), - Error, "No such module \"", targetPath.toString(), "\"."); - // Adding imported from suffix here not necessary like it is for resolveCallback, since we have a - // js stack that will include the parent module's name and location of the failed require(). - - if (!isNodeBuiltin) { - JSG_REQUIRE_NONNULL( - info.maybeSynthetic, TypeError, "Cannot use require() to import an ES Module."); +v8::Local NodeJsModuleContext::require(jsg::Lock& js, kj::String specifier) { + KJ_SWITCH_ONEOF(path) { + KJ_CASE_ONEOF(p, kj::Path) { + // If it is a bare specifier known to be a Node.js built-in, then prefix the + // specifier with node: + bool isNodeBuiltin = false; + auto resolveOption = jsg::ModuleRegistry::ResolveOption::DEFAULT; + KJ_IF_SOME(spec, checkNodeSpecifier(specifier)) { + specifier = kj::mv(spec); + isNodeBuiltin = true; + resolveOption = jsg::ModuleRegistry::ResolveOption::BUILTIN_ONLY; + } + + // TODO(cleanup): This implementation from here on is identical to the + // CommonJsModuleContext::require. We should consolidate these as the + // next step. + + auto modulesForResolveCallback = jsg::getModulesForResolveCallback(js.v8Isolate); + KJ_REQUIRE(modulesForResolveCallback != nullptr, "didn't expect resolveCallback() now"); + + kj::Path targetPath = ([&] { + // If the specifier begins with one of our known prefixes, let's not resolve + // it against the referrer. + if (specifier.startsWith("node:") || specifier.startsWith("cloudflare:") || + specifier.startsWith("workerd:")) { + return kj::Path::parse(specifier); + } + return p.parent().eval(specifier); + })(); + + // require() is only exposed to worker bundle modules so the resolve here is only + // permitted to require worker bundle or built-in modules. Internal modules are + // excluded. + auto& info = + JSG_REQUIRE_NONNULL(modulesForResolveCallback->resolve(js, targetPath, p, resolveOption, + ModuleRegistry::ResolveMethod::REQUIRE, specifier.asPtr()), + Error, "No such module \"", targetPath.toString(), "\"."); + // Adding imported from suffix here not necessary like it is for resolveCallback, + // since we have a js stack that will include the parent module's name and location + // of the failed require(). + + if (!isNodeBuiltin) { + JSG_REQUIRE_NONNULL( + info.maybeSynthetic, TypeError, "Cannot use require() to import an ES Module."); + } + + return ModuleRegistry::requireImpl( + js, info, ModuleRegistry::RequireImplOptions::EXPORT_DEFAULT); + } + KJ_CASE_ONEOF(u, jsg::Url) { + return modules::ModuleRegistry::resolve(js, specifier, "default"_kj, + modules::ResolveContext::Type::BUNDLE, modules::ResolveContext::Source::REQUIRE, u); + } } - - return ModuleRegistry::requireImpl(js, info, ModuleRegistry::RequireImplOptions::EXPORT_DEFAULT); + KJ_UNREACHABLE; } v8::Local NodeJsModuleContext::getBuffer(jsg::Lock& js) { @@ -160,11 +217,28 @@ v8::Local NodeJsModuleContext::getProcess(jsg::Lock& js) { } kj::String NodeJsModuleContext::getFilename() { - return path.toString(true); + KJ_SWITCH_ONEOF(path) { + KJ_CASE_ONEOF(p, kj::Path) { + return p.toString(true); + } + KJ_CASE_ONEOF(u, jsg::Url) { + return kj::str(u.getPathname()); + } + } + KJ_UNREACHABLE; } kj::String NodeJsModuleContext::getDirname() { - return path.parent().toString(true); + KJ_SWITCH_ONEOF(path) { + KJ_CASE_ONEOF(p, kj::Path) { + return p.parent().toString(true); + } + // TODO(new-module-registry): Appropriately convert file URL into path + KJ_CASE_ONEOF(u, jsg::Url) { + return kj::str(u.getPathname()); + } + } + KJ_UNREACHABLE; } jsg::Ref NodeJsModuleContext::getModule(jsg::Lock& js) { @@ -195,4 +269,16 @@ kj::StringPtr NodeJsModuleObject::getPath() { return path; } +void NodeJsModuleContext::visitForMemoryInfo(MemoryTracker& tracker) const { + tracker.trackField("exports", exports); + KJ_SWITCH_ONEOF(path) { + KJ_CASE_ONEOF(p, kj::Path) { + tracker.trackFieldWithSize("path", p.size()); + } + KJ_CASE_ONEOF(u, jsg::Url) { + tracker.trackFieldWithSize("path", u.getHref().size()); + } + } +} + } // namespace workerd::jsg diff --git a/src/workerd/jsg/commonjs.h b/src/workerd/jsg/commonjs.h index ad374951ee9..0c6523aa23b 100644 --- a/src/workerd/jsg/commonjs.h +++ b/src/workerd/jsg/commonjs.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -29,13 +30,14 @@ class CommonJsModuleObject final: public jsg::Object { class CommonJsModuleContext final: public jsg::Object { public: CommonJsModuleContext(jsg::Lock& js, kj::Path path); + CommonJsModuleContext(jsg::Lock&, const jsg::Url&); v8::Local require(jsg::Lock& js, kj::String specifier); jsg::Ref getModule(jsg::Lock& js); v8::Local getExports(jsg::Lock& js) const; - void setExports(jsg::Value value); + void setExports(jsg::Lock& js, jsg::Value value); kj::String getFilename() const; kj::String getDirname() const; @@ -53,7 +55,7 @@ class CommonJsModuleContext final: public jsg::Object { void visitForMemoryInfo(MemoryTracker& tracker) const; private: - kj::Path path; + kj::OneOf path; jsg::Value exports; }; @@ -115,6 +117,7 @@ class NodeJsModuleObject: public jsg::Object { class NodeJsModuleContext: public jsg::Object { public: NodeJsModuleContext(jsg::Lock& js, kj::Path path); + NodeJsModuleContext(jsg::Lock&, const jsg::Url&); v8::Local require(jsg::Lock& js, kj::String specifier); @@ -143,13 +146,10 @@ class NodeJsModuleContext: public jsg::Object { jsg::Ref module; - void visitForMemoryInfo(MemoryTracker& tracker) const { - tracker.trackField("exports", exports); - tracker.trackFieldWithSize("path", path.size()); - } + void visitForMemoryInfo(MemoryTracker& tracker) const; private: - kj::Path path; + kj::OneOf path; jsg::Value exports; }; diff --git a/src/workerd/jsg/modules-new-test.c++ b/src/workerd/jsg/modules-new-test.c++ index 8008d8d57c6..0385284cc40 100644 --- a/src/workerd/jsg/modules-new-test.c++ +++ b/src/workerd/jsg/modules-new-test.c++ @@ -65,6 +65,10 @@ struct TestType: public jsg::Object { barCalled = true; } + jsg::Ref getModule(Lock& js) { + return JSG_THIS; + } + JsObject getExports(Lock& js) { KJ_IF_SOME(exp, exports) { return exp.getHandle(js); @@ -85,6 +89,7 @@ struct TestType: public jsg::Object { JSG_METHOD(bar); JSG_METHOD(require); JSG_PROTOTYPE_PROPERTY(exports, getExports, setExports); + JSG_READONLY_PROTOTYPE_PROPERTY(module, getModule); } }; diff --git a/src/workerd/jsg/modules-new.h b/src/workerd/jsg/modules-new.h index 433279d802c..4681baeb87e 100644 --- a/src/workerd/jsg/modules-new.h +++ b/src/workerd/jsg/modules-new.h @@ -185,7 +185,7 @@ class Module { EVAL = 1 << 2, }; - // The EvalCallback is used to to ensure evaluation of a module outside of a + // The EvalCallback is used to ensure evaluation of a module outside of a // request context, when necessary. If the EvalCallback is not set, then the // Flag::EVAL on a module is ignored. If the EvalCallback is set, then any // Modules that have the Flag::EVAL set will have their evaluation deferred @@ -347,10 +347,13 @@ class Module { JsObject(wrapper.wrap(js.v8Context(), kj::none, ext.addRef())), observer); fn(js); // If there are named exports specified for the module namespace, - // then we want to examine the ext->getExports() to extract those. - auto exports = ext->getExports(js); - for (auto& name: ns.getNamedExports()) { - ns.set(js, name, exports.get(js, name)); + // then we want to examine the ext->getExports() to extract those, + // but only if the exports is an object. + auto exports = JsValue(ext->getModule(js)->getExports(js)); + KJ_IF_SOME(obj, exports.tryCast()) { + for (auto& name: ns.getNamedExports()) { + ns.set(js, name, obj.get(js, name)); + } } return ns.setDefault(js, exports); }, [&](Value exception) { diff --git a/src/workerd/server/workerd-api.c++ b/src/workerd/server/workerd-api.c++ index da2768139a7..7c70df08161 100644 --- a/src/workerd/server/workerd-api.c++ +++ b/src/workerd/server/workerd-api.c++ @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -858,6 +859,7 @@ const WorkerdApi& WorkerdApi::from(const Worker::Api& api) { // ======================================================================================= +// This methdd is used to initialize the new module registry implementation. kj::Own WorkerdApi::initializeBundleModuleRegistry( const jsg::ResolveObserver& observer, const config::Worker::Reader& conf, @@ -871,6 +873,9 @@ kj::Own WorkerdApi::initializeBundleModuleRegistry [](kj::Own worker, const auto& module, jsg::V8Ref v8Module, const auto& observer, kj::Maybe> asyncContext) -> kj::Promise> { + // Awaiting yield here forces the event loop to turn and runs the + // continuation outside the scope of the IoContext, ensuring that + // the evaluation that comes next is unable to schedule i/o. co_await kj::yield(); KJ_ASSERT(!IoContext::hasCurrent()); auto asyncLock = co_await worker->takeAsyncLockWithoutRequest(nullptr); @@ -944,24 +949,27 @@ kj::Own WorkerdApi::initializeBundleModuleRegistry break; } case config::Worker::Module::COMMON_JS_MODULE: { - // TODO(soon): These are intentionally commented out for the time - // being and will be soon handled in a follow up PR. This branch - // is not yet taken in production. - // bundleBuilder.addSyntheticModule( - // def.getName(), jsg::modules::Module::newCjsStyleModuleHandler< - // jsg::CommonJsModuleContext, - // JsgWorkerdIsolate_TypeWrapper>( - // kj::str(def.getCommonJsModule()), - // kj::str(def.getName()))); + kj::Vector exports(def.getNamedExports().size()); + for (auto name: def.getNamedExports()) { + exports.add(kj::str(name)); + } + bundleBuilder.addSyntheticModule(def.getName(), + jsg::modules::Module::newCjsStyleModuleHandler( + kj::str(def.getCommonJsModule()), kj::str(def.getName())), + exports.releaseAsArray()); break; } case config::Worker::Module::NODE_JS_COMPAT_MODULE: { - // bundleBuilder.addSyntheticModule( - // def.getName(), jsg::modules::Module::newCjsStyleModuleHandler< - // jsg::NodeJsModuleContext, - // JsgWorkerdIsolate_TypeWrapper>( - // kj::str(def.getNodeJsCompatModule()), - // kj::str(def.getName()))); + kj::Vector exports(def.getNamedExports().size()); + for (auto name: def.getNamedExports()) { + exports.add(kj::str(name)); + } + bundleBuilder.addSyntheticModule(def.getName(), + jsg::modules::Module::newCjsStyleModuleHandler( + kj::str(def.getNodeJsCompatModule()), kj::str(def.getName())), + exports.releaseAsArray()); break; } case config::Worker::Module::PYTHON_MODULE: {