Skip to content

Commit

Permalink
Merge pull request #3379 from cloudflare/dominik/embed-package-locks
Browse files Browse the repository at this point in the history
Embeds Python package locks in the binary.
  • Loading branch information
dom96 authored Jan 22, 2025
2 parents 00d8c87 + 8a81ca7 commit ae1734d
Show file tree
Hide file tree
Showing 12 changed files with 91 additions and 45 deletions.
16 changes: 10 additions & 6 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,17 @@ http_archive(
urls = ["https://github.com/dom96/pyodide_packages/releases/download/just-stdlib/pyodide_packages.tar.zip"],
)

load("//:build/pyodide_bucket.bzl", "PYODIDE_ALL_WHEELS_ZIP_SHA256", "PYODIDE_GITHUB_RELEASE_URL", "PYODIDE_LOCK_SHA256")
load("//:build/pyodide_bucket.bzl", "PYODIDE_ALL_WHEELS_ZIP_SHA256", "PYODIDE_GITHUB_RELEASE_URL")
load("//:build/python_metadata.bzl", "PYTHON_LOCKFILES")

http_file(
name = "pyodide-lock.json",
sha256 = PYODIDE_LOCK_SHA256,
url = PYODIDE_GITHUB_RELEASE_URL + "pyodide-lock.json",
)
# Declare one external `http_file` repository per entry in PYTHON_LOCKFILES. Each repository is
# named "pyodide-lock_<package_date>.json", downloads that release's pyodide-lock.json from the
# cloudflare/pyodide-build-scripts GitHub releases, and is verified against the recorded sha256.
[
http_file(
name = "pyodide-lock_" + package_date + ".json",
sha256 = package_lock_sha,
url = "https://github.com/cloudflare/pyodide-build-scripts/releases/download/" + package_date + "/pyodide-lock.json",
)
for package_date, package_lock_sha in PYTHON_LOCKFILES.items()
]

http_archive(
name = "all_pyodide_wheels",
Expand Down
1 change: 0 additions & 1 deletion build/pyodide_bucket.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# both edgeworker and workerd, as well as src/pyodide/BUILD.bazel
PYODIDE_PACKAGE_BUCKET_URL = "https://pyodide-packages.runtime-playground.workers.dev/20240513.2/"
PYODIDE_GITHUB_RELEASE_URL = "https://github.com/cloudflare/pyodide-build-scripts/releases/download/20240513.2/"
PYODIDE_LOCK_SHA256 = "51eb3fd8dae5f551e2393ac58edfaf6a6c8d9c51b39c1584dd5d74bd7fb803fc"
PYODIDE_PACKAGES_TAR_ZIP_SHA256 = "b71d4c3cee3b6bd12969a788545f4159fb1eb984a7ca5de2493c4fa8479beeec"
PYODIDE_ALL_WHEELS_ZIP_SHA256 = "c17feb45fdcb4b41eab9c719e69c9e062a8fc88344fcb6bbd7de0de92c3ae660"

Expand Down
9 changes: 9 additions & 0 deletions build/python_metadata.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# The below is a mapping of pyodide-lock.json files, one for each package bundle version that we
# support. Each of these gets embedded in the workerd and EW binary.
#
# The key is the `packages` field in pythonSnapshotRelease and the value is the sha256 checksum of
# the lock file. The key is also used as the release tag in the lock file's download URL and in
# the names of the per-release build targets, so it must match the published release name exactly.
PYTHON_LOCKFILES = {
"20240829.4": "c2d9c67ea55a672b95a3beb8d66bfbe7df736edb4bb657383b263151e7e85ef4",
"20241218": "1421e9351baf24ec44d82f78b9ac26e8e0e6595bfe3f626dedb33147bfcd1998",
}
48 changes: 33 additions & 15 deletions src/pyodide/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ load("@capnp-cpp//src/capnp:cc_capnp_library.bzl", "cc_capnp_library")
load("//:build/capnp_embed.bzl", "capnp_embed")
load("//:build/js_file.bzl", "js_file")
load("//:build/pyodide_bucket.bzl", "PYODIDE_PACKAGE_BUCKET_URL")
load("//:build/python_metadata.bzl", "PYTHON_LOCKFILES")
load("//:build/wd_ts_bundle.bzl", "wd_ts_bundle")

copy_file(
Expand Down Expand Up @@ -35,7 +36,19 @@ capnp_embed(
copy_file(
name = "pyodide_extra_capnp_file",
src = "pyodide_extra.capnp",
out = "generated/pyodide_extra_tmpl.capnp",
)

# Produces generated/pyodide_extra.capnp from the copied template by replacing the package-locks
# placeholder with one `(packageDate = "...", lock = embed "...")` list element per key of
# PYTHON_LOCKFILES, joined with commas.
expand_template(
name = "pyodide_extra_expand_template@rule",
out = "generated/pyodide_extra.capnp",
substitutions = {
"%PACKAGE_LOCKS": ",".join([
"(packageDate = \"" + package_date + "\", lock = embed \"pyodide-lock_" + package_date + ".json\")"
for package_date in PYTHON_LOCKFILES.keys()
]),
},
template = "generated/pyodide_extra_tmpl.capnp",
)

capnp_embed(
Expand All @@ -44,21 +57,17 @@ capnp_embed(
deps = ["pyodide_extra_capnp_file"],
)

capnp_embed(
name = "pyodide_lock_file_embed",
src = "generated/pyodide-lock.json",
deps = ["pyodide-lock.js@rule"],
)

cc_capnp_library(
name = "pyodide_extra_capnp",
srcs = ["generated/pyodide_extra.capnp"],
visibility = ["//visibility:public"],
deps = [
":pyodide_extra_file_embed",
":pyodide_lock_file_embed",
":pyodide_packages_archive_embed",
":python_entrypoint_file_embed",
] + [
":pyodide_lock_" + package_date + "_file_embed"
for package_date in PYTHON_LOCKFILES.keys()
],
)

Expand All @@ -74,11 +83,23 @@ copy_file(
out = "generated/python_stdlib.zip",
)

copy_file(
name = "pyodide-lock.js@rule",
src = "@pyodide-lock.json//file",
out = "generated/pyodide-lock.json",
)
# Copy each downloaded lock file into generated/ under a per-release file name, one copy_file
# target per key of PYTHON_LOCKFILES. Only the release key is needed here (the checksum is
# consumed by the repository rule that downloads the file), so iterate over the keys.
[
    copy_file(
        name = "pyodide-lock_" + package_date + ".json@copy_file_rule",
        src = "@pyodide-lock_" + package_date + ".json//file",
        out = "generated/pyodide-lock_" + package_date + ".json",
    )
    for package_date in PYTHON_LOCKFILES.keys()
]

# Wrap each copied lock file in a capnp_embed target named "pyodide_lock_<package_date>_file_embed"
# so it can be listed as a dependency of the capnp library. Only the release key is needed here,
# so iterate over the keys rather than unpacking an unused checksum.
[
    capnp_embed(
        name = "pyodide_lock_" + package_date + "_file_embed",
        src = "generated/pyodide-lock_" + package_date + ".json",
        deps = ["pyodide-lock_" + package_date + ".json@copy_file_rule"],
    )
    for package_date in PYTHON_LOCKFILES.keys()
]

# pyodide.asm.js patches
# TODO: all of these should be fixed by linking our own Pyodide or by upstreaming.
Expand Down Expand Up @@ -235,15 +256,13 @@ wd_ts_bundle(
import_name = "pyodide",
internal_data_modules = INTERNAL_DATA_MODULES,
internal_json_modules = [
"generated/pyodide-lock.json",
"generated/pyodide-bucket.json",
],
internal_modules = INTERNAL_MODULES,
js_deps = [
"generated/emscriptenSetup",
"pyodide.asm.wasm@rule",
"python_stdlib.zip@rule",
"pyodide-lock.js@rule",
"pyodide-bucket.json@rule",
],
lint = False,
Expand Down Expand Up @@ -279,7 +298,6 @@ genrule(
":pyodide-internal_generated_emscriptenSetup.js",
":pyodide-internal_generated_pyodide.asm.wasm",
":pyodide-internal_generated_python_stdlib.zip",
":pyodide-internal_generated_pyodide-lock.json",
":pyodide-internal_generated_pyodide-bucket.json",
],
outs = ["pyodide.capnp.bin"],
Expand Down
6 changes: 3 additions & 3 deletions src/pyodide/internal/metadata.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import { default as MetadataReader } from 'pyodide-internal:runtime-generated/metadata';
import { default as PYODIDE_BUCKET } from 'pyodide-internal:generated/pyodide-bucket.json';
// The pyodide-lock.json is read from the Python bundle (pyodide-capnp-bin).
import { default as PYODIDE_LOCK } from 'pyodide-internal:generated/pyodide-lock.json';
import { default as ArtifactBundler } from 'pyodide-internal:artifacts';

export const IS_WORKERD = MetadataReader.isWorkerd();
Expand All @@ -14,7 +12,9 @@ export const LOAD_WHEELS_FROM_R2: boolean = IS_WORKERD;
export const LOAD_WHEELS_FROM_ARTIFACT_BUNDLER =
MetadataReader.shouldUsePackagesInArtifactBundler();
export const PACKAGES_VERSION = MetadataReader.getPackagesVersion();
export const LOCKFILE: PackageLock = PYODIDE_LOCK;
export const LOCKFILE: PackageLock = JSON.parse(
MetadataReader.getPackagesLock()
);
export const REQUIREMENTS = MetadataReader.getRequirements();
export const MAIN_MODULE_NAME = MetadataReader.getMainModule();
export const MEMORY_SNAPSHOT_READER = MetadataReader.hasMemorySnapshot()
Expand Down
8 changes: 7 additions & 1 deletion src/pyodide/pyodide_extra.capnp
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,10 @@

const pythonEntrypoint :Text = embed "python-entrypoint.js";
const pyodidePackagesTar :Data = embed "pyodide_packages.tar";
const pyodideLock :Text = embed "pyodide-lock.json";
struct PackageLock {
# One embedded pyodide-lock.json together with the package release it belongs to.
packageDate @0 :Text;
# Release identifier; matched against the `packages` value of a pythonSnapshotRelease.
lock @1 :Text;
# Full text of that release's pyodide-lock.json.
}
# The list body below is a placeholder line that the build substitutes (via expand_template)
# with one entry per supported package release; this file is a template, not a final schema.
const packageLocks :List(PackageLock) = [
%PACKAGE_LOCKS
];
1 change: 1 addition & 0 deletions src/pyodide/types/runtime-generated/metadata.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ declare namespace MetadataReader {
const disposeMemorySnapshot: () => void;
const shouldUsePackagesInArtifactBundler: () => boolean;
const getPackagesVersion: () => string;
const getPackagesLock: () => string;
const read: (index: number, position: number, buffer: Uint8Array) => number;
}

Expand Down
1 change: 1 addition & 0 deletions src/workerd/api/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ wd_cc_library(
hdrs = [
"pyodide/pyodide.h",
"pyodide/setup-emscripten.h",
"//src/pyodide:generated/pyodide_extra.capnp.h",
],
implementation_deps = ["//src/workerd/util:string-buffer"],
visibility = ["//visibility:public"],
Expand Down
31 changes: 15 additions & 16 deletions src/workerd/api/pyodide/pyodide.c++
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <workerd/util/string-buffer.h>
#include <workerd/util/strings.h>

#include <pyodide/generated/pyodide_extra.capnp.h>

#include <kj/array.h>
#include <kj/common.h>
#include <kj/debug.h>
Expand All @@ -23,22 +25,6 @@ const kj::Maybe<jsg::Bundle::Reader> PyodideBundleManager::getPyodideBundle(
[](const MessageBundlePair& t) { return t.bundle; });
}

// Returns the text of the `pyodide-internal:generated/pyodide-lock.json` JSON module found in
// the Pyodide bundle that corresponds to `pythonSnapshotRelease`, or kj::none when the bundle
// contains no such module.
kj::Maybe<kj::String> PyodideBundleManager::getPyodideLock(
    PythonSnapshotRelease::Reader pythonSnapshotRelease) const {
  // We expect the Pyodide Bundle for the specified bundle name to already be downloaded here,
  // so a missing bundle trips the assertion instead of producing kj::none.
  auto name = getPythonBundleName(pythonSnapshotRelease);
  auto lookedUp = getPyodideBundle(name);
  auto pyodideBundle = KJ_ASSERT_NONNULL(lookedUp);

  for (auto entry: pyodideBundle.getModules()) {
    bool isLockModule = entry.which() == workerd::jsg::Module::JSON &&
        entry.getName() == "pyodide-internal:generated/pyodide-lock.json";
    if (isLockModule) {
      return kj::str(entry.getJson());
    }
  }

  return kj::none;
}

void PyodideBundleManager::setPyodideBundleData(
kj::String version, kj::Array<unsigned char> data) const {
auto wordArray = kj::arrayPtr(
Expand Down Expand Up @@ -400,6 +386,16 @@ kj::Array<kj::StringPtr> ArtifactBundler::getSnapshotImports() {
return result.releaseAsArray();
}

// Finds the embedded lock file whose package date equals the `packages` value of
// `pythonSnapshotRelease` and returns a copy of its text. Yields kj::none when no embedded
// lock matches that release.
kj::Maybe<kj::String> getPyodideLock(PythonSnapshotRelease::Reader pythonSnapshotRelease) {
  auto wantedPackages = pythonSnapshotRelease.getPackages();

  for (auto candidate: *PACKAGE_LOCKS) {
    bool matchesRelease = candidate.getPackageDate() == wantedPackages;
    if (matchesRelease) {
      return kj::str(candidate.getLock());
    }
  }

  return kj::none;
}

jsg::Ref<PyodideMetadataReader> makePyodideMetadataReader(Worker::Reader conf,
const PythonConfig& pythonConfig,
PythonSnapshotRelease::Reader pythonRelease) {
Expand Down Expand Up @@ -452,6 +448,8 @@ jsg::Ref<PyodideMetadataReader> makePyodideMetadataReader(Worker::Reader conf,
bool createSnapshot = pythonConfig.createSnapshot;
bool createBaselineSnapshot = pythonConfig.createBaselineSnapshot;
bool snapshotToDisk = createSnapshot || createBaselineSnapshot;
auto lock = KJ_ASSERT_NONNULL(getPyodideLock(pythonRelease),
kj::str("No lock file defined for Python packages release ", pythonRelease.getPackages()));

// clang-format off
return jsg::alloc<PyodideMetadataReader>(
Expand All @@ -460,6 +458,7 @@ jsg::Ref<PyodideMetadataReader> makePyodideMetadataReader(Worker::Reader conf,
contents.finish(),
requirements.finish(),
kj::str(pythonRelease.getPackages()),
kj::mv(lock),
true /* isWorkerd */,
false /* isTracing */,
snapshotToDisk,
Expand Down
11 changes: 10 additions & 1 deletion src/workerd/api/pyodide/pyodide.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ class PyodideBundleManager {
public:
void setPyodideBundleData(kj::String version, kj::Array<unsigned char> data) const;
const kj::Maybe<jsg::Bundle::Reader> getPyodideBundle(kj::StringPtr version) const;
kj::Maybe<kj::String> getPyodideLock(PythonSnapshotRelease::Reader pythonSnapshotRelease) const;

private:
struct MessageBundlePair {
Expand Down Expand Up @@ -79,6 +78,7 @@ class PyodideMetadataReader: public jsg::Object {
kj::Array<kj::Array<kj::byte>> contents;
kj::Array<kj::String> requirements;
kj::String packagesVersion;
kj::String packagesLock;
bool isWorkerdFlag;
bool isTracingFlag;
bool snapshotToDisk;
Expand All @@ -92,6 +92,7 @@ class PyodideMetadataReader: public jsg::Object {
kj::Array<kj::Array<kj::byte>> contents,
kj::Array<kj::String> requirements,
kj::String packagesVersion,
kj::String packagesLock,
bool isWorkerd,
bool isTracing,
bool snapshotToDisk,
Expand All @@ -103,6 +104,7 @@ class PyodideMetadataReader: public jsg::Object {
contents(kj::mv(contents)),
requirements(kj::mv(requirements)),
packagesVersion(kj::mv(packagesVersion)),
packagesLock(kj::mv(packagesLock)),
isWorkerdFlag(isWorkerd),
isTracingFlag(isTracing),
snapshotToDisk(snapshotToDisk),
Expand Down Expand Up @@ -165,6 +167,10 @@ class PyodideMetadataReader: public jsg::Object {
return kj::str(packagesVersion);
}

// Returns a newly allocated copy of the packages lock text that was passed to the constructor.
// The stored string is left untouched, so this can be called repeatedly.
kj::String getPackagesLock() {
return kj::str(packagesLock);
}

JSG_RESOURCE_TYPE(PyodideMetadataReader) {
JSG_METHOD(isWorkerd);
JSG_METHOD(isTracing);
Expand All @@ -181,6 +187,7 @@ class PyodideMetadataReader: public jsg::Object {
JSG_METHOD(shouldSnapshotToDisk);
JSG_METHOD(shouldUsePackagesInArtifactBundler);
JSG_METHOD(getPackagesVersion);
JSG_METHOD(getPackagesLock);
JSG_METHOD(isCreatingBaselineSnapshot);
}

Expand Down Expand Up @@ -416,6 +423,8 @@ class SetupEmscripten: public jsg::Object {
void visitForGc(jsg::GcVisitor& visitor);
};

kj::Maybe<kj::String> getPyodideLock(PythonSnapshotRelease::Reader pythonSnapshotRelease);

using Worker = server::config::Worker;

jsg::Ref<PyodideMetadataReader> makePyodideMetadataReader(Worker::Reader conf,
Expand Down
2 changes: 1 addition & 1 deletion src/workerd/io/compatibility-date.capnp
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ struct CompatibilityFlags @0x8f8c1b68151b6cef {
$compatEnableFlag("python_workers_20250116")
$experimental
$pythonSnapshotRelease(pyodide = "0.27.1", pyodideRevision = "2025-01-16",
packages = "2024-12-18", backport = 0,
packages = "20241218", backport = 0,
baselineSnapshotHash = "TODO");

requestCfOverridesCacheRules @72 :Bool
Expand Down
2 changes: 1 addition & 1 deletion src/workerd/server/workerd.c++
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ class CliMain final: public SchemaFileImpl::ErrorReporter {
auto version = getPythonBundleName(pythonRelease);
KJ_ASSERT_NONNULL(fetchPyodideBundle(config, version), "Failed to get Pyodide bundle");

auto lock = KJ_ASSERT_NONNULL(config.pyodideBundleManager.getPyodideLock(pythonRelease));
auto lock = KJ_ASSERT_NONNULL(api::pyodide::getPyodideLock(pythonRelease));

printf("%s\n", lock.cStr());
fflush(stdout);
Expand Down

0 comments on commit ae1734d

Please sign in to comment.