Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Verify Bitwise Reproducibility of RBFE Endstate Trajectories #1506

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
Dockerfile
.dockerignore
.gitlab-ci.yml
pytest-artifacts/

# Git Files
**/.git*
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dist/
*.egg-info/
.coverage*
.hypothesis/
pytest-artifacts/

# Nsys profiling files
*.sqlite
Expand Down
12 changes: 11 additions & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ stages: # List of stages for jobs, and their order of execution

variables:
# The default CI branch should slugify to the same name, such that $CI_COMMIT_REF_SLUG == $CI_DEFAULT_BRANCH
# which allows branches to use the default's branchs image as a cache
# which allows branches to use the default's branches image as a cache
DOCKER_TAG: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
# Directory to write CI artifacts to, read in tests/common.py
CI_ARTIFACT_DIR: pytest-artifacts/

workflow:
rules:
Expand Down Expand Up @@ -60,6 +62,7 @@ nocuda-tests:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME-nocuda-tests"
paths:
- coverage/
- $CI_ARTIFACT_DIR
when: on_success
expire_in: 1 week

Expand Down Expand Up @@ -103,6 +106,7 @@ nogpu-tests:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME-nogpu-tests"
paths:
- coverage/
- $CI_ARTIFACT_DIR
when: on_success
expire_in: 1 week

Expand All @@ -121,6 +125,7 @@ memory-tests:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME-memtests"
paths:
- coverage/
- $CI_ARTIFACT_DIR
when: on_success
expire_in: 1 week

Expand All @@ -139,6 +144,7 @@ unit-tests:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME-tests"
paths:
- coverage/
- $CI_ARTIFACT_DIR
when: on_success
expire_in: 1 week

Expand All @@ -157,6 +163,7 @@ nightly-tests:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME-nightly-tests"
paths:
- coverage/
- $CI_ARTIFACT_DIR
when: on_success
expire_in: 1 week

Expand All @@ -175,6 +182,7 @@ nightly-memcheck-tests:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME-nightly-memcheck-tests"
paths:
- coverage/
- $CI_ARTIFACT_DIR
when: on_success
expire_in: 1 week

Expand All @@ -193,6 +201,7 @@ nightly-tests-nocuda:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME-nocuda-nightly-tests"
paths:
- coverage/
- $CI_ARTIFACT_DIR
when: on_success
expire_in: 1 week

Expand All @@ -214,5 +223,6 @@ nightly-tests-nogpu:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME-nogpu-nightly-tests"
paths:
- coverage/
- $CI_ARTIFACT_DIR
when: on_success
expire_in: 1 week
2 changes: 2 additions & 0 deletions tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
from timemachine.utils import path_to_internal_file

HILBERT_GRID_DIM = 128
# Directory to write files to that will be stored as artifacts in CI.
ARTIFACT_DIR_NAME = os.environ.get("CI_ARTIFACT_DIR", "pytest-artifacts")


@contextlib.contextmanager
Expand Down
265 changes: 148 additions & 117 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import hashlib
import os
import pickle
import subprocess
Expand All @@ -9,11 +10,11 @@

import numpy as np
import pytest
from common import temporary_working_dir
from common import ARTIFACT_DIR_NAME, temporary_working_dir
from numpy.typing import NDArray as Array
from scipy.special import logsumexp

from timemachine.constants import DEFAULT_KT, KCAL_TO_KJ
from timemachine.constants import DEFAULT_FF, DEFAULT_KT, KCAL_TO_KJ
from timemachine.datasets import fetch_freesolv
from timemachine.fe.free_energy import assert_deep_eq
from timemachine.fe.utils import get_mol_name
Expand All @@ -22,6 +23,23 @@
EXAMPLES_DIR = Path(__file__).parent.parent / "examples"


def hash_directory_files(dir_path: Path, extensions: list[str], chunk_size: int = 2048) -> str:
    """Return a SHA-256 hex digest of the contents of all files under dir_path
    (recursively) whose suffix matches one of ``extensions``.

    Parameters
    ----------
    dir_path:
        Directory to hash; must exist.
    extensions:
        Non-empty list of file extensions to include. Accepted with or
        without the leading dot (``"npz"`` and ``".npz"`` are equivalent).
    chunk_size:
        Number of bytes to read per file-read call.

    Returns
    -------
    Hex digest string. Deterministic for identical file contents because
    paths are visited in fully sorted order.
    """
    assert dir_path.is_dir(), f"{dir_path!s} doesn't exist"
    assert len(extensions) > 0
    # Path.suffix includes the leading dot (".npz"), so normalize the
    # caller-provided extensions to dotted form; otherwise a call like
    # hash_directory_files(d, ["npz"]) would silently match nothing and
    # always return the hash of empty input.
    dotted = {ext if ext.startswith(".") else f".{ext}" for ext in extensions}
    m = hashlib.sha256()
    # Sort the full recursive listing so the traversal order (and thus the
    # digest) does not depend on filesystem directory-entry order.
    for path in sorted(dir_path.rglob("*")):
        if not path.is_file() or path.suffix not in dotted:
            continue
        with open(path, "rb") as ifs:
            while chunk := ifs.read(chunk_size):
                m.update(chunk)
    return m.hexdigest()


def run_example(
example_name: str, cli_args: list[str], env: Optional[dict[str, str]] = None, cwd: Optional[str] = None
) -> subprocess.CompletedProcess:
Expand Down Expand Up @@ -236,118 +254,131 @@ def test_run_rbfe_legs(
mol_b,
seed,
):
with temporary_working_dir() as temp_dir:
with resources.as_file(resources.files("timemachine.datasets.fep_benchmark.hif2a")) as hif2a_dir:
config = dict(
mol_a=mol_a,
mol_b=mol_b,
sdf_path=hif2a_dir / "ligands.sdf",
pdb_path=hif2a_dir / "5tbm_prepared.pdb",
seed=seed,
legs=leg,
n_eq_steps=n_eq_steps,
n_frames=n_frames,
n_windows=n_windows,
# Use simple charges to avoid os-dependent charge differences
forcefield="smirnoff_1_1_0_sc.py",
)

def verify_run(output_dir: Path):
assert output_dir.is_dir()
assert (output_dir / "md_params.pkl").is_file()
assert (output_dir / "atom_mapping.svg").is_file()
assert (output_dir / "core.pkl").is_file()
assert (output_dir / "ff.py").is_file()

assert Forcefield.load_from_file(output_dir / "ff.py") is not None

leg_dir = output_dir / leg
assert leg_dir.is_dir()
assert (leg_dir / "results.npz").is_file()
assert (leg_dir / "lambda0_traj.npz").is_file()
assert (leg_dir / "lambda1_traj.npz").is_file()

assert (leg_dir / "simulation_result.pkl").is_file()
if leg in ["solvent", "complex"]:
assert (leg_dir / "host_config.pkl").is_file()
else:
assert not (leg_dir / "host_config.pkl").is_file()
assert (leg_dir / "hrex_transition_matrix.png").is_file()
assert (leg_dir / "hrex_swap_acceptance_rates_convergence.png").is_file()
assert (leg_dir / "hrex_replica_state_distribution_heatmap.png").is_file()

results = np.load(str(leg_dir / "results.npz"))
assert results["pred_dg"].size == 1
assert results["pred_dg"].dtype == np.float64
assert results["pred_dg"] != 0.0

assert results["pred_dg_err"].size == 1
assert results["pred_dg_err"].dtype == np.float64
assert results["pred_dg_err"] != 0.0

assert results["n_windows"].size == 1
assert results["n_windows"].dtype == np.intp
assert 2 <= results["n_windows"] <= config["n_windows"]
assert isinstance(results["overlaps"], np.ndarray)
assert all(isinstance(overlap, float) for overlap in results["overlaps"])

for lamb in [0, 1]:
traj_data = np.load(str(leg_dir / f"lambda{lamb:d}_traj.npz"))
assert len(traj_data["coords"]) == n_frames
assert len(traj_data["boxes"]) == n_frames

config_a = dict(output_dir="a", **config)
proc = run_example("run_rbfe_legs.py", get_cli_args(config_a), cwd=temp_dir)
assert proc.returncode == 0
verify_run(Path(temp_dir) / config_a["output_dir"])

config_b = dict(output_dir="b", **config)
assert config_b["output_dir"] != config_a["output_dir"], "Runs are writing to the same output directory"
proc = run_example("run_rbfe_legs.py", get_cli_args(config_b), cwd=temp_dir)
assert proc.returncode == 0
verify_run(Path(temp_dir) / config_b["output_dir"])

def verify_simulations_match(ref_dir: Path, comp_dir: Path):
with open(ref_dir / "md_params.pkl", "rb") as ifs:
ref_md_params = pickle.load(ifs)
with open(comp_dir / "md_params.pkl", "rb") as ifs:
comp_md_params = pickle.load(ifs)
assert ref_md_params == comp_md_params, "MD Parameters don't match"

with open(ref_dir / "core.pkl", "rb") as ifs:
ref_core = pickle.load(ifs)
with open(comp_dir / "core.pkl", "rb") as ifs:
comp_core = pickle.load(ifs)
assert np.all(ref_core == comp_core), "Atom mappings don't match"

ref_results = np.load(str(ref_dir / leg / "results.npz"))
comp_results = np.load(str(comp_dir / leg / "results.npz"))
np.testing.assert_equal(ref_results["pred_dg"], comp_results["pred_dg"])
np.testing.assert_equal(ref_results["pred_dg_err"], comp_results["pred_dg_err"])
np.testing.assert_array_equal(ref_results["overlaps"], comp_results["overlaps"])
np.testing.assert_equal(ref_results["n_windows"], comp_results["n_windows"])

with open(ref_dir / leg / "simulation_result.pkl", "rb") as ifs:
ref_res = pickle.load(ifs)
with open(comp_dir / leg / "simulation_result.pkl", "rb") as ifs:
comp_res = pickle.load(ifs)
assert len(ref_res.final_result.initial_states) == ref_results["n_windows"]
assert len(ref_res.final_result.initial_states) == len(comp_res.final_result.initial_states)

for ref_state, comp_state in zip(
ref_res.final_result.initial_states, comp_res.final_result.initial_states
):
np.testing.assert_array_equal(ref_state.x0, comp_state.x0)
np.testing.assert_array_equal(ref_state.v0, comp_state.v0)
np.testing.assert_array_equal(ref_state.box0, comp_state.box0)
np.testing.assert_array_equal(ref_state.ligand_idxs, comp_state.ligand_idxs)
np.testing.assert_array_equal(ref_state.protein_idxs, comp_state.protein_idxs)
assert_deep_eq(ref_state.potentials, comp_state.potentials)

for lamb in [0, 1]:
ref_traj = np.load(str(ref_dir / leg / f"lambda{lamb}_traj.npz"))
comp_traj = np.load(str(comp_dir / leg / f"lambda{lamb}_traj.npz"))
np.testing.assert_array_equal(ref_traj["coords"], comp_traj["coords"])
np.testing.assert_array_equal(ref_traj["boxes"], comp_traj["boxes"])

verify_simulations_match(Path(temp_dir) / config_a["output_dir"], Path(temp_dir) / config_b["output_dir"])
# To update the leg result hashes, refer to the hashes generated from CI runs.
# The CI jobs produce an artifact for the results stored at ARTIFACT_DIR_NAME
# which can be used to investigate the results that generated the hash.
leg_result_hashes = {
"vacuum": "81fe2a16aa7eb89e6a05e1d4e162d24b57e4c0c3effe07f6ff548ca32618d8e6",
"solvent": "6d8b39f723727d556b47e0907e89f1afe9843cae0b2e6107a745d1c29f9a3c8d",
"complex": "b3c18188bd8fe2475152b5c1c8b9d81799037f8c33685db5834943a140cc2988",
}
with resources.as_file(resources.files("timemachine.datasets.fep_benchmark.hif2a")) as hif2a_dir:
config = dict(
mol_a=mol_a,
mol_b=mol_b,
sdf_path=hif2a_dir / "ligands.sdf",
pdb_path=hif2a_dir / "5tbm_prepared.pdb",
seed=seed,
legs=leg,
n_eq_steps=n_eq_steps,
n_frames=n_frames,
n_windows=n_windows,
forcefield=DEFAULT_FF,
output_dir=f"{ARTIFACT_DIR_NAME}/rbfe_{mol_a}_{mol_b}_{leg}_{seed}",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does each CI run get a fresh artifact directory? (not sure how this works on the gitlab side...)

If not, should we add an ID or timestamp to the path to avoid races with concurrent CI jobs?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, each CI run gets a fresh artifact. The coverage report is similarly reported currently.

)

def verify_run(output_dir: Path):
assert output_dir.is_dir()
assert (output_dir / "md_params.pkl").is_file()
assert (output_dir / "atom_mapping.svg").is_file()
assert (output_dir / "core.pkl").is_file()
assert (output_dir / "ff.py").is_file()

assert Forcefield.load_from_file(output_dir / "ff.py") is not None

leg_dir = output_dir / leg
assert leg_dir.is_dir()
assert (leg_dir / "results.npz").is_file()
assert (leg_dir / "lambda0_traj.npz").is_file()
assert (leg_dir / "lambda1_traj.npz").is_file()

assert (leg_dir / "simulation_result.pkl").is_file()
if leg in ["solvent", "complex"]:
assert (leg_dir / "host_config.pkl").is_file()
else:
assert not (leg_dir / "host_config.pkl").is_file()
assert (leg_dir / "hrex_transition_matrix.png").is_file()
assert (leg_dir / "hrex_swap_acceptance_rates_convergence.png").is_file()
assert (leg_dir / "hrex_replica_state_distribution_heatmap.png").is_file()

results = np.load(str(leg_dir / "results.npz"))
assert results["pred_dg"].size == 1
assert results["pred_dg"].dtype == np.float64
assert results["pred_dg"] != 0.0

assert results["pred_dg_err"].size == 1
assert results["pred_dg_err"].dtype == np.float64
assert results["pred_dg_err"] != 0.0

assert results["n_windows"].size == 1
assert results["n_windows"].dtype == np.intp
assert 2 <= results["n_windows"] <= config["n_windows"]
assert isinstance(results["overlaps"], np.ndarray)
assert all(isinstance(overlap, float) for overlap in results["overlaps"])

for lamb in [0, 1]:
traj_data = np.load(str(leg_dir / f"lambda{lamb:d}_traj.npz"))
assert len(traj_data["coords"]) == n_frames
assert len(traj_data["boxes"]) == n_frames

def verify_leg_result_hashes(output_dir: Path):
leg_dir = output_dir / leg
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any reason not to just recursively hash everything in the subdirectory? (vs. the current behavior of only hashing the trajectory npz file)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Think selectively hashing things based on #1506 (comment) makes sense

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed to hash the npz files (don't expect the pickles or plots to be reproducible) in 673dd18

Copy link
Collaborator

@mcwitt mcwitt Feb 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: one benefit I see of separate hashes is being able to immediately tell whether it was only the dGs that changed (versus e.g. trajectories and dGs). I think the change would be relatively simple (e.g. assert on a tuple of hashes matching instead of effectively comparing a hash of the whole tuple)

leg_results_hash = hash_directory_files(leg_dir, ["npz"])
assert leg_results_hash == leg_result_hashes[leg]

config_a = config.copy()
config_a["output_dir"] = config["output_dir"] + "_a"
proc = run_example("run_rbfe_legs.py", get_cli_args(config_a))
assert proc.returncode == 0
verify_run(Path(config_a["output_dir"]))
verify_leg_result_hashes(Path(config_a["output_dir"]))

config_b = config.copy()
config_b["output_dir"] = config["output_dir"] + "_b"
assert config_b["output_dir"] != config_a["output_dir"], "Runs are writing to the same output directory"
proc = run_example("run_rbfe_legs.py", get_cli_args(config_b))
assert proc.returncode == 0
verify_run(Path(config_b["output_dir"]))

def verify_simulations_match(ref_dir: Path, comp_dir: Path):
with open(ref_dir / "md_params.pkl", "rb") as ifs:
ref_md_params = pickle.load(ifs)
with open(comp_dir / "md_params.pkl", "rb") as ifs:
comp_md_params = pickle.load(ifs)
assert ref_md_params == comp_md_params, "MD Parameters don't match"

with open(ref_dir / "core.pkl", "rb") as ifs:
ref_core = pickle.load(ifs)
with open(comp_dir / "core.pkl", "rb") as ifs:
comp_core = pickle.load(ifs)
assert np.all(ref_core == comp_core), "Atom mappings don't match"

ref_results = np.load(str(ref_dir / leg / "results.npz"))
comp_results = np.load(str(comp_dir / leg / "results.npz"))
np.testing.assert_equal(ref_results["pred_dg"], comp_results["pred_dg"])
np.testing.assert_equal(ref_results["pred_dg_err"], comp_results["pred_dg_err"])
np.testing.assert_array_equal(ref_results["overlaps"], comp_results["overlaps"])
np.testing.assert_equal(ref_results["n_windows"], comp_results["n_windows"])

with open(ref_dir / leg / "simulation_result.pkl", "rb") as ifs:
ref_res = pickle.load(ifs)
with open(comp_dir / leg / "simulation_result.pkl", "rb") as ifs:
comp_res = pickle.load(ifs)
assert len(ref_res.final_result.initial_states) == ref_results["n_windows"]
assert len(ref_res.final_result.initial_states) == len(comp_res.final_result.initial_states)

for ref_state, comp_state in zip(ref_res.final_result.initial_states, comp_res.final_result.initial_states):
np.testing.assert_array_equal(ref_state.x0, comp_state.x0)
np.testing.assert_array_equal(ref_state.v0, comp_state.v0)
np.testing.assert_array_equal(ref_state.box0, comp_state.box0)
np.testing.assert_array_equal(ref_state.ligand_idxs, comp_state.ligand_idxs)
np.testing.assert_array_equal(ref_state.protein_idxs, comp_state.protein_idxs)
assert_deep_eq(ref_state.potentials, comp_state.potentials)

for lamb in [0, 1]:
ref_traj = np.load(str(ref_dir / leg / f"lambda{lamb}_traj.npz"))
comp_traj = np.load(str(comp_dir / leg / f"lambda{lamb}_traj.npz"))
np.testing.assert_array_equal(ref_traj["coords"], comp_traj["coords"])
np.testing.assert_array_equal(ref_traj["boxes"], comp_traj["boxes"])

verify_simulations_match(Path(config_a["output_dir"]), Path(config_b["output_dir"]))