Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

[ENH]: Implement junifer reset to delete job/results file. #240

Merged
merged 6 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changes/newsfragments/240.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add ``junifer reset`` to reset storage and jobs directory by `Synchon Mandal`_
38 changes: 38 additions & 0 deletions junifer/api/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
)
from .functions import collect as api_collect
from .functions import queue as api_queue
from .functions import reset as api_reset
from .functions import run as api_run
from .parser import parse_yaml
from .utils import (
Expand Down Expand Up @@ -414,6 +415,43 @@ def selftest(subpkg: str) -> None:
click.secho("Failure.", fg="red")


@cli.command()
@click.argument(
"filepath",
type=click.Path(
exists=True, readable=True, dir_okay=False, path_type=pathlib.Path
),
)
@click.option(
"-v",
"--verbose",
type=click.UNPROCESSED,
callback=_validate_verbose,
default="info",
)
def reset(
filepath: click.Path,
verbose: Union[str, int],
) -> None:
"""Reset command for CLI.

\f

Parameters
----------
filepath : click.Path
The filepath to the configuration file.
verbose : click.Choice
The verbosity level: warning, info or debug (default "info").

"""
configure_logging(level=verbose)
# Parse YAML
config = parse_yaml(filepath)
# Perform operation
api_reset(config)


@cli.group()
def setup() -> None: # pragma: no cover
"""Configure commands for Junifer."""
Expand Down
37 changes: 37 additions & 0 deletions junifer/api/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,3 +709,40 @@ def _queue_slurm(
# logger.info(
# f"SLURM job files created, to submit the job, run `{cmd}`"
# )


def reset(config: Dict) -> None:
"""Reset the storage and jobs directory.

Parameters
----------
config : dict
The configuration to be used for resetting.

"""
# Fetch storage
storage = config["storage"]
storage_uri = Path(storage["uri"])
logger.info(f"Deleting {storage_uri.resolve()!s}")
# Delete storage; will be str
if storage_uri.exists():
# Delete files in the directory
for file in storage_uri.iterdir():
file.unlink(missing_ok=True)
# Remove directory
storage_uri.parent.rmdir()

# Fetch job name (if present)
if config.get("queue") is not None:
queue = config["queue"]
job_dir = (
Path.cwd()
/ "junifer_jobs"
/ (queue.get("jobname") or "junifer_job")
)
logger.info(f"Deleting job directory at {job_dir.resolve()!s}")
if job_dir.exists():
# Remove files and directories
shutil.rmtree(job_dir)
# Remove directory
job_dir.parent.rmdir()
111 changes: 109 additions & 2 deletions junifer/api/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,21 @@
# License: AGPL

from pathlib import Path
from typing import List, Tuple
from typing import Callable, List, Tuple

import pytest
from click.testing import CliRunner
from ruamel.yaml import YAML

from junifer.api.cli import _parse_elements_file, collect, run, selftest, wtf
from junifer.api.cli import (
_parse_elements_file,
collect,
queue,
reset,
run,
selftest,
wtf,
)


# Configure YAML class
Expand Down Expand Up @@ -181,6 +189,105 @@ def test_multi_element_access(
assert read_elements == expected_list


def test_queue(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
"""Test queue command.

Parameters
----------
tmp_path : pathlib.Path
The path to the test directory.
monkeypatch : pytest.MonkeyPatch
The pytest.MonkeyPatch object.

"""
with monkeypatch.context() as m:
m.chdir(tmp_path)
# Get test config
infile = Path(__file__).parent / "data" / "gmd_mean_htcondor.yaml"
# Read test config
contents = yaml.load(infile)
# Working directory
contents["workdir"] = str(tmp_path.resolve())
# Storage
contents["storage"]["uri"] = str((tmp_path / "out.sqlite").resolve())
# Write new test config
outfile = tmp_path / "in.yaml"
yaml.dump(contents, stream=outfile)
# Queue command arguments
queue_args = [
str(outfile.resolve()),
"--verbose",
"debug",
]
# Invoke queue command
queue_result = runner.invoke(queue, queue_args)
# Check
assert queue_result.exit_code == 0


@pytest.mark.parametrize(
"action, action_file",
[
(run, "gmd_mean.yaml"),
(queue, "gmd_mean_htcondor.yaml"),
],
)
def test_reset(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
action: Callable,
action_file: str,
) -> None:
"""Test reset command.

Parameters
----------
tmp_path : pathlib.Path
The path to the test directory.
monkeypatch : pytest.MonkeyPatch
The pytest.MonkeyPatch object.
action : callable
The parametrized action to perform.
action_file : str
The parametrized file for the action.

"""
with monkeypatch.context() as m:
m.chdir(tmp_path)
# Get test config
infile = Path(__file__).parent / "data" / action_file
# Read test config
contents = yaml.load(infile)
# Working directory
contents["workdir"] = str(tmp_path.resolve())
# Storage
contents["storage"]["uri"] = str((tmp_path / "out.sqlite").resolve())
# Write new test config
outfile = tmp_path / "in.yaml"
yaml.dump(contents, stream=outfile)
# Command arguments
action_args = [
str(outfile.resolve()),
"--verbose",
"debug",
]
# Invoke command
result = runner.invoke(action, action_args)
# Check
assert result.exit_code == 0

# Reset arguments
reset_args = [
str(outfile.resolve()),
"--verbose",
"debug",
]
# Run reset
reset_result = runner.invoke(reset, reset_args)
# Check
assert reset_result.exit_code == 0


def test_wtf_short() -> None:
"""Test short version of wtf command."""
# Invoke wtf command
Expand Down
85 changes: 84 additions & 1 deletion junifer/api/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ruamel.yaml import YAML

import junifer.testing.registry # noqa: F401
from junifer.api.functions import collect, queue, run
from junifer.api.functions import collect, queue, reset, run
from junifer.datagrabber.base import BaseDataGrabber
from junifer.pipeline.registry import build

Expand Down Expand Up @@ -867,3 +867,86 @@ def test_queue_condor_submission_fail(
def test_queue_slurm() -> None:
"""Test job queueing in SLURM."""
pass


def test_reset_run(tmp_path: Path) -> None:
"""Test reset function for run.

Parameters
----------
tmp_path : pathlib.Path
The path to the test directory.

"""
# Create storage
storage["uri"] = tmp_path / "test_reset_run.sqlite" # type: ignore
# Run operation to generate files
run(
workdir=tmp_path,
datagrabber=datagrabber,
markers=markers,
storage=storage,
elements=["sub-01"],
)
# Reset operation
reset(config={"storage": storage})

assert not Path(storage["uri"]).exists()


@pytest.mark.parametrize(
"job_name",
(
"job",
None,
),
)
def test_reset_queue(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch, job_name: str
) -> None:
"""Test reset function for queue.

Parameters
----------
tmp_path : pathlib.Path
The path to the test directory.
monkeypatch : pytest.MonkeyPatch
The pytest.MonkeyPatch object.
job_name : str
The parametrized job name.

"""
with monkeypatch.context() as m:
m.chdir(tmp_path)
# Create storage
storage["uri"] = "test_reset_queue.sqlite"
# Set job name
if job_name is None:
job_name = "junifer_job"
# Queue operation to generate files
queue(
config={
"with": "junifer.testing.registry",
"workdir": str(tmp_path.resolve()),
"datagrabber": datagrabber,
"markers": markers,
"storage": storage,
"env": {
"kind": "conda",
"name": "junifer",
},
"mem": "8G",
},
kind="HTCondor",
jobname=job_name,
)
# Reset operation
reset(
config={
"storage": storage,
"queue": {"jobname": job_name},
}
)

assert not Path(storage["uri"]).exists()
assert not (tmp_path / "junifer_jobs" / job_name).exists()
1 change: 0 additions & 1 deletion junifer/datagrabber/tests/test_dmcc13_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ def test_DMCC13Benchmark(
dg.uri = URI

with dg:
# breakpoint()
# Get all elements
all_elements = dg.get_elements()
# Get test element
Expand Down
17 changes: 17 additions & 0 deletions junifer/pipeline/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ def register(step: str, name: str, klass: type) -> None:
klass : class
Class to be registered.

Raises
------
ValueError
If the ``step`` is invalid.

"""
# Verify step
if step not in _VALID_STEPS:
Expand All @@ -63,6 +68,11 @@ def get_step_names(step: str) -> List[str]:
list
List of registered function names.

Raises
------
ValueError
If the ``step`` is invalid.

"""
# Verify step
if step not in _VALID_STEPS:
Expand All @@ -86,6 +96,11 @@ def get_class(step: str, name: str) -> type:
class
Registered function class.

Raises
------
ValueError
If the ``step`` or ``name`` is invalid.

"""
# Verify step
if step not in _VALID_STEPS:
Expand Down Expand Up @@ -123,6 +138,8 @@ def build(

Raises
------
RuntimeError
If there is a problem creating the instance.
ValueError
If the created object with the given name is not an instance of the
base class.
Expand Down
Loading