diff --git a/ignite/metrics/frequency.py b/ignite/metrics/frequency.py
index 8c63edd1ec9..52f02565ac4 100644
--- a/ignite/metrics/frequency.py
+++ b/ignite/metrics/frequency.py
@@ -49,7 +49,7 @@ def reset(self) -> None:
         self._acc = 0
         self._n = 0
         self._elapsed = 0.0
-        super(Frequency, self).reset()
+        super(Frequency, self).reset()  # type: ignore

     @reinit__is_reduced
     def update(self, output: int) -> None:
diff --git a/ignite/metrics/gan/fid.py b/ignite/metrics/gan/fid.py
index 188bad5035a..b74efe3e0e9 100644
--- a/ignite/metrics/gan/fid.py
+++ b/ignite/metrics/gan/fid.py
@@ -226,7 +226,7 @@ def reset(self) -> None:
         self._test_total = torch.zeros(self._num_features, dtype=torch.float64, device=self._device)
         self._num_examples: int = 0

-        super(FID, self).reset()
+        super(FID, self).reset()  # type: ignore

     @reinit__is_reduced
     def update(self, output: Sequence[torch.Tensor]) -> None:
diff --git a/ignite/metrics/gan/inception_score.py b/ignite/metrics/gan/inception_score.py
index 60b1d4785f7..b2a179fa65d 100644
--- a/ignite/metrics/gan/inception_score.py
+++ b/ignite/metrics/gan/inception_score.py
@@ -106,7 +106,7 @@ def reset(self) -> None:
         self._prob_total = torch.zeros(self._num_features, dtype=torch.float64, device=self._device)
         self._total_kl_d = torch.zeros(self._num_features, dtype=torch.float64, device=self._device)

-        super(InceptionScore, self).reset()
+        super(InceptionScore, self).reset()  # type: ignore

     @reinit__is_reduced
     def update(self, output: torch.Tensor) -> None:
diff --git a/tests/ignite/conftest.py b/tests/ignite/conftest.py
index 9855fd8eb9f..4e6712c43cf 100644
--- a/tests/ignite/conftest.py
+++ b/tests/ignite/conftest.py
@@ -13,6 +13,12 @@
 import ignite.distributed as idist


+def pytest_configure(config):
+    config.addinivalue_line("markers", "distributed: run distributed")
+    config.addinivalue_line("markers", "multinode_distributed: distributed")
+    config.addinivalue_line("markers", "tpu: run on tpu")
+
+
 @pytest.fixture(
     params=[
         "cpu",
@@ -492,3 +498,14 @@ def xla_worker(index, fn):
     assert ex_.code == 0, "Didn't successfully exit in XLA test"

     pyfuncitem.obj = functools.partial(testfunc_wrapper, pyfuncitem.obj)
+
+
+def pytest_collection_modifyitems(items):
+    for item in items:
+        if "distributed" in item.fixturenames:
+            # Run distributed tests on a single worker to avoid race conditions.
+            # This requires that the --dist=loadgroup option be passed to pytest.
+            item.add_marker(pytest.mark.xdist_group("distributed"))
+            item.add_marker(pytest.mark.timeout(45))
+        if "multinode_distributed" in item.fixturenames:
+            item.add_marker(pytest.mark.timeout(45))
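The xdist_group marker applied above only takes effect when pytest-xdist is invoked with --dist=loadgroup (available since pytest-xdist 2.3.0): in that mode, every test carrying the same group name is scheduled onto one worker process. A minimal sketch of the mechanism, using hypothetical test names rather than ignite's real tests:

    import pytest

    # With `pytest -n 4 --dist=loadgroup`, both tests below run on the same
    # xdist worker because they share the group name "distributed", so they
    # never race for shared resources (for example, a fixed port).
    @pytest.mark.xdist_group("distributed")
    def test_allreduce():  # hypothetical
        ...

    @pytest.mark.xdist_group("distributed")
    def test_broadcast():  # hypothetical
        ...
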
diff --git a/tests/ignite/distributed/comp_models/test_native.py b/tests/ignite/distributed/comp_models/test_native.py
index c771da4148c..09e4d305460 100644
--- a/tests/ignite/distributed/comp_models/test_native.py
+++ b/tests/ignite/distributed/comp_models/test_native.py
@@ -11,8 +11,6 @@
 else:
     from ignite.distributed.comp_models.native import _expand_hostlist, _NativeDistModel, _setup_ddp_vars_from_slurm_env

-pytestmark = pytest.mark.timeout(60)
-

 # tests from https://github.com/LLNL/py-hostlist/blob/master/hostlist/unittest_hostlist.py
 @pytest.mark.parametrize(
diff --git a/tests/ignite/distributed/test_launcher.py b/tests/ignite/distributed/test_launcher.py
index 10083ed1bc2..b12e2acf1c2 100644
--- a/tests/ignite/distributed/test_launcher.py
+++ b/tests/ignite/distributed/test_launcher.py
@@ -10,8 +10,6 @@
 import ignite.distributed as idist
 from ignite.distributed.utils import has_hvd_support, has_native_dist_support, has_xla_support

-pytestmark = pytest.mark.timeout(60)
-

 def test_parallel_wrong_inputs():
     with pytest.raises(ValueError, match=r"Unknown backend 'abc'. Available backends:"):
diff --git a/tests/ignite/handlers/conftest.py b/tests/ignite/handlers/conftest.py
index 9d7bb999463..79ac0809698 100644
--- a/tests/ignite/handlers/conftest.py
+++ b/tests/ignite/handlers/conftest.py
@@ -1,58 +1,41 @@
-import random
+import subprocess
+import time
 from pathlib import Path
 from unittest.mock import Mock

 import pytest
 import torch
+from visdom import Visdom
+from visdom.server.build import download_scripts

-vd_hostname = None
-vd_port = None
-vd_server_process = None
-
-@pytest.fixture()
+@pytest.fixture(scope="session")
 def visdom_server():
     # Start Visdom server once and stop it with visdom_server_stop
-    global vd_hostname, vd_port, vd_server_process
-
-    if vd_server_process is None:
-        import subprocess
-        import time
-
-        from visdom import Visdom
-        from visdom.server.build import download_scripts
-
+    vd_hostname = "localhost"
+    if not (Path.home() / ".visdom").exists():
         (Path.home() / ".visdom").mkdir(exist_ok=True)
         download_scripts()
+    vis = None

-        vd_hostname = "localhost"
-        vd_port = random.randint(8089, 8887)
-
+    vd_port = 29777
+    vd_server_process = subprocess.Popen(
+        ["python", "-m", "visdom.server", "--hostname", vd_hostname, "-port", str(vd_port)]
+    )
+    time.sleep(2)
+    for ii in range(5):
         try:
+            time.sleep(1)
             vis = Visdom(server=vd_hostname, port=vd_port, raise_exceptions=True)
+            break
         except ConnectionError:
-            pass
-
-        vd_server_process = subprocess.Popen(
-            ["python", "-m", "visdom.server", "--hostname", vd_hostname, "-port", str(vd_port)]
-        )
-        time.sleep(5)
-
-        vis = Visdom(server=vd_hostname, port=vd_port)
-    assert vis.check_connection()
-    vis.close()
+            continue
+    assert vis and vis.check_connection()

     yield (vd_hostname, vd_port)
-
-
-@pytest.fixture()
-def visdom_server_stop():
-    yield None
-
-    import time
-
-    vd_server_process.kill()
-    time.sleep(2)
+    # Trying to clean up slows things down and sometimes causes hangs.
+    # vis.close()
+    # vd_server_process.kill()


 @pytest.fixture
diff --git a/tests/ignite/handlers/test_lr_finder.py b/tests/ignite/handlers/test_lr_finder.py
index e12d951dfbf..23b823d9ce4 100644
--- a/tests/ignite/handlers/test_lr_finder.py
+++ b/tests/ignite/handlers/test_lr_finder.py
@@ -3,6 +3,8 @@
 from pathlib import Path
 from unittest.mock import MagicMock

+import filelock
+
 import matplotlib
 import pytest
 import torch
@@ -144,16 +146,27 @@ def dataloader_plot():


 @pytest.fixture
-def mnist_dataloader():
+def mnist_dataloader(tmp_path_factory):
     from torch.utils.data import DataLoader
     from torchvision.datasets import MNIST
     from torchvision.transforms import Compose, Normalize, ToTensor

     data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

-    train_loader = DataLoader(
-        MNIST(download=True, root="/tmp", transform=data_transform, train=True), batch_size=256, shuffle=True
-    )
+    root_tmp_dir = tmp_path_factory.getbasetemp().parent
+    while True:
+        try:
+            with filelock.FileLock(root_tmp_dir / "mnist_download.lock", timeout=0.2) as fn:
+                fn.acquire()
+                train_loader = DataLoader(
+                    MNIST(download=True, root="/tmp", transform=data_transform, train=True),
+                    batch_size=256,
+                    shuffle=True,
+                )
+                fn.release()
+            break
+        except filelock._error.Timeout:
+            pass

     yield train_loader
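The lock in mnist_dataloader serializes the MNIST download across xdist workers, but the pattern is busier than filelock requires: the explicit fn.acquire()/fn.release() calls inside the with block are redundant (the context manager already acquires and releases the lock), and filelock.Timeout is the public alias for filelock._error.Timeout. A simpler sketch of the same guard, assuming the same fixture signature and dataset root, with a blocking acquire so waiting workers sleep at the lock instead of polling:

    import filelock
    import pytest

    @pytest.fixture
    def mnist_dataloader(tmp_path_factory):
        from torch.utils.data import DataLoader
        from torchvision.datasets import MNIST
        from torchvision.transforms import Compose, Normalize, ToTensor

        data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
        root_tmp_dir = tmp_path_factory.getbasetemp().parent

        # Blocking acquire (default timeout=-1): the first worker downloads,
        # the rest wait here and then reuse the files already on disk.
        with filelock.FileLock(str(root_tmp_dir / "mnist_download.lock")):
            train_loader = DataLoader(
                MNIST(download=True, root="/tmp", transform=data_transform, train=True),
                batch_size=256,
                shuffle=True,
            )

        yield train_loader
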
diff --git a/tests/ignite/handlers/test_tqdm_logger.py b/tests/ignite/handlers/test_tqdm_logger.py
index 0f9a501ebf8..cae59ac15b4 100644
--- a/tests/ignite/handlers/test_tqdm_logger.py
+++ b/tests/ignite/handlers/test_tqdm_logger.py
@@ -33,9 +33,9 @@ def update_fn(engine, batch):
 def test_pbar_errors():
     with pytest.raises(ModuleNotFoundError, match=r"This contrib module requires tqdm to be installed"):
         with patch.dict("sys.modules", {"tqdm.autonotebook": None}):
-            ProgressBar()
+            ProgressBar(ncols=80)

-    pbar = ProgressBar()
+    pbar = ProgressBar(ncols=80)
     with pytest.raises(ValueError, match=r"Logging event abc is not in allowed"):
         pbar.attach(Engine(lambda e, b: None), event_name=Namespace(name="abc"))

@@ -45,7 +45,7 @@ def test_pbar(capsys):
     loader = [1, 2]
     engine = Engine(update_fn)

-    pbar = ProgressBar()
+    pbar = ProgressBar(ncols=80)
     pbar.attach(engine, ["a"])
     engine.run(loader, max_epochs=n_epochs)

@@ -55,9 +55,9 @@ def test_pbar(capsys):
     err = list(map(lambda x: x.strip(), err))
     err = list(filter(None, err))
     if get_tqdm_version() < Version("4.49.0"):
-        expected = "Epoch [2/2]: [1/2] 50%|█████ , a=1 [00:00<00:00]"
     else:
-        expected = "Epoch [2/2]: [1/2] 50%|█████ , a=1 [00:00