From 42d56932fa8d53a1ee7b02d4a3b3e4fce0f07e4f Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 31 May 2023 12:45:33 +0200 Subject: [PATCH 1/4] Update dependencies --- pyproject.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9202900..4a6f84a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,14 +26,12 @@ shadems = {reference = "bin/shadems", type = "file"} [tool.poetry.dependencies] python = "^3.8" datashader = "^0.13.0" -dask-ms = { version = "^0.2.15", extras = ["xarray"] } +dask-ms = { version = "^0.2.16", extras = ["xarray"] } holoviews = "^1.14.9" matplotlib = { version = "^3.6.0" } cmasher = "^1.6.3" future-fstrings = "^1.2.0" requests = "^2.27.1" -numpy = "1.23.5" # Datashader installs the latest numpy which is in conflict with <=1.35.5 required by numba -dask = { extras = ["array"], version = "2022.9.1" } # Datashader installs the latest dask which is in conflict with <2023.0.0 required by dask-ms[array] pytest = { version = "^7.2.2", optional=true } pytest-flake8 = { version = "^1.1.1", optional=true } From a749d24b42e920a4f2cf96419ba8ccc8c58f608b Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 31 May 2023 12:51:54 +0200 Subject: [PATCH 2/4] Replace deprecated DataFrame.append with pandas.concat - https://pandas.pydata.org/docs/whatsnew/v2.0.0.html#removal-of-prior-version-deprecations-changes - https://stackoverflow.com/a/75956237 --- shade_ms/tests/test_dask_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/shade_ms/tests/test_dask_utils.py b/shade_ms/tests/test_dask_utils.py index 1ba44fa..946c763 100644 --- a/shade_ms/tests/test_dask_utils.py +++ b/shade_ms/tests/test_dask_utils.py @@ -3,6 +3,7 @@ import dask.dataframe as dd import numpy as np from numpy.testing import assert_array_equal +import pandas as pd import pytest from shade_ms.dask_utils import dataframe_factory @@ -92,7 +93,7 @@ def test_dataframe_factory_multicol(): 
assert_array_equal(df['c0'].min(), data1c.min()) assert_array_equal(df['c0'].max(), data1c.max()) - df = df.append(df) + df = pd.concat([df, df]) assert_array_equal(df['x'].min(), data1a.min()) assert_array_equal(df['x'].max(), data1a.max()) From 8ec14c489ec589d7ec312db091764f578d649c5e Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 31 May 2023 12:54:26 +0200 Subject: [PATCH 3/4] Remove test-dataframe-factory cruft --- test-dataframe-factory.py | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 test-dataframe-factory.py diff --git a/test-dataframe-factory.py b/test-dataframe-factory.py deleted file mode 100644 index ed13f93..0000000 --- a/test-dataframe-factory.py +++ /dev/null @@ -1,23 +0,0 @@ -import dask.array as da -from shade_ms.dask_utils import dataframe_factory, multicol_dataframe_factory - - -nrow, nfreq, ncorr = 100, 100, 4 - -data1a = da.arange(nrow, chunks=(10,)) - -data1b = da.zeros(dtype=float, shape=(nfreq,), chunks=(100,)) - -data1c = da.zeros(dtype=float, shape=(nfreq,ncorr), chunks=(100,4)) - -data1d = da.zeros(dtype=float, shape=()) - -df = dataframe_factory(("row", "chan"), - data1a, ("row",), - data1b, ("chan",)) - -df1 = multicol_dataframe_factory(("row", "chan", "corr"), - dict(a=data1a, b=data1b, x=data1c, y=data1d), - dict(a=("row",), b=("chan",), x=("chan", "corr"), y=())) - -print(df1['y']) \ No newline at end of file From 76935a690a95aabdb4b743161ea8a81a1c4660a1 Mon Sep 17 00:00:00 2001 From: Simon Perkins Date: Wed, 31 May 2023 13:00:35 +0200 Subject: [PATCH 4/4] Use dask.dataframe to concatenate dask dataframes --- shade_ms/tests/test_dask_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/shade_ms/tests/test_dask_utils.py b/shade_ms/tests/test_dask_utils.py index 946c763..31b3c84 100644 --- a/shade_ms/tests/test_dask_utils.py +++ b/shade_ms/tests/test_dask_utils.py @@ -3,7 +3,6 @@ import dask.dataframe as dd import numpy as np from numpy.testing import assert_array_equal 
-import pandas as pd import pytest from shade_ms.dask_utils import dataframe_factory @@ -93,7 +92,7 @@ def test_dataframe_factory_multicol(): assert_array_equal(df['c0'].min(), data1c.min()) assert_array_equal(df['c0'].max(), data1c.max()) - df = pd.concat([df, df]) + df = dd.multi.concat([df, df]) assert_array_equal(df['x'].min(), data1a.min()) assert_array_equal(df['x'].max(), data1a.max())