From 9d7148ec6c34f27b83ab6e446cdffd8c74ab7c9d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 16 Feb 2021 15:03:50 -0700 Subject: [PATCH 01/20] Add api.md --- docs/source/api.md | 24 ++++++++++++++++++++++++ docs/source/conf.py | 2 +- docs/source/index.md | 1 + intake_thredds/cat.py | 16 ++++++++++++++-- 4 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 docs/source/api.md diff --git a/docs/source/api.md b/docs/source/api.md new file mode 100644 index 0000000..9b0488d --- /dev/null +++ b/docs/source/api.md @@ -0,0 +1,24 @@ +# API reference + +This is a reference API class listing, and modules. + +## Top-level functions + +```{eval-rst} +.. currentmodule:: intake +.. autosummary:: + open_thredds_cat + open_thredds_merged + +``` + +## Catalog & Source Objects + +```{eval-rst} +.. currentmodule:: intake_thredds.cat +.. automodule:: intake_thredds.cat + :members: + +.. automodule:: intake_thredds.source + :members: +``` diff --git a/docs/source/conf.py b/docs/source/conf.py index 5bb21a4..86a9775 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -44,7 +44,7 @@ 'IPython.sphinxext.ipython_console_highlighting', 'IPython.sphinxext.ipython_directive', 'sphinx.ext.napoleon', - 'myst_parser', + 'myst_nb', ] autodoc_member_order = 'groupwise' diff --git a/docs/source/index.md b/docs/source/index.md index 11a64af..2adda02 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -13,6 +13,7 @@ If you encounter any errors or problems with **intake-thredds**, please open an maxdepth: 2 --- installation.md +api.md contributing.md changelog.md ``` diff --git a/intake_thredds/cat.py b/intake_thredds/cat.py index 9f34a15..fa579f5 100644 --- a/intake_thredds/cat.py +++ b/intake_thredds/cat.py @@ -3,12 +3,24 @@ class ThreddsCatalog(Catalog): + name = 'thredds_cat' - def __init__(self, url, driver='opendap', **kwargs): + def __init__(self, url: str, driver: str = 'opendap', **kwargs): + """ + Parameters + ---------- + url : str + Location of thredds catalog. + driver : str + Select driver to access data. Choose from 'netcdf' and 'opendap'. + **kwargs : + Additional keyword arguments are passed through to the + :py:class:`~intake.catalog.Catalog` base class. + """ + super(ThreddsCatalog, self).__init__(**kwargs) self.url = url self.driver = driver - super().__init__(**kwargs) def _load(self): from siphon.catalog import TDSCatalog From ad7cd78dd6b204ef1389e7b1d32119ccbe6f2287 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 16 Feb 2021 15:14:41 -0700 Subject: [PATCH 02/20] Revert to super().__init__ --- intake_thredds/cat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intake_thredds/cat.py b/intake_thredds/cat.py index fa579f5..f4a3335 100644 --- a/intake_thredds/cat.py +++ b/intake_thredds/cat.py @@ -7,7 +7,7 @@ class ThreddsCatalog(Catalog): name = 'thredds_cat' def __init__(self, url: str, driver: str = 'opendap', **kwargs): - """ + """Intake catalog interface to a thredds catalog. Parameters ---------- url : str @@ -18,9 +18,9 @@ def __init__(self, url: str, driver: str = 'opendap', **kwargs): Additional keyword arguments are passed through to the :py:class:`~intake.catalog.Catalog` base class. """ - super(ThreddsCatalog, self).__init__(**kwargs) self.url = url self.driver = driver + super().__init__(**kwargs) def _load(self): from siphon.catalog import TDSCatalog From 3f507034b4007d749334bb9989b80c6c57c495d0 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 16 Feb 2021 15:47:33 -0700 Subject: [PATCH 03/20] Update API --- docs/source/api.md | 4 +++- docs/source/conf.py | 3 +++ intake_thredds/cat.py | 1 + intake_thredds/source.py | 10 ++++++---- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/docs/source/api.md b/docs/source/api.md index 9b0488d..4a68955 100644 --- a/docs/source/api.md +++ b/docs/source/api.md @@ -15,10 +15,12 @@ This is a reference API class listing, and modules. ## Catalog & Source Objects ```{eval-rst} -.. currentmodule:: intake_thredds.cat +.. currentmodule:: intake_thredds .. automodule:: intake_thredds.cat :members: + .. automodule:: intake_thredds.source :members: + ``` diff --git a/docs/source/conf.py b/docs/source/conf.py index 86a9775..fed8510 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -68,6 +68,9 @@ # Otherwise, the Return parameter list looks different from the Parameters list napoleon_use_rtype = False +autodoc_typehints = 'none' +napoleon_numpy_docstring = True +napoleon_include_init_with_doc = True # Enable notebook execution diff --git a/intake_thredds/cat.py b/intake_thredds/cat.py index f4a3335..18eaa18 100644 --- a/intake_thredds/cat.py +++ b/intake_thredds/cat.py @@ -8,6 +8,7 @@ class ThreddsCatalog(Catalog): def __init__(self, url: str, driver: str = 'opendap', **kwargs): """Intake catalog interface to a thredds catalog. + Parameters ---------- url : str diff --git a/intake_thredds/source.py b/intake_thredds/source.py index f89aa49..bb3f5fe 100644 --- a/intake_thredds/source.py +++ b/intake_thredds/source.py @@ -17,22 +17,24 @@ class THREDDSMergedSource(DataSourceMixin): partition_access = True def __init__(self, url, path, driver='opendap', progressbar=True, metadata=None): - """ + """Merges multiple datasets into a single datasets. + + This source takes a THREDDS URL and a path to descend down, and calls the + combine function on all of the datasets found. Parameters ---------- url : str Location of server path : list of str - Subcats to follow; include glob characters (*, ?) in here for - matching + Subcats to follow; include glob characters (*, ?) in here for matching. driver : str Select driver to access data. Choose from 'netcdf' and 'opendap'. progressbar : bool If True, will print a progress bar. Requires `tqdm `__ to be installed. metadata : dict or None - To associate with this source + To associate with this source. """ super(THREDDSMergedSource, self).__init__(metadata=metadata) self.urlpath = url From b0cfae659afcece4b38846e9df562873973151ed Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 16 Feb 2021 16:05:28 -0700 Subject: [PATCH 04/20] Update docstrings --- docs/source/api.md | 2 +- intake_thredds/cat.py | 30 ++++++++++++-------- intake_thredds/source.py | 61 +++++++++++++++++++++++++++------------- 3 files changed, 61 insertions(+), 32 deletions(-) diff --git a/docs/source/api.md b/docs/source/api.md index 4a68955..d8a6bca 100644 --- a/docs/source/api.md +++ b/docs/source/api.md @@ -1,4 +1,4 @@ -# API reference +# API Reference This is a reference API class listing, and modules. diff --git a/intake_thredds/cat.py b/intake_thredds/cat.py index 18eaa18..479c45f 100644 --- a/intake_thredds/cat.py +++ b/intake_thredds/cat.py @@ -3,22 +3,28 @@ class ThreddsCatalog(Catalog): + """Intake catalog interface to a thredds catalog. + + Parameters + ---------- + url : str + Location of thredds catalog. + driver : str + Select driver to access data. Choose from 'netcdf' and 'opendap'. + **kwargs : + Additional keyword arguments are passed through to the + :py:class:`~intake.catalog.Catalog` base class. + + Examples + -------- + >>> import intake + >>> cat_url = 'https://psl.noaa.gov/thredds/catalog/Datasets/noaa.ersst/catalog.xml' + >>> cat = intake.open_thredds_cat(cat_url) + """ name = 'thredds_cat' def __init__(self, url: str, driver: str = 'opendap', **kwargs): - """Intake catalog interface to a thredds catalog. - - Parameters - ---------- - url : str - Location of thredds catalog. - driver : str - Select driver to access data. Choose from 'netcdf' and 'opendap'. - **kwargs : - Additional keyword arguments are passed through to the - :py:class:`~intake.catalog.Catalog` base class. - """ self.url = url self.driver = driver super().__init__(**kwargs) diff --git a/intake_thredds/source.py b/intake_thredds/source.py index bb3f5fe..fdca052 100644 --- a/intake_thredds/source.py +++ b/intake_thredds/source.py @@ -11,31 +11,54 @@ class THREDDSMergedSource(DataSourceMixin): + """Merges multiple datasets into a single datasets. + + This source takes a THREDDS URL and a path to descend down, and calls the + combine function on all of the datasets found. + + Parameters + ---------- + url : str + Location of server + path : list of str + Subcats to follow; include glob characters (*, ?) in here for matching. + driver : str + Select driver to access data. Choose from 'netcdf' and 'opendap'. + progressbar : bool + If True, will print a progress bar. Requires `tqdm `__ + to be installed. + metadata : dict or None + To associate with this source. + + Examples + -------- + >>> import intake + >>> cat_url = 'https://psl.noaa.gov/thredds/catalog.xml' + >>> paths = ['Datasets', 'ncep.reanalysis.dailyavgs', 'surface', 'air*sig995*194*.nc'] + >>> src = intake.open_thredds_merged(cat_url, paths) + >>> src + sources: + thredds_merged: + args: + path: + - Datasets + - ncep.reanalysis.dailyavgs + - surface + - air*sig995*194*.nc + url: https://psl.noaa.gov/thredds/catalog.xml + description: '' + driver: intake_thredds.source.THREDDSMergedSource + metadata: {} + + """ + version = '1.0' container = 'xarray' name = 'thredds_merged' partition_access = True def __init__(self, url, path, driver='opendap', progressbar=True, metadata=None): - """Merges multiple datasets into a single datasets. - - This source takes a THREDDS URL and a path to descend down, and calls the - combine function on all of the datasets found. - - Parameters - ---------- - url : str - Location of server - path : list of str - Subcats to follow; include glob characters (*, ?) in here for matching. - driver : str - Select driver to access data. Choose from 'netcdf' and 'opendap'. - progressbar : bool - If True, will print a progress bar. Requires `tqdm `__ - to be installed. - metadata : dict or None - To associate with this source. - """ + super(THREDDSMergedSource, self).__init__(metadata=metadata) self.urlpath = url if 'simplecache::' in url: From 3bbdb2130dedd71fdda1fdc386bc429b67ae7a32 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 16 Feb 2021 16:51:46 -0700 Subject: [PATCH 05/20] Add tutorial notebook --- docs/source/conf.py | 8 +- docs/source/index.md | 1 + docs/source/tutorial.ipynb | 1388 ++++++++++++++++++++++++++++++++++++ intake_thredds/cat.py | 2 +- 4 files changed, 1391 insertions(+), 8 deletions(-) create mode 100644 docs/source/tutorial.ipynb diff --git a/docs/source/conf.py b/docs/source/conf.py index fed8510..1b1abba 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -74,13 +74,7 @@ # Enable notebook execution -# https://nbsphinx.readthedocs.io/en/0.4.2/never-execute.html -# nbsphinx_execute = 'auto' -# Allow errors in all notebooks by -nbsphinx_allow_errors = True - -# Disable cell timeout -nbsphinx_timeout = -1 +jupyter_execute_notebooks = 'auto' # The suffix of source filenames. diff --git a/docs/source/index.md b/docs/source/index.md index 2adda02..38e91d6 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -13,6 +13,7 @@ If you encounter any errors or problems with **intake-thredds**, please open an maxdepth: 2 --- installation.md +tutorial.ipynb api.md contributing.md changelog.md diff --git a/docs/source/tutorial.ipynb b/docs/source/tutorial.ipynb new file mode 100644 index 0000000..5fc4d24 --- /dev/null +++ b/docs/source/tutorial.ipynb @@ -0,0 +1,1388 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "regulation-questionnaire", + "metadata": {}, + "source": [ + "# Tutorial\n", + "\n", + "Intake-thredds provides an interface that combines [`siphon`](https://github.com/Unidata/siphon) and `intake` to retrieve data from THREDDS data servers. This tutorial provides an introduction to the API and features of intake-thredds. Let's begin by importing `intake`. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "attractive-liberal", + "metadata": {}, + "outputs": [], + "source": [ + "import intake" + ] + }, + { + "cell_type": "markdown", + "id": "polished-clearance", + "metadata": {}, + "source": [ + "## Loading a catalog\n", + "\n", + "You can load data from a THREDDS catalog by providing the URL to a valid THREDDS catalog: " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "conceptual-sound", + "metadata": {}, + "outputs": [], + "source": [ + "cat_url = 'https://psl.noaa.gov/thredds/catalog/Datasets/noaa.ersst/catalog.xml'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "confident-heading", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + } + ], + "source": [ + "catalog = intake.open_thredds_cat(cat_url)\n", + "print(catalog)\n", + "print(type(catalog))" + ] + }, + { + "cell_type": "markdown", + "id": "sustained-threshold", + "metadata": {}, + "source": [ + "## Using the catalog\n", + "\n", + "Once you've loaded a catalog, you can display its contents by iterating over its entries:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "accomplished-classification", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['err.mnmean.v3.nc',\n", + " 'sst.mnmean.v3.nc',\n", + " 'sst.mnmean.v4.nc',\n", + " 'sst.mon.1971-2000.ltm.v4.nc',\n", + " 'sst.mon.19712000.ltm.v3.nc',\n", + " 'sst.mon.1981-2010.ltm.v3.nc',\n", + " 'sst.mon.1981-2010.ltm.v4.nc']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(catalog)" + ] + }, + { + "cell_type": "markdown", + "id": "handy-baking", + "metadata": {}, + "source": [ + "Once you've identified a dataset of interest, you can access it as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "committed-cement", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sources:\n", + " err.mnmean.v3.nc:\n", + " args:\n", + " chunks: {}\n", + " urlpath: https://psl.noaa.gov/thredds/dodsC/Datasets/noaa.ersst/err.mnmean.v3.nc\n", + " description: THREDDS data\n", + " driver: intake_xarray.opendap.OpenDapSource\n", + " metadata:\n", + " catalog_dir: null\n", + "\n" + ] + } + ], + "source": [ + "source = catalog['err.mnmean.v3.nc']\n", + "print(source)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "educational-competition", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(type(source))" + ] + }, + { + "cell_type": "markdown", + "id": "favorite-plasma", + "metadata": {}, + "source": [ + "## Loading a dataset\n", + "\n", + "To load a dataset of interest, you can use the `to_dask()` method which is available on a **source** object:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "willing-realtor", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 719 ms, sys: 185 ms, total: 904 ms\n", + "Wall time: 9.44 s\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:    (lat: 89, lon: 180, nbnds: 2, time: 1994)\n",
+       "Coordinates:\n",
+       "  * lat        (lat) float32 88.0 86.0 84.0 82.0 ... -82.0 -84.0 -86.0 -88.0\n",
+       "  * lon        (lon) float32 0.0 2.0 4.0 6.0 8.0 ... 352.0 354.0 356.0 358.0\n",
+       "  * time       (time) datetime64[ns] 1854-01-01 1854-02-01 ... 2020-02-01\n",
+       "Dimensions without coordinates: nbnds\n",
+       "Data variables:\n",
+       "    time_bnds  (time, nbnds) float64 dask.array<chunksize=(1994, 2), meta=np.ndarray>\n",
+       "    err        (time, lat, lon) float32 dask.array<chunksize=(1994, 89, 180), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    title:                           NOAA Extended Reconstructed SST V3\n",
+       "    Conventions:                     CF-1.0\n",
+       "    history:                         Thu Jul  1 14:04:15 2010: ncatted -O -a ...\n",
+       "    comments:                        The extended reconstructed sea surface t...\n",
+       "    platform:                        Model\n",
+       "    source:                          NOAA/NESDIS/National Climatic Data Center\n",
+       "    institution:                     NOAA/NESDIS/National Climatic Data Center\n",
+       "    citation:                        Smith, T.M., R.W. Reynolds, Thomas C. Pe...\n",
+       "    dataset_title:                   Extended Reconstructed Sea Surface Tempe...\n",
+       "    source_doc:                      https://www.ncdc.noaa.gov/data-access/ma...\n",
+       "    References:                      https://www.psl.noaa.gov/data/gridded/da...\n",
+       "    DODS_EXTRA.Unlimited_Dimension:  time
" + ], + "text/plain": [ + "\n", + "Dimensions: (lat: 89, lon: 180, nbnds: 2, time: 1994)\n", + "Coordinates:\n", + " * lat (lat) float32 88.0 86.0 84.0 82.0 ... -82.0 -84.0 -86.0 -88.0\n", + " * lon (lon) float32 0.0 2.0 4.0 6.0 8.0 ... 352.0 354.0 356.0 358.0\n", + " * time (time) datetime64[ns] 1854-01-01 1854-02-01 ... 2020-02-01\n", + "Dimensions without coordinates: nbnds\n", + "Data variables:\n", + " time_bnds (time, nbnds) float64 dask.array\n", + " err (time, lat, lon) float32 dask.array\n", + "Attributes:\n", + " title: NOAA Extended Reconstructed SST V3\n", + " Conventions: CF-1.0\n", + " history: Thu Jul 1 14:04:15 2010: ncatted -O -a ...\n", + " comments: The extended reconstructed sea surface t...\n", + " platform: Model\n", + " source: NOAA/NESDIS/National Climatic Data Center\n", + " institution: NOAA/NESDIS/National Climatic Data Center\n", + " citation: Smith, T.M., R.W. Reynolds, Thomas C. Pe...\n", + " dataset_title: Extended Reconstructed Sea Surface Tempe...\n", + " source_doc: https://www.ncdc.noaa.gov/data-access/ma...\n", + " References: https://www.psl.noaa.gov/data/gridded/da...\n", + " DODS_EXTRA.Unlimited_Dimension: time" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "ds = source().to_dask()\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "miniature-indianapolis", + "metadata": {}, + "source": [ + "The `to_dask()` reads only metadata needed to construct an ``xarray.Dataset``. The actual data are streamed over the network when computation routines are invoked on the dataset. \n", + "By default, intake-thredds uses ``chunks={}`` to load the dataset with dask using a single chunk for all arrays. You can use a different chunking scheme by prividing a custom value of chunks before calling `.to_dask()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "flush-philip", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 218 ms, sys: 18.3 ms, total: 237 ms\n", + "Wall time: 8.3 s\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:    (lat: 89, lon: 180, nbnds: 2, time: 1994)\n",
+       "Coordinates:\n",
+       "  * lat        (lat) float32 88.0 86.0 84.0 82.0 ... -82.0 -84.0 -86.0 -88.0\n",
+       "  * lon        (lon) float32 0.0 2.0 4.0 6.0 8.0 ... 352.0 354.0 356.0 358.0\n",
+       "  * time       (time) datetime64[ns] 1854-01-01 1854-02-01 ... 2020-02-01\n",
+       "Dimensions without coordinates: nbnds\n",
+       "Data variables:\n",
+       "    time_bnds  (time, nbnds) float64 dask.array<chunksize=(100, 2), meta=np.ndarray>\n",
+       "    err        (time, lat, lon) float32 dask.array<chunksize=(100, 89, 90), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    title:                           NOAA Extended Reconstructed SST V3\n",
+       "    Conventions:                     CF-1.0\n",
+       "    history:                         Thu Jul  1 14:04:15 2010: ncatted -O -a ...\n",
+       "    comments:                        The extended reconstructed sea surface t...\n",
+       "    platform:                        Model\n",
+       "    source:                          NOAA/NESDIS/National Climatic Data Center\n",
+       "    institution:                     NOAA/NESDIS/National Climatic Data Center\n",
+       "    citation:                        Smith, T.M., R.W. Reynolds, Thomas C. Pe...\n",
+       "    dataset_title:                   Extended Reconstructed Sea Surface Tempe...\n",
+       "    source_doc:                      https://www.ncdc.noaa.gov/data-access/ma...\n",
+       "    References:                      https://www.psl.noaa.gov/data/gridded/da...\n",
+       "    DODS_EXTRA.Unlimited_Dimension:  time
" + ], + "text/plain": [ + "\n", + "Dimensions: (lat: 89, lon: 180, nbnds: 2, time: 1994)\n", + "Coordinates:\n", + " * lat (lat) float32 88.0 86.0 84.0 82.0 ... -82.0 -84.0 -86.0 -88.0\n", + " * lon (lon) float32 0.0 2.0 4.0 6.0 8.0 ... 352.0 354.0 356.0 358.0\n", + " * time (time) datetime64[ns] 1854-01-01 1854-02-01 ... 2020-02-01\n", + "Dimensions without coordinates: nbnds\n", + "Data variables:\n", + " time_bnds (time, nbnds) float64 dask.array\n", + " err (time, lat, lon) float32 dask.array\n", + "Attributes:\n", + " title: NOAA Extended Reconstructed SST V3\n", + " Conventions: CF-1.0\n", + " history: Thu Jul 1 14:04:15 2010: ncatted -O -a ...\n", + " comments: The extended reconstructed sea surface t...\n", + " platform: Model\n", + " source: NOAA/NESDIS/National Climatic Data Center\n", + " institution: NOAA/NESDIS/National Climatic Data Center\n", + " citation: Smith, T.M., R.W. Reynolds, Thomas C. Pe...\n", + " dataset_title: Extended Reconstructed Sea Surface Tempe...\n", + " source_doc: https://www.ncdc.noaa.gov/data-access/ma...\n", + " References: https://www.psl.noaa.gov/data/gridded/da...\n", + " DODS_EXTRA.Unlimited_Dimension: time" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# Use a custom chunking scheme\n", + "ds = source(chunks={'time': 100, 'lon': 90}).to_dask()\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "genetic-provider", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/intake_thredds/cat.py b/intake_thredds/cat.py index 479c45f..832fdb7 100644 --- a/intake_thredds/cat.py +++ b/intake_thredds/cat.py @@ -83,7 +83,7 @@ def access_urls(ds, self): 'THREDDS data', self.driver, True, - {'urlpath': access_urls(ds, self), 'chunks': None}, + {'urlpath': access_urls(ds, self), 'chunks': {}}, [], [], {}, From 5dab92e3f78eb61ba95b5b0767c9bc1bab5309db Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 16 Feb 2021 18:03:10 -0700 Subject: [PATCH 06/20] Update notebook --- docs/source/tutorial.ipynb | 690 ++++++++++++++++++++++++++++++++++++- 1 file changed, 675 insertions(+), 15 deletions(-) diff --git a/docs/source/tutorial.ipynb b/docs/source/tutorial.ipynb index 5fc4d24..e0a265c 100644 --- a/docs/source/tutorial.ipynb +++ b/docs/source/tutorial.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "regulation-questionnaire", + "id": "greater-hierarchy", "metadata": {}, "source": [ "# Tutorial\n", @@ -13,7 +13,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "attractive-liberal", + "id": "strange-concord", "metadata": {}, "outputs": [], "source": [ @@ -22,7 +22,7 @@ }, { "cell_type": "markdown", - "id": "polished-clearance", + "id": "intelligent-freeze", "metadata": {}, "source": [ "## Loading a catalog\n", @@ -33,7 +33,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "conceptual-sound", + "id": "demographic-coupon", "metadata": {}, "outputs": [], "source": [ @@ -43,7 +43,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "confident-heading", + "id": "stunning-target", "metadata": {}, "outputs": [ { @@ -63,7 +63,7 @@ }, { "cell_type": "markdown", - "id": "sustained-threshold", + "id": "common-links", "metadata": {}, "source": [ "## Using the catalog\n", @@ -74,7 +74,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "accomplished-classification", + "id": "municipal-calibration", "metadata": {}, "outputs": [ { @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "handy-baking", + "id": "contrary-liquid", "metadata": {}, "source": [ "Once you've identified a dataset of interest, you can access it as follows:" @@ -109,7 +109,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "committed-cement", + "id": "white-festival", "metadata": {}, "outputs": [ { @@ -137,7 +137,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "educational-competition", + "id": "limiting-customs", "metadata": {}, "outputs": [ { @@ -154,7 +154,7 @@ }, { "cell_type": "markdown", - "id": "favorite-plasma", + "id": "light-truck", "metadata": {}, "source": [ "## Loading a dataset\n", @@ -165,7 +165,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "willing-realtor", + "id": "sweet-correspondence", "metadata": {}, "outputs": [ { @@ -724,7 +724,7 @@ }, { "cell_type": "markdown", - "id": "miniature-indianapolis", + "id": "finished-brisbane", "metadata": {}, "source": [ "The `to_dask()` reads only metadata needed to construct an ``xarray.Dataset``. The actual data are streamed over the network when computation routines are invoked on the dataset. \n", @@ -734,7 +734,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "flush-philip", + "id": "phantom-contractor", "metadata": {}, "outputs": [ { @@ -1348,10 +1348,670 @@ "ds" ] }, + { + "cell_type": "markdown", + "id": "peripheral-ribbon", + "metadata": {}, + "source": [ + "## Working with nested catalogs\n", + "\n", + "In some scenarious, a THREDDS catalog can reference another THREDDS catalog. This results into a nested structure consisting of a parent catalog and children catalogs:" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "affiliated-duncan", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Datasets', 'Aggregations']" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cat_url = 'https://psl.noaa.gov/thredds/catalog.xml'\n", + "catalog = intake.open_thredds_cat(cat_url)\n", + "list(catalog)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "delayed-transaction", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['20thC_ReanV2', '20thC_ReanV2c', '20thC_ReanV3', 'ATOMIC', 'COBE', 'COBE2', 'CarbonTracker', 'E3SM', 'E3SM_LE', 'LIM', 'NARR', 'S2S', 'SERDP_regimeshifts', 'Timeseries', 'cmap', 'coads', 'cpc_global_precip', 'cpc_global_temp', 'cpc_us_hour_precip', 'cpc_us_precip', 'cpcsoil', 'cru', 'dai_pdsi', 'ghcncams', 'ghcngridded', 'gistemp', 'godas', 'gpcc', 'gpcp', 'icoads', 'icoads2.5', 'interp_OLR', 'jmatemp', 'kaplan_sst', 'livneh', 'mlost', 'mlostv3b', 'ncep', 'ncep.marine', 'ncep.pac.ocean', 'ncep.reanalysis', 'ncep.reanalysis.dailyavgs', 'ncep.reanalysis.derived', 'ncep.reanalysis2', 'ncep.reanalysis2.dailyavgs', 'ncep.reanalysis2.derived', 'noaa.ersst', 'noaa.ersst.v3', 'noaa.ersst.v4', 'noaa.ersst.v5', 'noaa.oisst.v2', 'noaa.oisst.v2.derived', 'noaa.oisst.v2.highres', 'noaa_hrc', 'noaaglobaltemp', 'noaamergedtemp', 'nodc.woa94', 'nodc.woa98', 'olrcdr', 'prec', 'precl', 'snowcover', 'udel.airt.precip', 'uninterp_OLR']\n" + ] + } + ], + "source": [ + "print(list(catalog['Datasets']))" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "emotional-burke", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['other_gauss', 'pressure', 'surface', 'surface_gauss', 'tropopause']\n" + ] + } + ], + "source": [ + "print(list(catalog['Datasets']['ncep.reanalysis.dailyavgs']))" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "under-anchor", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['air.sig995.1948.nc', 'air.sig995.1949.nc', 'air.sig995.1950.nc', 'air.sig995.1951.nc', 'air.sig995.1952.nc', 'air.sig995.1953.nc', 'air.sig995.1954.nc', 'air.sig995.1955.nc', 'air.sig995.1956.nc', 'air.sig995.1957.nc']\n" + ] + } + ], + "source": [ + "print(list(catalog['Datasets']['ncep.reanalysis.dailyavgs']['surface'])[:10])" + ] + }, + { + "cell_type": "markdown", + "id": "closed-forum", + "metadata": {}, + "source": [ + "To load data from such a nested catalog, `intake-thredds` provides a special source object `:py:class:~intake_thredds.source.THREDDSMergedSource` accessible via the `.open_thredds_merged()` function. The inputs for this function consists of:\n", + "\n", + "- `url`: top level URL of the THREDDS catalog\n", + "- `path`: a list of paths for child catalogs to descend down. The paths can include glob characters (*, ?). These glob characters are used for matching." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "binary-transmission", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sources:\n", + " thredds_merged:\n", + " args:\n", + " path:\n", + " - Datasets\n", + " - ncep.reanalysis.dailyavgs\n", + " - surface\n", + " - air*sig995*194*.nc\n", + " url: https://psl.noaa.gov/thredds/catalog.xml\n", + " description: ''\n", + " driver: intake_thredds.source.THREDDSMergedSource\n", + " metadata: {}\n", + "\n" + ] + } + ], + "source": [ + "source = intake.open_thredds_merged(\n", + " cat_url, path=['Datasets', 'ncep.reanalysis.dailyavgs', 'surface', 'air*sig995*194*.nc']\n", + ")\n", + "print(source)" + ] + }, + { + "cell_type": "markdown", + "id": "minor-hepatitis", + "metadata": {}, + "source": [ + "To load the data into `:py:class:~xarray.Dataset`, you can invoke the `to_dask()` method. \n", + "Internally, `:py:func:~intake.open_thredds_merged` does the following:\n", + "- descend down the given paths and collect all available datasets\n", + "- load each dataset in a dataset\n", + "- combine all loaded datasets into a single dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "durable-antique", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Dataset(s): 100%|████████████████████████████████| 2/2 [00:19<00:00, 9.91s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 845 ms, sys: 51.1 ms, total: 896 ms\n", + "Wall time: 29.2 s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:  (lat: 73, lon: 144, time: 731)\n",
+       "Coordinates:\n",
+       "  * lon      (lon) float32 0.0 2.5 5.0 7.5 10.0 ... 350.0 352.5 355.0 357.5\n",
+       "  * time     (time) datetime64[ns] 1948-01-01 1948-01-02 ... 1949-12-31\n",
+       "  * lat      (lat) float32 90.0 87.5 85.0 82.5 80.0 ... -82.5 -85.0 -87.5 -90.0\n",
+       "Data variables:\n",
+       "    air      (time, lat, lon) float32 dask.array<chunksize=(366, 73, 144), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    Conventions:                     COARDS\n",
+       "    title:                           mean daily NMC reanalysis (1948)\n",
+       "    description:                     Data is from NMC initialized reanalysis\\...\n",
+       "    platform:                        Model\n",
+       "    history:                         created 99/05/11 by Hoop (netCDF2.3)\\nCo...\n",
+       "    dataset_title:                   NCEP-NCAR Reanalysis 1\n",
+       "    References:                      http://www.psl.noaa.gov/data/gridded/dat...\n",
+       "    DODS_EXTRA.Unlimited_Dimension:  time
" + ], + "text/plain": [ + "\n", + "Dimensions: (lat: 73, lon: 144, time: 731)\n", + "Coordinates:\n", + " * lon (lon) float32 0.0 2.5 5.0 7.5 10.0 ... 350.0 352.5 355.0 357.5\n", + " * time (time) datetime64[ns] 1948-01-01 1948-01-02 ... 1949-12-31\n", + " * lat (lat) float32 90.0 87.5 85.0 82.5 80.0 ... -82.5 -85.0 -87.5 -90.0\n", + "Data variables:\n", + " air (time, lat, lon) float32 dask.array\n", + "Attributes:\n", + " Conventions: COARDS\n", + " title: mean daily NMC reanalysis (1948)\n", + " description: Data is from NMC initialized reanalysis\\...\n", + " platform: Model\n", + " history: created 99/05/11 by Hoop (netCDF2.3)\\nCo...\n", + " dataset_title: NCEP-NCAR Reanalysis 1\n", + " References: http://www.psl.noaa.gov/data/gridded/dat...\n", + " DODS_EXTRA.Unlimited_Dimension: time" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "ds = source.to_dask()\n", + "ds" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "genetic-provider", + "id": "finished-participation", "metadata": {}, "outputs": [], "source": [] From 2c66bf41affc6e89cfe4e3eef85a295ff739de7c Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 16 Feb 2021 18:14:25 -0700 Subject: [PATCH 07/20] Update notebook --- docs/source/tutorial.ipynb | 62 +++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/docs/source/tutorial.ipynb b/docs/source/tutorial.ipynb index e0a265c..26255b1 100644 --- a/docs/source/tutorial.ipynb +++ b/docs/source/tutorial.ipynb @@ -2,18 +2,18 @@ "cells": [ { "cell_type": "markdown", - "id": "greater-hierarchy", + "id": "caroline-wheel", "metadata": {}, "source": [ "# Tutorial\n", "\n", - "Intake-thredds provides an interface that combines [`siphon`](https://github.com/Unidata/siphon) and `intake` to retrieve data from THREDDS data servers. This tutorial provides an introduction to the API and features of intake-thredds. Let's begin by importing `intake`. " + "Intake-thredds provides an interface that combines functionality from [`siphon`](https://github.com/Unidata/siphon) and `intake` to retrieve data from THREDDS data servers. This tutorial provides an introduction to the API and features of intake-thredds. Let's begin by importing `intake`. " ] }, { "cell_type": "code", "execution_count": 1, - "id": "strange-concord", + "id": "minimal-calculation", "metadata": {}, "outputs": [], "source": [ @@ -22,7 +22,7 @@ }, { "cell_type": "markdown", - "id": "intelligent-freeze", + "id": "knowing-yugoslavia", "metadata": {}, "source": [ "## Loading a catalog\n", @@ -33,7 +33,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "demographic-coupon", + "id": "cheap-assessment", "metadata": {}, "outputs": [], "source": [ @@ -43,7 +43,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "stunning-target", + "id": "inside-boutique", "metadata": {}, "outputs": [ { @@ -63,7 +63,7 @@ }, { "cell_type": "markdown", - "id": "common-links", + "id": "sustainable-variation", "metadata": {}, "source": [ "## Using the catalog\n", @@ -74,7 +74,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "municipal-calibration", + "id": "dated-litigation", "metadata": {}, "outputs": [ { @@ -100,7 +100,7 @@ }, { "cell_type": "markdown", - "id": "contrary-liquid", + "id": "macro-capital", "metadata": {}, "source": [ "Once you've identified a dataset of interest, you can access it as follows:" @@ -109,7 +109,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "white-festival", + "id": "everyday-motion", "metadata": {}, "outputs": [ { @@ -137,7 +137,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "limiting-customs", + "id": "limiting-smart", "metadata": {}, "outputs": [ { @@ -154,7 +154,7 @@ }, { "cell_type": "markdown", - "id": "light-truck", + "id": "rapid-universal", "metadata": {}, "source": [ "## Loading a dataset\n", @@ -165,7 +165,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "sweet-correspondence", + "id": "leading-breakdown", "metadata": {}, "outputs": [ { @@ -724,7 +724,7 @@ }, { "cell_type": "markdown", - "id": "finished-brisbane", + "id": "quick-direction", "metadata": {}, "source": [ "The `to_dask()` reads only metadata needed to construct an ``xarray.Dataset``. The actual data are streamed over the network when computation routines are invoked on the dataset. \n", @@ -734,7 +734,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "phantom-contractor", + "id": "rural-manor", "metadata": {}, "outputs": [ { @@ -1350,7 +1350,7 @@ }, { "cell_type": "markdown", - "id": "peripheral-ribbon", + "id": "protective-trader", "metadata": {}, "source": [ "## Working with nested catalogs\n", @@ -1361,7 +1361,7 @@ { "cell_type": "code", "execution_count": 39, - "id": "affiliated-duncan", + "id": "south-kenya", "metadata": {}, "outputs": [ { @@ -1384,7 +1384,7 @@ { "cell_type": "code", "execution_count": 41, - "id": "delayed-transaction", + "id": "advance-registration", "metadata": {}, "outputs": [ { @@ -1402,7 +1402,7 @@ { "cell_type": "code", "execution_count": 42, - "id": "emotional-burke", + "id": "alpha-canada", "metadata": {}, "outputs": [ { @@ -1420,7 +1420,7 @@ { "cell_type": "code", "execution_count": 45, - "id": "under-anchor", + "id": "offshore-intro", "metadata": {}, "outputs": [ { @@ -1437,10 +1437,10 @@ }, { "cell_type": "markdown", - "id": "closed-forum", + "id": "innovative-thursday", "metadata": {}, "source": [ - "To load data from such a nested catalog, `intake-thredds` provides a special source object `:py:class:~intake_thredds.source.THREDDSMergedSource` accessible via the `.open_thredds_merged()` function. The inputs for this function consists of:\n", + "To load data from such a nested catalog, `intake-thredds` provides a special source object {py:class}`~intake_thredds.source.THREDDSMergedSource` accessible via the `.open_thredds_merged()` function. The inputs for this function consists of:\n", "\n", "- `url`: top level URL of the THREDDS catalog\n", "- `path`: a list of paths for child catalogs to descend down. The paths can include glob characters (*, ?). These glob characters are used for matching." @@ -1449,7 +1449,7 @@ { "cell_type": "code", "execution_count": 46, - "id": "binary-transmission", + "id": "distinct-annual", "metadata": {}, "outputs": [ { @@ -1481,20 +1481,20 @@ }, { "cell_type": "markdown", - "id": "minor-hepatitis", + "id": "filled-surveillance", "metadata": {}, "source": [ - "To load the data into `:py:class:~xarray.Dataset`, you can invoke the `to_dask()` method. \n", - "Internally, `:py:func:~intake.open_thredds_merged` does the following:\n", - "- descend down the given paths and collect all available datasets\n", - "- load each dataset in a dataset\n", - "- combine all loaded datasets into a single dataset" + "To load the data into an xarray {py:class}`~xarray.Dataset`, you can invoke the `.to_dask()` method. \n", + "Internally, {py:class}`~intake_thredds.source.THREDDSMergedSource` does the following:\n", + "- descend down the given paths and collect all available datasets.\n", + "- load each dataset in a dataset.\n", + "- combine all loaded datasets into a single dataset." ] }, { "cell_type": "code", "execution_count": 47, - "id": "durable-antique", + "id": "defined-pipeline", "metadata": {}, "outputs": [ { @@ -2011,7 +2011,7 @@ { "cell_type": "code", "execution_count": null, - "id": "finished-participation", + "id": "involved-discount", "metadata": {}, "outputs": [], "source": [] From c5373fa4dd029714e1741bf3ac72e682143d5880 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 16 Feb 2021 18:20:30 -0700 Subject: [PATCH 08/20] Add tqdm --- docs/requirements.txt | 1 + intake_thredds/source.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 75db1e3..2be20e4 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,4 +5,5 @@ pydata-sphinx-theme>=0.4.3 setuptools>=50.3 sphinx>=3.2 jupyterlab>=3 +tqdm -r ../requirements.txt diff --git a/intake_thredds/source.py b/intake_thredds/source.py index fdca052..22b5deb 100644 --- a/intake_thredds/source.py +++ b/intake_thredds/source.py @@ -68,7 +68,11 @@ def __init__(self, url, path, driver='opendap', progressbar=True, metadata=None) self._ds = None self.progressbar = progressbar if self.progressbar and tqdm is None: - raise ValueError("Missing package 'tqdm' required for progress bars.") + raise ValueError( + "Missing package 'tqdm' required for progress bars." + 'You can install tqdm via (1) python -m pip install tqdm or (2) conda install -c conda-forge tqdm.' + "In case you don't want to install tqdm, please use `progressbar=False`." + ) def _open_dataset(self): import xarray as xr From 722c962c383c7080b4026b8286b95af6511ca751 Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Wed, 17 Feb 2021 10:11:47 +0100 Subject: [PATCH 09/20] Update test_source.py --- tests/test_source.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_source.py b/tests/test_source.py index 4088f5d..78982a8 100644 --- a/tests/test_source.py +++ b/tests/test_source.py @@ -11,7 +11,7 @@ def THREDDSMergedSource_cat(): 'Datasets', 'ncep.reanalysis.dailyavgs', 'surface', - 'air*sig995*194*.nc', # todo: convert . to * ? + 'air.sig995.194*.nc', ] cat = intake.open_thredds_merged(thredds_cat_url, paths) assert cat.urlpath == thredds_cat_url @@ -26,7 +26,7 @@ def THREDDSMergedSource_cat_short_url(): @pytest.fixture(scope='module') def THREDDSMergedSource_cat_short_path(): - return ['air.sig995*194*.nc'] # todo: convert . to * ? + return ['air.sig995.194*.nc'] @pytest.fixture(scope='module') @@ -66,7 +66,7 @@ def test_THREDDSMergedSource(THREDDSMergedSource_cat): def test_THREDDSMergedSource_long_short(THREDDSMergedSource_cat, THREDDSMergedSource_cat_short): ds = THREDDSMergedSource_cat.to_dask() ds_short = THREDDSMergedSource_cat_short.to_dask() - xr.testing.assert_equal(ds, ds_short) # TODO: down load data only compare dims, coords, size + xr.testing.assert_equal(ds, ds_short) # download data, now faster only compare dims, coords, size def test_THREDDSMergedSource_simplecache_netcdf(THREDDSMergedSource_cat_short_simplecache): From 3dd0c55a0783b374aae3c63b52b01c24916cecb2 Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Wed, 17 Feb 2021 10:13:43 +0100 Subject: [PATCH 10/20] Update test_source.py --- tests/test_source.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_source.py b/tests/test_source.py index 78982a8..27f0012 100644 --- a/tests/test_source.py +++ b/tests/test_source.py @@ -66,7 +66,9 @@ def test_THREDDSMergedSource(THREDDSMergedSource_cat): def test_THREDDSMergedSource_long_short(THREDDSMergedSource_cat, THREDDSMergedSource_cat_short): ds = THREDDSMergedSource_cat.to_dask() ds_short = THREDDSMergedSource_cat_short.to_dask() - xr.testing.assert_equal(ds, ds_short) # download data, now faster only compare dims, coords, size + xr.testing.assert_equal( + ds, ds_short + ) # downloads data now, faster only compare dims, coords, size def test_THREDDSMergedSource_simplecache_netcdf(THREDDSMergedSource_cat_short_simplecache): From 250218f9642127043a5c7ea58c9200d53e2335f3 Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Wed, 17 Feb 2021 10:19:03 +0100 Subject: [PATCH 11/20] Update intake_thredds/source.py --- intake_thredds/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intake_thredds/source.py b/intake_thredds/source.py index 22b5deb..be3e868 100644 --- a/intake_thredds/source.py +++ b/intake_thredds/source.py @@ -34,7 +34,7 @@ class THREDDSMergedSource(DataSourceMixin): -------- >>> import intake >>> cat_url = 'https://psl.noaa.gov/thredds/catalog.xml' - >>> paths = ['Datasets', 'ncep.reanalysis.dailyavgs', 'surface', 'air*sig995*194*.nc'] + >>> paths = ['Datasets', 'ncep.reanalysis.dailyavgs', 'surface', 'air.sig995.194*.nc'] >>> src = intake.open_thredds_merged(cat_url, paths) >>> src sources: From 520745d9539f80581d614142dbf954d69de9ae13 Mon Sep 17 00:00:00 2001 From: AS Date: Wed, 17 Feb 2021 10:50:04 +0100 Subject: [PATCH 12/20] add driver and simplecache example --- docs/source/tutorial.ipynb | 212 ++++++++++++++++++++++++++----------- 1 file changed, 152 insertions(+), 60 deletions(-) diff --git a/docs/source/tutorial.ipynb b/docs/source/tutorial.ipynb index 26255b1..381d412 100644 --- a/docs/source/tutorial.ipynb +++ b/docs/source/tutorial.ipynb @@ -2,18 +2,16 @@ "cells": [ { "cell_type": "markdown", - "id": "caroline-wheel", "metadata": {}, "source": [ "# Tutorial\n", "\n", - "Intake-thredds provides an interface that combines functionality from [`siphon`](https://github.com/Unidata/siphon) and `intake` to retrieve data from THREDDS data servers. This tutorial provides an introduction to the API and features of intake-thredds. Let's begin by importing `intake`. " + "`intake-thredds` provides an interface that combines functionality from [`siphon`](https://github.com/Unidata/siphon) and `intake` to retrieve data from THREDDS data servers. This tutorial provides an introduction to the API and features of `intake-thredds`. Let's begin by importing `intake`. " ] }, { "cell_type": "code", "execution_count": 1, - "id": "minimal-calculation", "metadata": {}, "outputs": [], "source": [ @@ -22,7 +20,6 @@ }, { "cell_type": "markdown", - "id": "knowing-yugoslavia", "metadata": {}, "source": [ "## Loading a catalog\n", @@ -33,7 +30,6 @@ { "cell_type": "code", "execution_count": 2, - "id": "cheap-assessment", "metadata": {}, "outputs": [], "source": [ @@ -43,7 +39,6 @@ { "cell_type": "code", "execution_count": 3, - "id": "inside-boutique", "metadata": {}, "outputs": [ { @@ -63,7 +58,6 @@ }, { "cell_type": "markdown", - "id": "sustainable-variation", "metadata": {}, "source": [ "## Using the catalog\n", @@ -74,7 +68,6 @@ { "cell_type": "code", "execution_count": 4, - "id": "dated-litigation", "metadata": {}, "outputs": [ { @@ -100,7 +93,6 @@ }, { "cell_type": "markdown", - "id": "macro-capital", "metadata": {}, "source": [ "Once you've identified a dataset of interest, you can access it as follows:" @@ -109,7 +101,6 @@ { "cell_type": "code", "execution_count": 5, - "id": "everyday-motion", "metadata": {}, "outputs": [ { @@ -137,7 +128,6 @@ { "cell_type": "code", "execution_count": 6, - "id": "limiting-smart", "metadata": {}, "outputs": [ { @@ -154,7 +144,6 @@ }, { "cell_type": "markdown", - "id": "rapid-universal", "metadata": {}, "source": [ "## Loading a dataset\n", @@ -165,15 +154,14 @@ { "cell_type": "code", "execution_count": 7, - "id": "leading-breakdown", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 719 ms, sys: 185 ms, total: 904 ms\n", - "Wall time: 9.44 s\n" + "CPU times: user 267 ms, sys: 29.8 ms, total: 297 ms\n", + "Wall time: 12.8 s\n" ] }, { @@ -552,14 +540,14 @@ " dataset_title: Extended Reconstructed Sea Surface Tempe...\n", " source_doc: https://www.ncdc.noaa.gov/data-access/ma...\n", " References: https://www.psl.noaa.gov/data/gridded/da...\n", - " DODS_EXTRA.Unlimited_Dimension: time