Merge pull request #77 from NREL/bnb/dev

Bnb/dev
NREL · Oct 28, 2024 · 8b8f859 · 8b8f859
2 parents 76a6d86 + e815c42
commit 8b8f859
Show file tree

Hide file tree

Showing 16 changed files with 493 additions and 100 deletions.
diff --git a/README.rst b/README.rst
@@ -49,6 +49,12 @@ The NSRDB `Data Model
 aggregation framework that sources, processes, and prepares data for input to
 All-Sky.
 
+The MLClouds Model
+==================
+The `MLClouds Model <https://github.com/NREL/mlclouds.git>`_ is used to predict
+missing cloud properties (a.k.a. Gap Fill). The NSRDB interface with MLClouds
+can be found `here <https://github.com/NREL/nsrdb/tree/master/nsrdb/gap_fill>`_.
+
 Installation
 ============
 

diff --git a/nsrdb/cli.py b/nsrdb/cli.py
@@ -125,11 +125,16 @@ def main(ctx, config, verbose):
 
     To do a standard CONUS / Full Disc run use the following commands::
 
-        $ config='{"year": <year>, "out_dir": <out_dir>}'
-        $ python -m nsrdb.cli create-configs -c config
+        $ CONFIG='{"year": <year>, "out_dir": <out_dir>}'
+
+        $ python -m nsrdb.cli create-configs --run_type full -c "${CONFIG}"
+
         $ cd <out_dir>
+
         $ bash run.sh (run this until all main steps are complete)
+
         $ cd post_proc
+
         $ bash run.sh (run this until all post-proc steps are complete)
 
     See the help pages of the module CLIs for more details on the config files
@@ -248,16 +253,17 @@ def pipeline(ctx, config, cancel, monitor, background, verbose):
 @click.option(
     '--run_type',
     '-r',
-    default='full',
+    default='surfrad',
     type=str,
-    help="""Run type to create configs for. Can be "full" (generates all config
-    and pipline files for the given year, including all domain main runs,
-    blending, aggregation, and collection), or "main" (for standard run without
-    post-processing, with data-model, ml-cloud-fill, all-sky, and
-    collect-data-model), "aggregate" (for aggregating post-2018 data to
-    pre-2018 resolution), "blend" (for blending east and west domains into a
-    single domain), or "post" (for all blending / aggregation / collection for
-    a given year)""",
+    help="""Run type to create configs for. Can be "surfrad" (just writes a
+    single template config with any provided kwargs replaced, with a surfrad
+    meta file), "full" (generates all config and pipeline files for the
+    given year, including all domain main runs, blending, aggregation, and
+    collection), "main" (for standard run without post-processing, with
+    data-model, ml-cloud-fill, all-sky, and collect-data-model), "aggregate"
+    (for aggregating post-2018 data to pre-2018 resolution), "blend" (for
+    blending east and west domains into a single domain), or "post" (for all
+    blending / aggregation / collection for a given year)""",
 )
 @click.option(
     '--all_domains',
@@ -277,27 +283,30 @@ def pipeline(ctx, config, cancel, monitor, background, verbose):
 )
 @click.pass_context
 def create_configs(
-    ctx, config, run_type='full', all_domains=False, collect=False
+    ctx, config, run_type='surfrad', all_domains=False, collect=False
 ):
     """Create config files for standard NSRDB runs using config templates.
 
-    Examples
-    --------
-    $ python -m nsrdb.cli create-configs -c '{"year": 2020, "out_dir": "./"}'
+    To generate all full_disc / conus run directories for east /
+    west regions, each containing the main routine config files, run the
+    following::
+
+    $ CONFIG='{"year": 2020, "out_dir": "./"}'
 
-    The above will generate all full_disc / conus run directories for east /
-    west regions, each with main routine config files contained. Additionally,
-    conus / full_disc blend configs, aggregation config, collection config, and
-    a post processing pipeline config with all these steps will be written to a
-    "post_proc" directory so that post-processing can be run simply with::
+    $ python -m nsrdb.cli create-configs --run_type full -c "${CONFIG}"
+
+    Additionally, conus / full_disc blend configs, aggregation config,
+    collection config, and a post processing pipeline config with all these
+    steps will be written to a "post_proc" directory so that post-processing
+    can be run simply with::
 
     $ python -m nsrdb.cli pipeline -c config_pipeline_post.json
     """
 
     ctx.ensure_object(dict)
     func_name = f'collect_{run_type}' if collect else run_type
     func_name = 'main_all' if run_type == 'main' and all_domains else func_name
-    valid_types = ['full', 'main', 'aggregate', 'blend', 'post']
+    valid_types = ['full', 'main', 'aggregate', 'blend', 'post', 'surfrad']
     msg = (
         f'Received unknown "run_type" {run_type}. Accepted values are '
         f'{valid_types}'
@@ -436,6 +445,7 @@ def ml_cloud_fill(ctx, config, verbose=False, pipeline_step=None):
                 "col_chunk": 10000,
                 "fill_all": false,
                 "max_workers": 4
+                "model_path": ...
             }
         }
 
@@ -659,7 +669,7 @@ def collect_data_model(ctx, config, verbose=False, pipeline_step=None):
     '-c',
     type=CONFIG_TYPE,
     required=True,
-    help='Path to config file or dict with kwargs for NSRDB.all_sky()',
+    help='Path to config file or dict with kwargs for NSRDB.collect_final()',
 )
 @click.option(
     '-v',
@@ -692,6 +702,34 @@ def collect_final(ctx, config, verbose=False, pipeline_step=None):
         )
 
 
+@main.command()
+@click.option(
+    '--config',
+    '-c',
+    type=CONFIG_TYPE,
+    required=True,
+    help='Path to config file or dict with kwargs for NSRDB.collect_daily()',
+)
+@click.option(
+    '-v',
+    '--verbose',
+    is_flag=True,
+    help='Flag to turn on debug logging. Default is False.',
+)
+@click.pass_context
+def collect_daily(ctx, config, verbose=False, pipeline_step=None):
+    """Collect daily files into a final file."""
+
+    BaseCLI.kickoff_single(
+        ctx=ctx,
+        module_name=ModuleName.COLLECT_DAILY,
+        func=Collector.collect_daily,
+        config=config,
+        verbose=verbose,
+        pipeline_step=pipeline_step,
+    )
+
+
 @main.command()
 @click.option(
     '--config',
@@ -1045,6 +1083,7 @@ def batch(
 Pipeline.COMMANDS[ModuleName.AGGREGATE] = aggregate
 Pipeline.COMMANDS[ModuleName.COLLECT_DATA_MODEL] = collect_data_model
 Pipeline.COMMANDS[ModuleName.COLLECT_FINAL] = collect_final
+Pipeline.COMMANDS[ModuleName.COLLECT_DAILY] = collect_daily
 Pipeline.COMMANDS[ModuleName.TMY] = tmy
 Pipeline.COMMANDS[ModuleName.COLLECT_BLEND] = collect_blend
 Pipeline.COMMANDS[ModuleName.COLLECT_AGGREGATE] = collect_aggregate

diff --git a/nsrdb/config/create_configs.py b/nsrdb/config/create_configs.py
@@ -34,10 +34,12 @@
     'meta_dir': DEFAULT_META_DIR,
 }
 
-MAIN_KWARGS = {
-    **BASE_KWARGS,
-    'extent': 'full',
-    'satellite': 'east',
+MAIN_KWARGS = {**BASE_KWARGS, 'extent': 'full', 'satellite': 'east'}
+
+SURFRAD_KWARGS = {
+    **MAIN_KWARGS,
+    'freq': '15min',
+    'spatial': '4km',
 }
 
 BLEND_KWARGS = {
@@ -47,10 +49,7 @@
     'main_dir': '../',
 }
 
-COLLECT_BLEND_KWARGS = {
-    **BASE_KWARGS,
-    'extent': 'full',
-}
+COLLECT_BLEND_KWARGS = {**BASE_KWARGS, 'extent': 'full'}
 
 AGG_KWARGS = {
     **BASE_KWARGS,
@@ -62,7 +61,7 @@
     'conus_freq': '5min',
     'final_freq': '30min',
     'n_chunks': 32,
-    'source_priority': ['conus', 'full_disk'],
+    'source_priority': ['conus', 'full_disc'],
 }
 
 COLLECT_AGG_KWARGS = {
@@ -102,14 +101,16 @@ class CreateConfigs:
     standard CONUS / Full Disc runs."""
 
     MAIN_RUN_NAME = '{basename}_{satellite}_{extent}_{year}_{spatial}_{freq}'
+    SURFRAD_RUN_NAME = '{basename}_{year}_surfrad'
     BLEND_RUN_NAME = '{basename}_{extent}_{year}_blend'
     AGG_RUN_NAME = '{basename}_{year}_aggregate'
     COLLECT_AGG_RUN_NAME = '{basename}_{year}_collect_aggregate'
     COLLECT_BLEND_RUN_NAME = '{basename}_{extent}_{year}_collect_blend'
 
     @classmethod
-    def _init_kwargs(cls, kwargs, default_kwargs):
+    def init_kwargs(cls, kwargs=None, default_kwargs=None):
         """Initialize config with default kwargs."""
+        default_kwargs = default_kwargs or {}
         msg = f'kwargs must have a "year" key. Received {kwargs}.'
         assert 'year' in kwargs, msg
         config = copy.deepcopy(default_kwargs)
@@ -211,6 +212,7 @@ def _get_run_name(cls, config, run_type='main'):
             {k: v for k, v in BASE_KWARGS.items() if k not in config}
         )
         pattern_dict = {
+            'surfrad': cls.SURFRAD_RUN_NAME,
             'main': cls.MAIN_RUN_NAME,
             'blend': cls.BLEND_RUN_NAME,
             'aggregate': cls.AGG_RUN_NAME,
@@ -227,7 +229,7 @@ def _get_run_name(cls, config, run_type='main'):
         return pattern.format(**run_config)
 
     @classmethod
-    def _update_run_templates(cls, config):
+    def _update_run_templates(cls, config, run_type='main'):
         """Replace format keys and dictionary keys in config templates with
         user input values."""
 
@@ -236,6 +238,17 @@ def _update_run_templates(cls, config):
             f'{pprint.pformat(config, indent=2)}'
         )
 
+        config['doy_range'] = config.get(
+            'doy_range',
+            ([1, 367] if calendar.isleap(config['year']) else [1, 366]),
+        )
+        config['start_doy'], config['end_doy'] = (
+            config['doy_range'][0],
+            config['doy_range'][1],
+        )
+        config['run_name'] = cls._get_run_name(config, run_type=run_type)
+        config['out_dir'] = os.path.join(config['out_dir'], config['run_name'])
+
         template = (
             PRE2018_CONFIG_TEMPLATE
             if int(config['year']) < 2018
@@ -263,6 +276,22 @@ def _update_run_templates(cls, config):
             config_dict, cls._get_config_file(config, 'pipeline')
         )
 
+        run_file = os.path.join(config['out_dir'], 'run.sh')
+        with open(run_file, 'w') as f:
+            f.write('python -m nsrdb.cli pipeline -c config_pipeline.json')
+
+        logger.info(f'Saved run script: {run_file}.')
+
+    @classmethod
+    def surfrad(cls, kwargs):
+        """Get basic config template with specified parameters replaced."""
+        config = cls.init_kwargs(kwargs, SURFRAD_KWARGS)
+        config['extent_tag'] = EXTENT_MAP['extent_tag'][config['extent']]
+        config['meta_file'] = os.path.join(
+            config['meta_dir'], 'surfrad_meta.csv'
+        )
+        cls._update_run_templates(config, run_type='surfrad')
+
     @classmethod
     def main(cls, kwargs):
         """Modify config files with specified parameters
@@ -273,7 +302,7 @@ def main(cls, kwargs):
             Dictionary of parameters including year, basename, satellite,
             extent, freq, spatial, meta_file, doy_range
         """
-        config = cls._init_kwargs(kwargs, MAIN_KWARGS)
+        config = cls.init_kwargs(kwargs, MAIN_KWARGS)
         msg = (
             '"extent" key not provided. Provide "extent" so correct input '
             'data can be selected'
@@ -284,27 +313,8 @@ def main(cls, kwargs):
         config['meta_file'] = cls._get_meta(config)
         config['spatial'], config['freq'] = cls._get_res(config)
 
-        config['doy_range'] = config.get(
-            'doy_range',
-            ([1, 367] if calendar.isleap(config['year']) else [1, 366]),
-        )
-
-        config['start_doy'], config['end_doy'] = (
-            config['doy_range'][0],
-            config['doy_range'][1],
-        )
-
-        config['run_name'] = cls._get_run_name(config)
-        config['out_dir'] = os.path.join(config['out_dir'], config['run_name'])
-
         cls._update_run_templates(config)
 
-        run_file = os.path.join(config['out_dir'], 'run.sh')
-        with open(run_file, 'w') as f:
-            f.write('python -m nsrdb.cli pipeline -c config_pipeline.json')
-
-        logger.info(f'Saved run script: {run_file}.')
-
     @classmethod
     def main_all(cls, kwargs):
         """Modify config files for all domains with specified parameters.
@@ -463,7 +473,7 @@ def _get_agg_entry(cls, config, extent):
 
     @classmethod
     def _aggregate(cls, kwargs):
-        """Get config for conus and full disk high-resolution to low-resolution
+        """Get config for conus and full disc high-resolution to low-resolution
         aggregation.  This is then used as the input to `nsrdb.cli.aggregate`
 
         Parameters
@@ -472,14 +482,14 @@ def _aggregate(cls, kwargs):
             Dictionary with keys specifying the case for which to aggregate
             files
         """
-        config = cls._init_kwargs(kwargs, AGG_KWARGS)
+        config = cls.init_kwargs(kwargs, AGG_KWARGS)
 
         if config['year'] == 2018:
             data = NSRDB_2018
 
         else:
             data = {
-                'full_disk': cls._get_agg_entry(config, extent='full'),
+                'full_disc': cls._get_agg_entry(config, extent='full'),
                 'conus': cls._get_agg_entry(config, extent='conus'),
                 'final': cls._get_agg_entry(config, extent='final'),
             }
@@ -490,7 +500,7 @@ def _aggregate(cls, kwargs):
 
     @classmethod
     def aggregate(cls, kwargs):
-        """Get config for conus and full disk high-resolution to low-resolution
+        """Get config for conus and full disc high-resolution to low-resolution
         aggregation.  This is then used as the input to `nsrdb.cli.aggregate`
 
         Parameters
@@ -521,7 +531,7 @@ def _blend(cls, kwargs):
             Dictionary with keys specifying the case for which to blend data
             files
         """
-        config = cls._init_kwargs(kwargs, BLEND_KWARGS)
+        config = cls.init_kwargs(kwargs, BLEND_KWARGS)
         config['map_col'] = EXTENT_MAP['map_col'][config['extent']]
         config['lon_seam'] = EXTENT_MAP['lon_seam'][config['extent']]
         config['meta_file'] = cls._get_meta(config, run_type='blend')
@@ -599,7 +609,7 @@ def _collect_blend(cls, kwargs):
             Dictionary with keys specifying the case for blend collection
         """
 
-        config = cls._init_kwargs(kwargs, COLLECT_BLEND_KWARGS)
+        config = cls.init_kwargs(kwargs, COLLECT_BLEND_KWARGS)
         config['meta_final'] = cls._get_meta(config, run_type='collect-blend')
         config['collect_dir'] = cls._get_run_name(config, run_type='blend')
         config['collect_tag'] = config['collect_dir'].replace('_blend', '')
@@ -650,7 +660,7 @@ def _collect_aggregate(cls, kwargs):
         kwargs : dict
             Dictionary with keys specifying the case for aggregation collection
         """
-        config = cls._init_kwargs(kwargs, COLLECT_AGG_KWARGS)
+        config = cls.init_kwargs(kwargs, COLLECT_AGG_KWARGS)
 
         config['meta_final'] = cls._get_meta(
             config, run_type='collect-aggregate'

diff --git a/nsrdb/config/templates/config_nsrdb_post2017.json b/nsrdb/config/templates/config_nsrdb_post2017.json
@@ -5,7 +5,7 @@
     "max_workers": 10,
     "n_chunks": 1,
     "memory": 178,
-    "n_writes": 50,
+    "n_writes": 5,
     "walltime": 48
   },
   "daily-all-sky": {},
@@ -80,4 +80,4 @@
     "fill_all": false,
     "max_workers": 4
   }
-}
+}