Skip to content

Add MIDC reader #605

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Oct 29, 2018
2 changes: 2 additions & 0 deletions docs/sphinx/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,8 @@ relevant to solar energy modeling.
iotools.read_srml
iotools.read_srml_month_from_solardat
iotools.read_surfrad
iotools.read_midc
iotools.read_midc_raw_data_from_nrel

A :py:class:`~pvlib.location.Location` object may be created from metadata
in some files.
Expand Down
2 changes: 2 additions & 0 deletions docs/sphinx/source/whatsnew/v0.6.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ Enhancements
:py:func:`pvlib.iotools.read_srml_month_from_solardat` to read University of
Oregon Solar Radiation Monitoring Laboratory data. (:issue:`589`)
* Created :py:func:`pvlib.iotools.read_surfrad` to read NOAA SURFRAD data. (:issue:`590`)
* Created :py:func:`pvlib.iotools.read_midc` and :py:func:`pvlib.iotools.read_midc_raw_data_from_nrel`
to read NREL MIDC data. (:issue:`601`)

Bug fixes
~~~~~~~~~
Expand Down
1,441 changes: 1,441 additions & 0 deletions pvlib/data/midc_20181014.txt

Large diffs are not rendered by default.

1,441 changes: 1,441 additions & 0 deletions pvlib/data/midc_raw_20181018.txt

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pvlib/iotools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
from pvlib.iotools.srml import read_srml # noqa: F401
from pvlib.iotools.srml import read_srml_month_from_solardat # noqa: F401
from pvlib.iotools.surfrad import read_surfrad # noqa: F401
from pvlib.iotools.midc import read_midc # noqa: F401
from pvlib.iotools.midc import read_midc_raw_data_from_nrel # noqa: F401
196 changes: 196 additions & 0 deletions pvlib/iotools/midc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
"""Functions to read NREL MIDC data.
"""
from functools import partial
import pandas as pd

# VARIABLE_MAP is a dictionary mapping partial MIDC field names to their
# pvlib names. Each key is the leading part of a MIDC column header;
# map_midc_to_pvlib matches keys with str.startswith and appends whatever
# follows the key in the header (with the bracketed units removed) to the
# pvlib name. See docstring of read_midc for description.

VARIABLE_MAP = {
    'Direct': 'dni',
    'Global': 'ghi',
    'Diffuse': 'dhi',
    'Airmass': 'airmass',
    'Azimuth Angle': 'solar_azimuth',
    'Zenith Angle': 'solar_zenith',
    # Both spellings of the temperature header map to the same pvlib name.
    'Air Temperature': 'temp_air',
    'Temperature': 'temp_air',
    'Dew Point Temp': 'temp_dew',
    'Relative Humidity': 'relative_humidity',
}

# Maps problematic timezones to 'Etc/GMT' for parsing. The keys are the
# timezone labels used as the header of the time column in MIDC files;
# format_index and format_index_raw fall back to the label itself when it is
# not listed here.
# NOTE(review): 'Etc/GMT+N' zones use the POSIX sign convention, so
# 'Etc/GMT+8' is 8 hours behind UTC -- confirm against the tz database.

TZ_MAP = {
    'PST': 'Etc/GMT+8',
    'CST': 'Etc/GMT+6',
}


def map_midc_to_pvlib(variable_map, field_name):
    """A mapper function to rename Dataframe columns to their pvlib
    counterparts.

    Parameters
    ----------
    variable_map: Dictionary
        A dictionary for mapping MIDC field name to pvlib name. Keys are the
        leading part of a MIDC field name, values are the pvlib variable
        names.
    field_name: string
        The Column to map.

    Returns
    -------
    label: string
        The pvlib variable name associated with the MIDC field, followed by
        the instrument label with spaces replaced by underscores, or the
        unchanged input if no key of `variable_map` is a prefix of it.

    Notes
    -----
    The units are expected in square brackets at the end of the field name,
    e.g. 'Global PSP [W/m^2]'. If the field name contains no bracket, all
    text following the matched prefix is treated as the instrument label
    instead of being truncated.

    The first key (in dictionary iteration order) that prefixes the field
    name is used.
    """
    for midc_name, pvlib_name in variable_map.items():
        if not field_name.startswith(midc_name):
            continue
        # Whatever follows the variable name is "<instrument> [<units>]".
        instrument_units = field_name[len(midc_name):]
        # partition() returns the whole string when '[' is absent, so a
        # header without units is no longer chopped by a -1 index; rstrip()
        # removes the separator before the bracket regardless of its width.
        instrument = instrument_units.partition('[')[0].rstrip()
        return pvlib_name + instrument.replace(' ', '_')
    return field_name


def format_index(data):
    """Index the Dataframe by timestamps built from its date and time columns.

    The timezone is taken from the label of the second (time) column and
    passed through TZ_MAP before the index is localized.

    Parameters
    ----------
    data: Dataframe
        Must contain a 'DATE (MM/DD/YYYY)' column. The second column must be
        labeled with the timezone and hold times in 'HH:MM' format.

    Returns
    -------
    data: Dataframe
        Dataframe with DatetimeIndex localized to the provided timezone.
    """
    time_label = data.columns[1]
    # Date and time strings are concatenated without a separator, hence the
    # format string has none between %Y and %H.
    stamps = pd.to_datetime(data['DATE (MM/DD/YYYY)'] + data[time_label],
                            format='%m/%d/%Y%H:%M')
    tz = TZ_MAP.get(time_label, time_label)
    return data.set_index(stamps).tz_localize(tz)


def format_index_raw(data):
    """Index a raw-format MIDC Dataframe by timestamps built from its year,
    day-of-year and time columns.

    The timezone is taken from the label of the column at position 3 (the
    fourth column) and passed through TZ_MAP before the index is localized.

    Parameters
    ----------
    data: Dataframe
        Must contain columns 'Year' and 'DOY'. The column at position 3 must
        be labeled with the timezone and hold the time as an integer that
        reads as HHMM once zero-padded to four digits.

    Returns
    -------
    data: Dataframe
        The data with a Datetime index localized to the provided timezone.
    """
    time_label = data.columns[3]
    # Zero-pad day-of-year to 3 digits and time to 4 digits so the
    # concatenated string parses unambiguously as %Y%j%H%M.
    stamp_text = (data.Year.apply(str)
                  + data.DOY.apply('{:03d}'.format)
                  + data[time_label].apply('{:04d}'.format))
    stamps = pd.to_datetime(stamp_text, format="%Y%j%H%M")
    tz = TZ_MAP.get(time_label, time_label)
    return data.set_index(stamps).tz_localize(tz)


def read_midc(filename, variable_map=VARIABLE_MAP, raw_data=False):
    """Read National Renewable Energy Laboratory Measurement and
    Instrumentation Data Center [1]_ weather data into a Dataframe.

    Parameters
    ----------
    filename: string
        Filename or url of data to read.
    variable_map: dictionary
        Dictionary for mapping MIDC field names to pvlib names. Defaults to
        `VARIABLE_MAP`; see the Notes section below for a description of
        its format.
    raw_data: boolean
        Set to true to build the index with format_index_raw, which handles
        the date/time columns of MIDC raw data files. Otherwise format_index
        is used.

    Returns
    -------
    data: Dataframe
        A dataframe with DatetimeIndex localized to the provided timezone
        and with columns renamed according to `variable_map`.

    Notes
    -----
    Keys of the `variable_map` dictionary should include the first part
    of a MIDC field name which indicates the variable being measured.

        e.g. 'Global PSP [W/m^2]' is entered as a key of 'Global'

    The 'PSP' indicating instrument is appended to the pvlib variable name
    after mapping to differentiate measurements of the same variable. For a
    full list of pvlib variable names see the `Variable Style Rules
    <https://pvlib-python.readthedocs.io/en/latest/variables_style_rules.html>`_.

    Be sure to check the units for the variables you will use on the
    `MIDC site <https://midcdmz.nrel.gov/>`_.

    References
    ----------
    .. [1] NREL: Measurement and Instrumentation Data Center
        `https://midcdmz.nrel.gov/ <https://midcdmz.nrel.gov/>`_
    """
    # The two file layouts differ only in how the timestamp is assembled.
    build_index = format_index_raw if raw_data else format_index
    frame = build_index(pd.read_csv(filename))
    rename_column = partial(map_midc_to_pvlib, variable_map)
    return frame.rename(columns=rename_column)


def read_midc_raw_data_from_nrel(site, start, end):
    """Request MIDC data from the raw data api and read it with read_midc.

    Parameters
    ----------
    site: string
        The MIDC station id.
    start: datetime
        Start date for requested data.
    end: datetime
        End date for requested data.

    Returns
    -------
    data: Dataframe
        Dataframe with DatetimeIndex localized to the station location.

    Notes
    -----
    Requests spanning an instrumentation change will yield an error. See the
    `MIDC raw data api page
    <https://midcdmz.nrel.gov/apps/data_api_doc.pl?_idtextlist>`_
    for more details and considerations.
    """
    date_format = '%Y%m%d'
    args = {'site': site,
            'begin': start.strftime(date_format),
            'end': end.strftime(date_format)}
    query = '&'.join('{}={}'.format(key, value)
                     for key, value in args.items())
    url = 'https://midcdmz.nrel.gov/apps/data_api.pl?' + query
    return read_midc(url, raw_data=True)
73 changes: 73 additions & 0 deletions pvlib/test/test_midc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import inspect
import os

import pandas as pd
from pandas.util.testing import network
import pytest
import pytz

from pvlib.iotools import midc


# Absolute path of the directory containing this test module, resolved from
# the source file of the currently executing frame.
test_dir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
# Sample MIDC file, located relative to the test directory.
midc_testfile = os.path.join(test_dir, '../data/midc_20181014.txt')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps we should add a raw data file as well to decouple the format test from the network test

# Sample file in the MIDC raw data layout, located relative to the test
# directory.
midc_raw_testfile = os.path.join(test_dir, '../data/midc_raw_20181018.txt')
# MIDC raw data api url for site UAT, begin=20181018 and end=20181019.
midc_network_testfile = ('https://midcdmz.nrel.gov/apps/data_api.pl'
                         '?site=UAT&begin=20181018&end=20181019')


@pytest.mark.parametrize('field_name,expected', [
    ('Temperature @ 2m [deg C]', 'temp_air_@_2m'),
    ('Global PSP [W/m^2]', 'ghi_PSP'),
    ('Temperature @ 50m [deg C]', 'temp_air_@_50m'),
    # No key of VARIABLE_MAP prefixes this header, so it is expected back
    # unchanged.
    ('Other Variable [units]', 'Other Variable [units]'),
])
def test_read_midc_mapper_function(field_name, expected):
    """Headers are renamed via the default map; others pass through."""
    mapped = midc.map_midc_to_pvlib(midc.VARIABLE_MAP, field_name)
    assert mapped == expected


def test_midc_format_index():
    """format_index yields an MST-localized DatetimeIndex spanning the
    first and last minute of 2018-10-14 for the sample file."""
    data = midc.format_index(pd.read_csv(midc_testfile))
    expected_start = pd.Timestamp("20181014 00:00").tz_localize("MST")
    expected_end = pd.Timestamp("20181014 23:59").tz_localize("MST")
    # isinstance is the idiomatic type check and tolerates subclasses.
    assert isinstance(data.index, pd.DatetimeIndex)
    assert data.index[0] == expected_start
    assert data.index[-1] == expected_end


def test_midc_format_index_tz_conversion():
    """A time column labeled 'PST' is localized to 'Etc/GMT+8'."""
    # Relabel the time column so format_index sees a timezone listed in
    # TZ_MAP.
    relabeled = pd.read_csv(midc_testfile).rename(columns={'MST': 'PST'})
    indexed = midc.format_index(relabeled)
    assert indexed.index[0].tz == pytz.timezone('Etc/GMT+8')


def test_midc_format_index_raw():
    """format_index_raw yields an MST-localized index spanning the first and
    last minute of 2018-10-18 for the raw sample file."""
    # Review note carried over from the pull request: a maintainer asked
    # "doesn't this also need @network?". This test reads the local
    # midc_raw_testfile rather than a url, so it carries no @network
    # decorator.
    data = midc.format_index_raw(pd.read_csv(midc_raw_testfile))
    expected_start = pd.Timestamp('20181018 00:00').tz_localize('MST')
    expected_end = pd.Timestamp('20181018 23:59').tz_localize('MST')
    assert data.index[0] == expected_start
    assert data.index[-1] == expected_end


def test_read_midc_var_mapping_as_arg():
    """read_midc renames columns when the variable map is passed
    explicitly."""
    data = midc.read_midc(midc_testfile, variable_map=midc.VARIABLE_MAP)
    for column in ('ghi_PSP', 'temp_air_@_2m', 'temp_air_@_50m'):
        assert column in data.columns


@network
def test_read_midc_raw_data_from_nrel():
    """Fetching site UAT for 2018-10-18 through 2018-10-19 returns mapped
    columns and 2880 rows."""
    first_day, last_day = pd.Timestamp('20181018'), pd.Timestamp('20181019')
    data = midc.read_midc_raw_data_from_nrel('UAT', first_day, last_day)
    assert 'dni_Normal' in data.columns
    # presumably two days of one-minute records (2 * 1440) -- TODO confirm
    assert data.index.size == 2880