Skip to content

Commit

Permalink
Merge pull request #337 from OpenEnergyPlatform/feature-308-transform…
Browse files Browse the repository at this point in the history
…-technology-parameter

Feature #308 transform technology parameter

Closes #308
  • Loading branch information
FlorianK13 authored Aug 29, 2022
2 parents 9ba1132 + 4b78b43 commit 9717527
Show file tree
Hide file tree
Showing 14 changed files with 374 additions and 366 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ Here is a template for new release sections:

### Added
- Read version number for CI from setup.py [#333](https://github.com/rl-institut/super-repo/pull/333)

### Changed
- [#](https://github.com/rl-institut/super-repo/pull/)
- The `technology` parameter is renamed to `data` for clarity [#337](https://github.com/OpenEnergyPlatform/open-MaStR/pull/337)

### Removed
- [#](https://github.com/rl-institut/super-repo/pull/)

Expand Down
2 changes: 1 addition & 1 deletion docs/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ This data is updated on a daily base.
The following describes the process that starts when the download function is called with the parameter method="bulk".
First, the zipped files are downloaded and saved in `$HOME/.open-MaStR/data/xml_download`. The zipped folder contains many xml files,
which represent the different tables from the MaStR. Those tables are then parsed to a sqlite database. If only some specific
technologies are of interest, they can be specified with the parameter `technology`. Every table that is selected in `technology` will be deleted, if existent,
data are of interest, they can be specified with the parameter `data`. Every table that is selected in `data` will be deleted, if it exists,
and then filled with data from the xml files.

In the last step, a basic data cleansing is performed. Many entries in the MaStR from the bulk download are replaced by numbers.
Expand Down
4 changes: 2 additions & 2 deletions docs/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ additional parameters can be set to define in detail which data should be obtain
* - argument
- options for specification
- explanation
* - technology
* - data
- ["wind","biomass","combustion","gsgk","hydro","nuclear","storage","solar"]
- Select technologies to download.
- Select data to download.
* - api_data_types
- ["unit_data","eeg_data","kwk_data","permit_data"]
- Select the type of data to download.
Expand Down
8 changes: 4 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# bulk download
bulk_date_string = "today"
bulk_cleansing = True
technology_bulk = [
data_bulk = [
"biomass",
"combustion",
"gsgk",
Expand All @@ -43,7 +43,7 @@
api_limit = 10
api_processes = None

technology_api = [
data_api = [
"biomass",
"combustion",
"gsgk",
Expand Down Expand Up @@ -71,7 +71,7 @@
# bulk download
db.download(
method="bulk",
technology=technology_bulk,
data=data_bulk,
bulk_date_string="today",
bulk_cleansing=True,
)
Expand All @@ -83,7 +83,7 @@
api_chunksize=api_chunksize,
api_limit=api_limit,
api_processes=api_processes,
technology=technology_api,
data=data_api,
api_data_types=api_data_types,
api_location_types=api_location_types,
)
Expand Down
127 changes: 68 additions & 59 deletions open_mastr/mastr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,26 @@
from open_mastr.soap_api.mirror import MaStRMirror

from open_mastr.utils.helpers import (
technology_input_harmonisation,
print_api_settings,
validate_api_credentials,
transform_data_parameter,
validate_parameter_data,
)
from open_mastr.utils.config import create_data_dir, get_data_version_dir
from open_mastr.utils.config import (
create_data_dir,
get_data_version_dir,
get_project_home_dir,
)
import open_mastr.utils.orm as orm
from open_mastr.utils.data_io import cleaned_data


# import initialize_database dependencies
from open_mastr.utils.helpers import (
create_database_engine,
validate_parameter_format_for_download_method,
validate_parameter_format_for_mastr_init,
parse_date_string,
)
import open_mastr.utils.orm as orm
from open_mastr.utils.config import get_project_home_dir


class Mastr:
Expand Down Expand Up @@ -65,7 +68,7 @@ def __init__(self, engine="sqlite") -> None:
def download(
self,
method="bulk",
technology=None,
data=None,
bulk_date_string="today",
bulk_cleansing=True,
api_processes=None,
Expand All @@ -86,12 +89,33 @@ def download(
zipped bulk download or via the MaStR API. The latter requires an account
from marktstammdatenregister.de,
(see :ref:`Configuration <Configuration>`). Default to 'bulk'.
technology: str or list or None, optional
Determines which technologies are written to the database. If None, all technologies are
used. If it is a list, possible entries are "wind", "solar", "biomass", "hydro", "gsgk",
"combustion", "nuclear", "gas", "storage", "electricity_consumer", "location", "market",
"grid", "balancing_area" or "permit". If only one technology is of interest, this can be
given as a string. Default to None, where all technologies are included.
data: str or list or None, optional
Determines which types of data are written to the database. If None, all data is
used. If it is a list, the possible entries are listed in the table below for each download method.
Missing categories are still being developed. If only one type of data is of interest, it can be
given as a string. Defaults to None, in which case all data is included.
.. csv-table:: Values for data parameter
:header-rows: 1
:widths: 5 5 5
"Data", "Bulk", "API"
"wind", "Yes", "Yes"
"solar", "Yes", "Yes"
"biomass", "Yes", "Yes"
"hydro", "Yes", "Yes"
"gsgk", "Yes", "Yes"
"combustion", "Yes", "Yes"
"nuclear", "Yes", "Yes"
"gas", "Yes", "Yes"
"storage", "Yes", "Yes"
"electricity_consumer", "Yes", "No"
"location", "Yes", "Yes"
"market", "Yes", "No"
"grid", "Yes", "No"
"balancing_area", "Yes", "No"
"permit", "Yes", "Yes"
bulk_date_string: str, optional
Either "today" if the newest data dump should be downloaded from the MaStR website. If
an already downloaded dump should be used, state the date of the download in the format
Expand Down Expand Up @@ -137,16 +161,16 @@ def download(
Defaults to 1000.
api_data_types: list or None, optional
Select type of additional data that should be retrieved. Choose from
"unit_data", "eeg_data", "kwk_data", "permit_data".
"unit_data", "eeg_data", "kwk_data", "permit_data". Defaults to all.
api_location_types: list or None, optional
Select type of location that should be retrieved. Choose from
"location_elec_generation", "location_elec_consumption", "location_gas_generation",
"location_gas_consumption".
"location_gas_consumption". Defaults to all.
"""

validate_parameter_format_for_download_method(
method=method,
technology=technology,
data=data,
bulk_date_string=bulk_date_string,
bulk_cleansing=bulk_cleansing,
api_processes=api_processes,
Expand All @@ -156,6 +180,12 @@ def download(
api_data_types=api_data_types,
api_location_types=api_location_types,
)
(
data,
api_data_types,
api_location_types,
harm_log,
) = transform_data_parameter(method, data, api_data_types, api_location_types)

if method == "bulk":

Expand All @@ -172,35 +202,13 @@ def download(
write_mastr_xml_to_database(
engine=self.engine,
zipped_xml_file_path=zipped_xml_file_path,
technology=technology,
data=data,
bulk_cleansing=bulk_cleansing,
bulk_download_date=bulk_download_date,
)

if method == "API":
validate_api_credentials()
if isinstance(technology, str):
technology = [technology]
elif technology is None:
technology = [
"wind",
"biomass",
"combustion",
"gsgk",
"hydro",
"nuclear",
"storage",
"solar",
]
(
harm_log,
api_data_types,
api_location_types,
) = technology_input_harmonisation(
technology=technology,
api_data_types=api_data_types,
api_location_types=api_location_types,
)

# Set api_processes to None in order to avoid the malfunctioning usage
if api_processes:
Expand All @@ -212,7 +220,7 @@ def download(

print_api_settings(
harmonisation_log=harm_log,
technology=technology,
data=data,
api_date=api_date,
api_data_types=api_data_types,
api_chunksize=api_chunksize,
Expand All @@ -227,11 +235,11 @@ def download(
restore_dump=None,
)
# Download basic unit data
mastr_mirror.backfill_basic(technology, limit=api_limit, date=api_date)
mastr_mirror.backfill_basic(data, limit=api_limit, date=api_date)

# Download additional unit data
for tech in technology:
# mastr_mirror.create_additional_data_requests(tech)
for tech in data:
# mastr_mirror.create_additional_data_requests(data)
for data_type in api_data_types:
mastr_mirror.retrieve_additional_data(
tech, data_type, chunksize=api_chunksize, limit=api_limit
Expand Down Expand Up @@ -267,7 +275,7 @@ def to_csv(
chunksize: int
Defines the chunksize of the tables export. Default value is 500.000.
limit: None or int
Limits the number of exported technology and location units.
Limits the number of exported data and location units.
"""

create_data_dir()
Expand Down Expand Up @@ -305,24 +313,25 @@ def to_csv(
"location_gas_consumption",
]

# Validate and parse tables parameter TODO parameter renaming
validate_parameter_data(method="bulk", data=tables)
(
data,
api_data_types,
api_location_types,
harm_log,
) = transform_data_parameter(
method="bulk", data=tables, api_data_types=None, api_location_types=None
)

# Determine tables to export
technologies_to_export = []
additional_tables_to_export = []
if isinstance(tables, str):
# str to list
tables = [tables]
if tables is None:
technologies_to_export = all_technologies
additional_tables_to_export = all_additional_tables
print(f"Tables: {technologies_to_export}, {additional_tables_to_export}")
elif isinstance(tables, list):
for table in tables:
if table in all_technologies:
technologies_to_export.append(table)
elif table in all_additional_tables:
additional_tables_to_export.append(table)
else:
raise ValueError("Tables parameter has an invalid string!")
for table in data:
if table in all_technologies:
technologies_to_export.append(table)
elif table in all_additional_tables:
additional_tables_to_export.append(table)

if technologies_to_export:
print(f"\nTechnology tables: {technologies_to_export}")
Expand All @@ -336,7 +345,7 @@ def to_csv(
if technologies_to_export:
# fill basic unit table, after downloading with method = 'bulk' to use API export functions
api_export.reverse_fill_basic_units(technology=technologies_to_export)
# export to csv per technology
# export to csv per data
api_export.to_csv(
technology=technologies_to_export,
statistic_flag=None,
Expand Down
Loading

0 comments on commit 9717527

Please sign in to comment.