Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature 449 existing date parameter #452

Merged
merged 5 commits into from
Aug 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ repos:
rev: 22.6.0
hooks:
- id: black
language_version: python3.9
language_version: python3.10
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/
## [v0.1X.X] current - 2023-XX-XX
### Added
- User-defined output path for csv, xml, database [#402](https://github.com/OpenEnergyPlatform/open-MaStR/pull/402)
- Add date=existing parameter to Mastr.download [#452](https://github.com/OpenEnergyPlatform/open-MaStR/pull/452)
### Changed
### Removed
- Delete `on push` for github workflow [#445](https://github.com/OpenEnergyPlatform/open-MaStR/pull/445)
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sphinx>=2
sphinx<7
sphinx-rtd-theme
sphinx-tabs
m2r2
10 changes: 3 additions & 7 deletions open_mastr/mastr.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
get_data_version_dir,
get_project_home_dir,
get_output_dir,
setup_logger
setup_logger,
)
import open_mastr.utils.orm as orm

Expand Down Expand Up @@ -65,7 +65,6 @@ class Mastr:
"""

def __init__(self, engine="sqlite") -> None:

validate_parameter_format_for_mastr_init(engine)
self.output_dir = get_output_dir()
self.home_directory = get_project_home_dir()
Expand Down Expand Up @@ -143,7 +142,7 @@ def download(
Either "today" or None if the newest data dump should be downloaded
from the MaStR website. If an already downloaded dump should be used,
state the date of the download in the format
"yyyymmdd". Defaults to None.
"yyyymmdd" or use the string "existing". Defaults to None.

For API method:

Expand Down Expand Up @@ -215,10 +214,9 @@ def download(
method, data, api_data_types, api_location_types, **kwargs
)

date = transform_date_parameter(method, date, **kwargs)
date = transform_date_parameter(self, method, date, **kwargs)

if method == "bulk":

# Find the name of the zipped xml folder
bulk_download_date = parse_date_string(date)
xml_folder_path = os.path.join(self.output_dir, "data", "xml_download")
Expand Down Expand Up @@ -349,15 +347,13 @@ def to_csv(

# Export technologies to csv
for tech in technologies_to_export:

db_query_to_csv(
db_query=create_db_query(tech=tech, limit=limit, engine=self.engine),
data_table=tech,
chunksize=chunksize,
)
# Export additional tables to csv
for addit_table in additional_tables_to_export:

db_query_to_csv(
db_query=create_db_query(
additional_table=addit_table, limit=limit, engine=self.engine
Expand Down
50 changes: 28 additions & 22 deletions open_mastr/utils/helpers.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,38 @@
import os
import json
import os
import sys
from contextlib import contextmanager
from datetime import date, datetime
from warnings import warn

import dateutil
import pandas as pd
import sqlalchemy
from sqlalchemy.sql import insert, literal_column
from dateutil.parser import parse
from sqlalchemy import create_engine
from sqlalchemy.orm import Query, sessionmaker

import pandas as pd
from sqlalchemy.sql import insert, literal_column
from tqdm import tqdm

from open_mastr.soap_api.download import MaStRAPI, log
from open_mastr.soap_api.metadata.create import create_datapackage_meta_json
from open_mastr.utils import orm
from open_mastr.utils.config import (
get_filenames,
get_data_version_dir,
column_renaming,
get_data_version_dir,
get_filenames,
)

from open_mastr.soap_api.download import MaStRAPI, log
from open_mastr.utils.constants import (
BULK_DATA,
TECHNOLOGIES,
ADDITIONAL_TABLES,
API_DATA,
API_DATA_TYPES,
API_LOCATION_TYPES,
BULK_INCLUDE_TABLES_MAP,
BULK_ADDITIONAL_TABLES_CSV_EXPORT_MAP,
BULK_DATA,
BULK_INCLUDE_TABLES_MAP,
ORM_MAP,
TECHNOLOGIES,
UNIT_TYPE_MAP,
ADDITIONAL_TABLES,
)


Expand Down Expand Up @@ -88,7 +87,6 @@ def validate_parameter_format_for_download_method(
api_location_types,
**kwargs,
) -> None:

if "technology" in kwargs:
data = kwargs["technology"]
warn("'technology' parameter is deprecated. Use 'data' instead")
Expand Down Expand Up @@ -173,7 +171,7 @@ def validate_parameter_date(method, date) -> None:
if date is None: # default
return
if method == "bulk":
if date != "today":
if date not in ["today", "existing"]:
try:
_ = parse(date)
except (dateutil.parser._parser.ParserError, TypeError) as e:
Expand Down Expand Up @@ -297,11 +295,25 @@ def transform_data_parameter(
return data, api_data_types, api_location_types, harmonisation_log


def transform_date_parameter(method, date, **kwargs):

def transform_date_parameter(self, method, date, **kwargs):
if method == "bulk":
date = kwargs.get("bulk_date", date)
date = "today" if date is None else date
if date == "existing":
existing_files_list = os.listdir(
os.path.join(self.home_directory, "data", "xml_download")
)
if not existing_files_list:
date = "today"
print(
"By choosing `date`='existing' you want to use an existing "
"xml download."
"However no xml_files were downloaded yet. The parameter `date` is"
"therefore set to 'today'."
)
# we assume that there is only one file in the folder which is the
# zipped xml folder
date = existing_files_list[0].split("_")[1].split(".")[0]
elif method == "API":
date = kwargs.get("api_date", date)

Expand Down Expand Up @@ -333,7 +345,6 @@ def print_api_settings(
api_processes,
api_location_types,
):

print(
f"Downloading with soap_API.\n\n -- API settings -- \nunits after date: "
f"{date}\nunit download limit per data: "
Expand Down Expand Up @@ -467,9 +478,7 @@ def create_db_query(
unit_type_map_reversed = reverse_unit_type_map()

with session_scope(engine=engine) as session:

if tech:

# Select orm tables for specified additional_data.
orm_tables = {
f"{dat}": getattr(orm, ORM_MAP[tech].get(dat, "KeyNotAvailable"), None)
Expand Down Expand Up @@ -540,7 +549,6 @@ def create_db_query(
return query_tech

if additional_table:

orm_table = getattr(orm, ORM_MAP[additional_table], None)

query_additional_tables = Query(orm_table, session=session)
Expand Down Expand Up @@ -573,7 +581,6 @@ def save_metadata(data: list = None, engine=None) -> None:
unit_type_map_reversed = reverse_unit_type_map()

with session_scope(engine=engine) as session:

# check for latest db entry for exported technologies
mastr_technologies = [unit_type_map_reversed[tech] for tech in data]
newest_date = (
Expand Down Expand Up @@ -729,7 +736,6 @@ def db_query_to_csv(db_query, data_table: str, chunksize: int) -> None:
chunk_df[col] = chunk_df[col].str.replace("\r", "")

if not chunk_df.empty:

if chunk_number == 0:
chunk_df.to_csv(
csv_file,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def parameter_dict_working_list():
None,
["wind", "solar"],
],
"date": ["today", "20200108"],
"date": ["today", "20200108", "existing"],
"bulk_cleansing": [True, False],
"api_processes": [None],
"api_limit": [50],
Expand Down