Merge pull request #129 from podaac/release/0.10.0

Release/0.10.0

jamesfwood authored Jan 2, 2025
2 parents 705ac78 + 5f8e1b5 commit 222365b

Showing 15 changed files with 1,766 additions and 1,266 deletions.
25 changes: 24 additions & 1 deletion .github/workflows/build-pipeline.yml
@@ -85,7 +85,7 @@ jobs:
run: |
poetry run pytest --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/
- name: SonarCloud Scan
uses: sonarsource/sonarcloud-github-action@master
uses: sonarsource/sonarqube-scan-action@v4
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
@@ -243,6 +243,29 @@ jobs:
pull: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: Deploy Harmony
env:
ENV: ${{ env.venue }}
CMR_USER: ${{ secrets.CMR_USER }}
CMR_PASS: ${{ secrets.CMR_PASS }}
if: |
github.ref == 'refs/heads/main' ||
startsWith(github.ref, 'refs/heads/release')
working-directory: deployment
run:
poetry run python harmony_deploy.py --tag ${{ env.software_version }}
- name: Create Release
id: create_release
if: |
github.ref == 'refs/heads/main'
uses: softprops/action-gh-release@v2
with:
tag_name: "${{ env.software_version }}" # Use the tag that triggered the action
release_name: Release v${{ env.software_version }}
draft: false
generate_release_notes: true
token: ${{ secrets.GITHUB_TOKEN }}

# As of 2023/01/23 these steps below for scanning the Docker image with Snyk are failing. I've tried both the official Snyk
# action https://github.com/snyk/actions/tree/master/docker and this method below of manually calling the CLI.
# The error when using the official Snyk action is
3 changes: 2 additions & 1 deletion .pylintrc
@@ -416,7 +416,8 @@ disable=raw-checker-failed,
useless-suppression,
deprecated-pragma,
use-symbolic-message-instead,
too-many-arguments
too-many-arguments,
too-many-positional-arguments

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
17 changes: 17 additions & 0 deletions CHANGELOG.md
@@ -13,6 +13,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed


## [0.10.0]

### Added
- Update GitHub Actions
- Added Harmony deployment into GitHub Actions.
### Changed
- [issue #117](https://github.com/podaac/concise/issues/117): Add part of URL to output file name
- Update Python libraries
- Update harmony service lib that changed project structure
- Add Concise exception to propagate up to Harmony API calls.
### Deprecated
### Removed
### Fixed
- Variable Merging
- Fixed the way we merge variables when granules in a collection have varying variables.


## [0.9.0]

### Added
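
As context for the issue #117 entry above ("Add part of URL to output file name"), the sketch below shows the naming scheme introduced by the service.py change later in this diff. It is illustrative only: the granule URL, collection concept ID, and end time are made-up values, not taken from a real request.

from datetime import datetime, timezone
from pathlib import Path

# Illustrative inputs; in the service these come from the request's STAC items.
first_granule_url = "https://example.com/granules/ascat_20200102_120000_metopa.nc"
collection = "C0000000000-POCLOUD"  # hypothetical collection concept ID
end_datetime = datetime(2020, 1, 2, 12, 0, tzinfo=timezone.utc)

# Mirrors the filename built in podaac/merger/harmony/service.py:
# <stem of first granule URL>_<end datetime>_<collection>_merged.nc4
first_url_name = Path(first_granule_url).stem
filename = f'{first_url_name}_{end_datetime.strftime("%Y%m%dT%H%M%SZ")}_{collection}_merged.nc4'
print(filename)
# ascat_20200102_120000_metopa_20200102T120000Z_C0000000000-POCLOUD_merged.nc4
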
2 changes: 1 addition & 1 deletion LICENSE
@@ -186,7 +186,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]
Copyright 2024 California Institute of Technology

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
64 changes: 64 additions & 0 deletions deployment/harmony_deploy.py
@@ -0,0 +1,64 @@
import os
import requests
import json
import logging
import argparse
from requests.auth import HTTPBasicAuth

# Environment variables
ENV = os.getenv('ENV')
CMR_USER = os.getenv('CMR_USER')
CMR_PASS = os.getenv('CMR_PASS')

def bearer_token() -> str:
tokens = []
headers = {'Accept': 'application/json'}
url = f"https://{'uat.' if ENV == 'uat' else ''}urs.earthdata.nasa.gov/api/users"

# First just try to get a token that already exists
try:
resp = requests.get(url + "/tokens", headers=headers, auth=HTTPBasicAuth(CMR_USER, CMR_PASS))
response_content = json.loads(resp.content)

for x in response_content:
tokens.append(x['access_token'])

except Exception: # noqa E722
logging.warning("Error getting the token - check user name and password", exc_info=True)

# No tokens exist, try to create one
if not tokens:
try:
resp = requests.post(url + "/token", headers=headers, auth=HTTPBasicAuth(CMR_USER, CMR_PASS))
response_content = json.loads(resp.content)
tokens.append(response_content['access_token'])
except Exception: # noqa E722
logging.warning("Error getting the token - check user name and password", exc_info=True)

# If still no token, then we can't do anything
if not tokens:
raise RuntimeError("Unable to get bearer token from EDL")

return next(iter(tokens))

if __name__ == "__main__":

parser = argparse.ArgumentParser(description="Update the service image tag.")
parser.add_argument("--tag", help="The new tag version to update.", required=True)
args = parser.parse_args()

url = f"https://harmony.{'uat.' if ENV == 'uat' else ''}earthdata.nasa.gov/service-image-tag/podaac-concise"
token = bearer_token()

headers = {
"Authorization": f"Bearer {token}",
"Content-type": "application/json"
}
data = {
"tag": args.tag
}

response = requests.put(url, headers=headers, json=data)
response.raise_for_status()

print(f"Response JSON: {response.json()}")
2 changes: 1 addition & 1 deletion podaac/merger/harmony/cli.py
@@ -1,7 +1,7 @@
"""A Harmony CLI wrapper around Concise"""

from argparse import ArgumentParser
import harmony
import harmony_service_lib as harmony
from podaac.merger.harmony.service import ConciseService


4 changes: 2 additions & 2 deletions podaac/merger/harmony/download_worker.py
@@ -8,8 +8,8 @@
import re
from urllib.parse import urlparse

from harmony.logging import build_logger
from harmony.util import download
from harmony_service_lib.logging import build_logger
from harmony_service_lib.util import download


def multi_core_download(urls, destination_dir, access_token, cfg, process_count=None):
150 changes: 96 additions & 54 deletions podaac/merger/harmony/service.py
@@ -6,9 +6,12 @@
from shutil import copyfile
from urllib.parse import urlsplit
from uuid import uuid4
import traceback
import sys

from harmony.adapter import BaseHarmonyAdapter
from harmony.util import bbox_to_geometry, stage
from harmony_service_lib.adapter import BaseHarmonyAdapter
from harmony_service_lib.util import bbox_to_geometry, stage
from harmony_service_lib.exceptions import HarmonyException
from pystac import Catalog, Item
from pystac.item import Asset

@@ -20,6 +23,37 @@
NETCDF4_MIME = 'application/x-netcdf4' # pylint: disable=invalid-name


class ConciseException(HarmonyException):
"""Concise Exception class for custom error messages to see in harmony api calls."""
def __init__(self, original_exception):
# Ensure we can extract traceback information
if original_exception.__traceback__ is None:
# Capture the current traceback if not already present
try:
raise original_exception
except type(original_exception):
original_exception.__traceback__ = sys.exc_info()[2]

# Extract the last traceback entry (most recent call) for the error location
tb = traceback.extract_tb(original_exception.__traceback__)[-1]

# Get the error details: file, line, function, and message
filename = tb.filename
lineno = tb.lineno
funcname = tb.name
error_msg = str(original_exception)

# Format the error message to be more readable
readable_message = (f"Error in file '{filename}', line {lineno}, in function '{funcname}': "
f"{error_msg}")

# Call the parent class constructor with the formatted message and category
super().__init__(readable_message, 'podaac/concise')

# Store the original exception for potential further investigation
self.original_exception = original_exception


class ConciseService(BaseHarmonyAdapter):
"""
A harmony-service-lib wrapper around the Concise module. This wrapper does
@@ -32,7 +66,7 @@ def invoke(self):
Primary entrypoint into the service wrapper. Overrides BaseHarmonyAdapter.invoke
"""
if not self.catalog:
# Message-only support is being depreciated in Harmony so we should expect to
# Message-only support is being depreciated in Harmony, so we should expect to
# only see requests with catalogs when invoked with a newer Harmony instance
# https://github.com/nasa/harmony-service-lib-py/blob/21bcfbda17caf626fb14d2ac4f8673be9726b549/harmony/adapter.py#L71
raise RuntimeError('Invoking CONCISE without a STAC catalog is not supported')
@@ -42,7 +76,7 @@ def process_catalog(self, catalog: Catalog):
def process_catalog(self, catalog: Catalog):
"""
Recursively process a catalog and all its children. Adapted from
BaseHarmonyAdapter._process_catalog_recursive to specfifically
BaseHarmonyAdapter._process_catalog_recursive to specifically
support our particular use case for many-to-one
Parameters
@@ -55,60 +89,68 @@ def process_catalog(self, catalog: Catalog):
pystac.Catalog
A new catalog containing the results from the merge
"""
result = catalog.clone()
result.id = str(uuid4())
result.clear_children()

# Get all the items from the catalog, including from child or linked catalogs
items = list(self.get_all_catalog_items(catalog))
try:
result = catalog.clone()
result.id = str(uuid4())
result.clear_children()

# Get all the items from the catalog, including from child or linked catalogs
items = list(self.get_all_catalog_items(catalog))

# Quick return if catalog contains no items
if len(items) == 0:
return result

# -- Process metadata --
bbox = []
granule_urls = []
datetimes = [
datetime.max.replace(tzinfo=timezone.utc), # start
datetime.min.replace(tzinfo=timezone.utc) # end
]

for item in items:
get_bbox(item, bbox)
get_granule_url(item, granule_urls)
get_datetime(item, datetimes)

# Items did not have a bbox; valid under spec
if len(bbox) == 0:
bbox = None

# -- Perform merging --
collection = self._get_item_source(items[0]).collection
first_granule_url = []
get_granule_url(items[0], first_granule_url)
first_url_name = Path(first_granule_url[0]).stem
filename = f'{first_url_name}_{datetimes[1].strftime("%Y%m%dT%H%M%SZ")}_{collection}_merged.nc4'

with TemporaryDirectory() as temp_dir:
self.logger.info('Starting granule downloads')
input_files = multi_core_download(granule_urls, temp_dir, self.message.accessToken, self.config)
self.logger.info('Finished granule downloads')

output_path = Path(temp_dir).joinpath(filename).resolve()
merge_netcdf_files(input_files, output_path, granule_urls, logger=self.logger)
staged_url = self._stage(str(output_path), filename, NETCDF4_MIME)

# -- Output to STAC catalog --
result.clear_items()
properties = {
"start_datetime": datetimes[0].isoformat(),
"end_datetime": datetimes[1].isoformat()
}

item = Item(str(uuid4()), bbox_to_geometry(bbox), bbox, None, properties)
asset = Asset(staged_url, title=filename, media_type=NETCDF4_MIME, roles=['data'])
item.add_asset('data', asset)
result.add_item(item)

# Quick return if catalog contains no items
if len(items) == 0:
return result

# -- Process metadata --
bbox = []
granule_urls = []
datetimes = [
datetime.max.replace(tzinfo=timezone.utc), # start
datetime.min.replace(tzinfo=timezone.utc) # end
]

for item in items:
get_bbox(item, bbox)
get_granule_url(item, granule_urls)
get_datetime(item, datetimes)

# Items did not have a bbox; valid under spec
if len(bbox) == 0:
bbox = None

# -- Perform merging --
collection = self._get_item_source(items[0]).collection
filename = f'{collection}_merged.nc4'

with TemporaryDirectory() as temp_dir:
self.logger.info('Starting granule downloads')
input_files = multi_core_download(granule_urls, temp_dir, self.message.accessToken, self.config)
self.logger.info('Finished granule downloads')

output_path = Path(temp_dir).joinpath(filename).resolve()
merge_netcdf_files(input_files, output_path, granule_urls, logger=self.logger)
staged_url = self._stage(str(output_path), filename, NETCDF4_MIME)

# -- Output to STAC catalog --
result.clear_items()
properties = {
"start_datetime": datetimes[0].isoformat(),
"end_datetime": datetimes[1].isoformat()
}

item = Item(str(uuid4()), bbox_to_geometry(bbox), bbox, None, properties)
asset = Asset(staged_url, title=filename, media_type=NETCDF4_MIME, roles=['data'])
item.add_asset('data', asset)
result.add_item(item)

return result
except Exception as ex:
raise ConciseException(ex) from ex

def _stage(self, local_filename, remote_filename, mime):
"""
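
To illustrate the new error handling above: process_catalog now wraps any exception in ConciseException, which reports the file, line, and function of the failure through the Harmony API. A minimal sketch, assuming the package and harmony_service_lib are installed; the failing helper below is made up purely to trigger an error.

from podaac.merger.harmony.service import ConciseException

def fail_during_merge():
    # Made-up helper standing in for any step of process_catalog.
    raise ValueError("granule has no variables")

try:
    fail_during_merge()
except Exception as ex:
    # Mirrors the `raise ConciseException(ex) from ex` added in process_catalog.
    # The wrapped message reads roughly:
    #   Error in file '<this file>', line <n>, in function 'fail_during_merge': granule has no variables
    raise ConciseException(ex) from ex
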
2 changes: 1 addition & 1 deletion podaac/merger/merge.py
@@ -24,7 +24,7 @@ def is_file_empty(parent_group: nc.Dataset | nc.Group) -> bool:
return True


def merge_netcdf_files(original_input_files: list[Path], # pylint: disable=too-many-locals
def merge_netcdf_files(original_input_files: list[Path], # pylint: disable=too-many-locals,too-many-positional-arguments
output_file: str,
granule_urls,
logger=getLogger(__name__),
1 change: 1 addition & 0 deletions podaac/merger/merge_worker.py
@@ -58,6 +58,7 @@ def max_var_memory(file_list: list[Path], var_info: dict, max_dims) -> int:
return max_var_mem


# pylint: disable=too-many-positional-arguments
def run_merge(merged_dataset: nc.Dataset,
file_list: list[Path],
var_info: dict,