gee_catalog #1408
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: gee_catalog | |
on: | |
workflow_dispatch: | |
schedule: | |
- cron: '15 0 * * *' | |
jobs: | |
build: | |
runs-on: ubuntu-latest | |
steps: | |
- name: checkout repo content | |
uses: actions/checkout@v2 # checkout the repository content to github runner | |
- name: setup python | |
uses: actions/setup-python@v2 | |
with: | |
python-version: '3.8' # install the python version needed | |
- name: install python packages | |
run: | | |
python -m pip install --upgrade pip | |
python -m pip install --upgrade pip | |
pip install -U pip setuptools | |
pip install pendulum beautifulsoup4 requests natsort | |
- name: Script check | |
uses: jannekem/run-python-script-action@v1 | |
with: | |
script: | | |
import requests | |
from bs4 import BeautifulSoup | |
from datetime import datetime | |
import time | |
import json | |
import urllib | |
from natsort import natsorted | |
now = datetime.now() | |
catalog=[] | |
i = 1 | |
def parseurl(url): | |
global i | |
gparse = [] | |
sublinks = [] | |
try: | |
response = requests.get(url) | |
r = response.json() | |
gee_id = r["id"] | |
print(f"{i}: {gee_id}") | |
gee_title = r["title"] | |
gee_type = r["gee:type"] | |
gee_start = r["extent"]["temporal"]["interval"][0][0].split("T")[0] | |
if not r["extent"]["temporal"]["interval"][0][1] == None: | |
gee_end = r["extent"]["temporal"]["interval"][0][1].split("T")[0] | |
else: | |
gee_end = now.strftime("%Y-%m-%d") | |
gee_start_year = gee_start.split("-")[0] | |
gee_end_year = gee_end.split("-")[0] | |
gee_provider = r["providers"][0]["name"] | |
gee_tags = r["keywords"] | |
thumbnail_url = [ | |
assets["href"] for assets in r["links"] if assets["rel"] == "preview" | |
][0] | |
asset_url = [ | |
assets["href"] for assets in r["links"] if assets["rel"] == "license" | |
][0] | |
asset = { | |
"id": gee_id, | |
"provider": gee_provider, | |
"title": gee_title, | |
"start_date": gee_start, | |
"end_date": gee_end, | |
"startyear": gee_start_year, | |
"endyear": gee_end_year, | |
"type": gee_type, | |
"tags": ", ".join(gee_tags), | |
"asset_url": asset_url, | |
"thumbnail_url": thumbnail_url, | |
} | |
catalog.append(asset) | |
i = i + 1 | |
except Exception as e: | |
print(url) | |
print(e) | |
asset_list = [] | |
def yf(url): | |
page = requests.get(url) | |
if page.status_code == 200: | |
features = [ | |
assets["href"] | |
for assets in page.json()["links"] | |
if assets["rel"] == "child" | |
] | |
for feature in features: | |
if not feature.endswith("catalog.json"): | |
asset_list.append(feature) | |
else: | |
yf(feature) | |
yf(url="https://earthengine-stac.storage.googleapis.com/catalog/catalog.json") | |
item_list = natsorted(list(set(asset_list))) | |
try: | |
for items in item_list: | |
parseurl(items) | |
except Exception as e: | |
print(f"Failed for {items}") | |
print(e) | |
finally: | |
with open("gee_catalog.json", "w") as file: | |
json.dump(catalog, file, indent=4, sort_keys=True) | |
- name: json2csv | |
uses: jannekem/run-python-script-action@v1 | |
with: | |
script: | | |
import csv | |
import json | |
with open('gee_catalog.json') as f: | |
data = json.load(f) | |
with open('gee_catalog.csv','w') as csvfile: | |
writer=csv.DictWriter(csvfile,fieldnames=["id", "provider", "title", "start_date","end_date", "startyear","endyear","type","tags","asset_url","thumbnail_url"], delimiter=',',lineterminator='\n') | |
writer.writeheader() | |
for datasets in data: | |
gee_id = datasets['id'] | |
gee_provider = datasets['provider'] | |
gee_title = datasets['title'] | |
gee_start = datasets['start_date'] | |
gee_end = datasets['end_date'] | |
gee_start_year = datasets['startyear'] | |
gee_end_year = datasets['endyear'] | |
gee_type = datasets['type'] | |
gee_tags = datasets['tags'] | |
asset_url = datasets['asset_url'] | |
thumbnail_url = datasets['thumbnail_url'] | |
with open('gee_catalog.csv','a') as csvfile: | |
writer=csv.writer(csvfile,delimiter=',',lineterminator='\n') | |
writer.writerow([gee_id,gee_provider,gee_title,gee_start,gee_end,gee_start_year,gee_end_year,gee_type,gee_tags,asset_url,thumbnail_url]) | |
csvfile.close() | |
- name: file_check | |
run: ls -l -a | |
- name: commit files | |
continue-on-error: true | |
run: | | |
today=$(date +"%Y-%m-%d") | |
git config --local user.email "action@github.com" | |
git config --local user.name "GitHub Action" | |
git add -A | |
git commit -m "updated datasets ${today} UTC" -a | |
git pull origin master | |
- name: push changes | |
continue-on-error: true | |
uses: ad-m/github-push-action@v0.6.0 | |
with: | |
github_token: ${{ secrets.GITHUB_TOKEN }} | |
branch: master |