From 952c7d7c54072e26bddafb31a4968fde671c2c26 Mon Sep 17 00:00:00 2001 From: Bryant Chow Date: Wed, 25 Jan 2023 13:27:06 -0900 Subject: [PATCH] feature mass download (#64) * fixed exclude string making and added a bool check for deleting tmpdir in mass download function * removed accidental debug statement --- pysep/configs/test/test_mass_download.yaml | 45 +++++++ pysep/pysep.py | 134 +++++++++++++++++++-- 2 files changed, 169 insertions(+), 10 deletions(-) create mode 100644 pysep/configs/test/test_mass_download.yaml diff --git a/pysep/configs/test/test_mass_download.yaml b/pysep/configs/test/test_mass_download.yaml new file mode 100644 index 0000000..16cd157 --- /dev/null +++ b/pysep/configs/test/test_mass_download.yaml @@ -0,0 +1,45 @@ +event_tag: 2009-04-07T201255_SOUTHERN_ALASKA +config_file: null +client: IRIS +client_debug: false +use_mass_download: true +timeout: 600 +taup_model: ak135 +event_selection: default +origin_time: '2009-04-07T20:12:55.351000Z' +seconds_before_event: 20 +seconds_after_event: 20 +event_latitude: 61.4542 +event_longitude: -149.7428 +event_depth_km: 33.033 +event_magnitude: 4.6 +networks: AK,YV,AV,AT,XZ,PN +stations: '*,-GOAT' +channels: BH? +locations: '*' +reference_time: '2009-04-07T20:12:55.351000Z' +seconds_before_ref: 100 +seconds_after_ref: 300 +phase_list: null +mindistance: 0 +maxdistance: 300.0 +minazimuth: 0 +maxazimuth: 360 +minlatitude: null +maxlatitude: null +minlongitude: null +maxlongitude: null +demean: true +detrend: true +taper_percentage: 0 +rotate: +- ENZ +- RTZ +remove_response: true +output_unit: VEL +water_level: 60 +pre_filt: default +scale_factor: 1.0 +resample_freq: 50 +remove_clipped: false +log_level: DEBUG diff --git a/pysep/pysep.py b/pysep/pysep.py index 959c1cc..1128d48 100644 --- a/pysep/pysep.py +++ b/pysep/pysep.py @@ -10,6 +10,7 @@ """ import argparse import os +import shutil import sys import yaml import warnings @@ -17,10 +18,13 @@ from glob import glob from pathlib import Path -from obspy import UTCDateTime, Stream +from obspy import UTCDateTime, Stream, Inventory, read, read_inventory from obspy.clients.fdsn import Client from obspy.clients.fdsn.header import FDSNBadRequestException +from obspy.clients.fdsn.mass_downloader import (RectangularDomain, Restrictions, + CircularDomain, MassDownloader) from obspy.core.event import Event, Origin, Magnitude +from obspy.geodetics import kilometer2degrees from pysep import logger from pysep.utils.cap_sac import (append_sac_headers, write_cap_weights_files, @@ -62,6 +66,7 @@ def __init__(self, config_file=None, event_selection="default", log_level="DEBUG", timeout=600, write_files="all", plot_files="all", llnl_db_path=None, output_dir=None, overwrite=False, legacy_naming=False, overwrite_event_tag=None, + use_mass_download=False, **kwargs): """ Define a default set of parameters @@ -172,6 +177,9 @@ def __init__(self, config_file=None, event_selection="default", :type overwrite_event_tag: str :param overwrite_event_tag: option to allow the user to set their own event tag, rather than the automatically generated one + :type use_mass_download: bool + :param use_mass_download: Use ObsPy's mass download option to download + all available stations in the region regardless of data provider. """ # Internal attribute but define first so that it sits at the top of # written config files @@ -185,6 +193,7 @@ def __init__(self, config_file=None, event_selection="default", self._user = user self._password = password self.taup_model = taup_model + self.use_mass_download = use_mass_download # Parameters related to event selection self.event_selection = event_selection @@ -385,6 +394,10 @@ def check(self): f"{acceptable_plot_files}" ) + if self.use_mass_download is True: + logger.info("will use option `mass_download`, ignoring `client` " + "and downloading data from all available data centers") + def get_client(self): """ Options to choose different Clients based on attribute `client` which @@ -720,6 +733,101 @@ def _bulk_query_waveforms_from_client(self): return st + def mass_download(self): + """ + Use ObsPy Mass downloader to grab events from a pre-determined region + + Keyword Arguments + :: + str domain_type: + How to define the search region domain + - rectangular: rectangular bounding box defined by min/max + latitude/longitude + - circular: circular bounding circle defined by the events + latitude and longitude, with radii defined by `mindistance` + and `maxdistance` + bool delete_tmpdir: + Remove the temporary directories that store the MSEED and + StationXML files which were downloaded by the mass downloader. + Saves space but also if anything fails prior to saving data, + the downloaded data will not be saved. Defaults to True. + """ + domain_type = self.kwargs.get("domain_type", "rectangular") + delete_tmpdir = self.kwargs.get("delete_tmpdir", True) + # Get around the fact that command line arguments are input as strings + if isinstance(delete_tmpdir, str): + assert(delete_tmpdir.capitalize() in ["True", "False"]) + delete_tmpdir = bool(delete_tmpdir.capitalize() == "True") + + logger.info("using ObsPy mass downloader to download waveform and " + "station metadata") + + # Define the bounding box/circle that specifies our region of interest + if domain_type == "rectangular": + logger.info("using a rectangular domain for mass downloader") + domain = RectangularDomain(minlatitude=self.minlatitude, + maxlatitude=self.maxlatitude, + minlongitude=self.minlongitude, + maxlongitude=self.maxlongitude) + elif domain_type == "circular": + logger.info("using a circular domain for mass downloader") + domain = CircularDomain( + latitude=self.event_latitude, longitude=self.event_longitude, + minradius=kilometer2degrees(self.mindistance), + maxradius=kilometer2degrees(self.maxdistance) + ) + else: + raise NotImplementedError(f"`domain_type` must be 'rectangular' or" + f"'circular'") + + # Drop any excluded stations + stations = ",".join([sta for sta in self.stations.split(",") + if "-" not in sta]) + sta_exclude = [sta[1:] for sta in self.stations.split(",") + if "-" in sta] + networks = ",".join([sta for sta in self.networks.split(",") + if "-" not in sta]) + net_exclude = [net[1:] for net in self.networks.split(",") + if "-" in net] + + # Set restrictions on the search criteria for data + restrictions = Restrictions( + starttime=self.reference_time - self.seconds_before_ref, + endtime=self.reference_time + self.seconds_after_ref, + reject_channels_with_gaps=False, minimum_length=0., + network=networks, station=stations, location=self.locations, + channel=self.channels, exclude_networks=net_exclude, + exclude_stations=sta_exclude, + ) + + # Mass downloader will download files to a temp directory which we + # will read back from to continue the workflow + tmp_dir = os.path.join(self.output_dir, "tmpdir_md") + tmp_wav = os.path.join(tmp_dir, "waveforms") + tmp_inv = os.path.join(tmp_dir, "inventory") + + mdl = MassDownloader() + mdl.download(domain, restrictions, mseed_storage=tmp_wav, + stationxml_storage=tmp_inv, download_chunk_size_in_mb=20, + threads_per_client=3, print_report=True) + + # Read back in waveforms and stationxml data + st = Stream() + for fid in glob(os.path.join(tmp_wav, "*.mseed")): + st += read(fid) + logger.info(f"mass downloader downloaded {len(st)} traces") + + inv = Inventory() + for fid in glob(os.path.join(tmp_inv, "*.xml")): + inv += read_inventory(fid) + + # Delete the tmpdir + if delete_tmpdir: + logger.info("deleting temporary mass downloader directories") + shutil.rmtree(tmp_dir) + + return st, inv + def curtail_stations(self): """ Remove stations from `inv` based on station distance, azimuth, etc. @@ -1224,17 +1332,23 @@ def run(self, event=None, inv=None, st=None, **kwargs): self.event_tag, self.output_dir = self._event_tag_and_output_dir() - if inv is None: - self.inv = self.get_stations() - else: - self.inv = inv - self.inv = self.curtail_stations() + # Standard method of retrieving waveforms from data center + if self.use_mass_download is False: + if inv is None: + self.inv = self.get_stations() + else: + self.inv = inv + self.inv = self.curtail_stations() - # Get waveforms, format and assess waveform quality - if st is None: - self.st = self.get_waveforms() + # Get waveforms, format and assess waveform quality + if st is None: + self.st = self.get_waveforms() + else: + self.st = st + # Use ObsPy's mass download option to gather all available data else: - self.st = st + self.st, self.inv = self.mass_download() + self.st = quality_check_waveforms_before_processing(self.st) self.st = append_sac_headers(self.st, self.event, self.inv) if self.taup_model is not None: